LCOV - code coverage report
Current view: top level - gcc/config/i386 - i386.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 89.2 % 13043 11640
Test Date: 2026-06-20 15:32:29 Functions: 97.0 % 473 459
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Subroutines used for code generation on IA-32.
       2              :    Copyright (C) 1988-2026 Free Software Foundation, Inc.
       3              : 
       4              : This file is part of GCC.
       5              : 
       6              : GCC is free software; you can redistribute it and/or modify
       7              : it under the terms of the GNU General Public License as published by
       8              : the Free Software Foundation; either version 3, or (at your option)
       9              : any later version.
      10              : 
      11              : GCC is distributed in the hope that it will be useful,
      12              : but WITHOUT ANY WARRANTY; without even the implied warranty of
      13              : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14              : GNU General Public License for more details.
      15              : 
      16              : You should have received a copy of the GNU General Public License
      17              : along with GCC; see the file COPYING3.  If not see
      18              : <http://www.gnu.org/licenses/>.  */
      19              : 
      20              : #define INCLUDE_STRING
      21              : #define IN_TARGET_CODE 1
      22              : 
      23              : #include "config.h"
      24              : #include "system.h"
      25              : #include "coretypes.h"
      26              : #include "backend.h"
      27              : #include "rtl.h"
      28              : #include "tree.h"
      29              : #include "memmodel.h"
      30              : #include "gimple.h"
      31              : #include "cfghooks.h"
      32              : #include "cfgloop.h"
      33              : #include "df.h"
      34              : #include "tm_p.h"
      35              : #include "stringpool.h"
      36              : #include "expmed.h"
      37              : #include "optabs.h"
      38              : #include "regs.h"
      39              : #include "emit-rtl.h"
      40              : #include "recog.h"
      41              : #include "cgraph.h"
      42              : #include "diagnostic.h"
      43              : #include "cfgbuild.h"
      44              : #include "alias.h"
      45              : #include "fold-const.h"
      46              : #include "attribs.h"
      47              : #include "calls.h"
      48              : #include "stor-layout.h"
      49              : #include "varasm.h"
      50              : #include "output.h"
      51              : #include "insn-attr.h"
      52              : #include "flags.h"
      53              : #include "except.h"
      54              : #include "explow.h"
      55              : #include "expr.h"
      56              : #include "cfgrtl.h"
      57              : #include "common/common-target.h"
      58              : #include "langhooks.h"
      59              : #include "reload.h"
      60              : #include "gimplify.h"
      61              : #include "dwarf2.h"
      62              : #include "tm-constrs.h"
      63              : #include "cselib.h"
      64              : #include "sched-int.h"
      65              : #include "opts.h"
      66              : #include "tree-pass.h"
      67              : #include "context.h"
      68              : #include "pass_manager.h"
      69              : #include "target-globals.h"
      70              : #include "gimple-iterator.h"
      71              : #include "gimple-fold.h"
      72              : #include "tree-vectorizer.h"
      73              : #include "shrink-wrap.h"
      74              : #include "builtins.h"
      75              : #include "rtl-iter.h"
      76              : #include "tree-iterator.h"
      77              : #include "dbgcnt.h"
      78              : #include "case-cfn-macros.h"
      79              : #include "dojump.h"
      80              : #include "fold-const-call.h"
      81              : #include "tree-vrp.h"
      82              : #include "tree-ssanames.h"
      83              : #include "selftest.h"
      84              : #include "selftest-rtl.h"
      85              : #include "print-rtl.h"
      86              : #include "intl.h"
      87              : #include "ifcvt.h"
      88              : #include "symbol-summary.h"
      89              : #include "sreal.h"
      90              : #include "ipa-cp.h"
      91              : #include "ipa-prop.h"
      92              : #include "ipa-fnsummary.h"
      93              : #include "wide-int-bitmask.h"
      94              : #include "tree-vector-builder.h"
      95              : #include "debug.h"
      96              : #include "dwarf2out.h"
      97              : #include "i386-options.h"
      98              : #include "i386-builtins.h"
      99              : #include "i386-expand.h"
     100              : #include "i386-features.h"
     101              : #include "function-abi.h"
     102              : #include "rtl-error.h"
     103              : #include "gimple-pretty-print.h"
     104              : 
     105              : /* This file should be included last.  */
     106              : #include "target-def.h"
     107              : 
     108              : static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
     109              : static void ix86_emit_restore_reg_using_pop (rtx, bool = false);
     110              : 
     111              : 
     112              : #ifndef CHECK_STACK_LIMIT
     113              : #define CHECK_STACK_LIMIT (-1)
     114              : #endif
     115              : 
     116              : /* Return index of given mode in mult and division cost tables.  */
     117              : #define MODE_INDEX(mode)                                        \
     118              :   ((mode) == QImode ? 0                                         \
     119              :    : (mode) == HImode ? 1                                       \
     120              :    : (mode) == SImode ? 2                                       \
     121              :    : (mode) == DImode ? 3                                       \
     122              :    : 4)
     123              : 
     124              : 
     125              : /* Set by -mtune.  */
     126              : const struct processor_costs *ix86_tune_cost = NULL;
     127              : 
     128              : /* Set by -mtune or -Os.  */
     129              : const struct processor_costs *ix86_cost = NULL;
     130              : 
     131              : /* In case the average insn count for single function invocation is
     132              :    lower than this constant, emit fast (but longer) prologue and
     133              :    epilogue code.  */
     134              : #define FAST_PROLOGUE_INSN_COUNT 20
     135              : 
     136              : /* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
     137              : static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
     138              : static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
     139              : static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
     140              : 
     141              : /* Array of the smallest class containing reg number REGNO, indexed by
     142              :    REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
     143              : 
     144              : enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
     145              : {
     146              :   /* ax, dx, cx, bx */
     147              :   AREG, DREG, CREG, BREG,
     148              :   /* si, di, bp, sp */
     149              :   SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
     150              :   /* FP registers */
     151              :   FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
     152              :   FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
     153              :   /* arg pointer, flags, fpsr, frame */
     154              :   NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
     155              :   /* SSE registers */
     156              :   SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
     157              :   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
     158              :   /* MMX registers */
     159              :   MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
     160              :   MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
     161              :   /* REX registers */
     162              :   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
     163              :   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
     164              :   /* SSE REX registers */
     165              :   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
     166              :   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
     167              :   /* AVX-512 SSE registers */
     168              :   ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
     169              :   ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
     170              :   ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
     171              :   ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
     172              :   /* Mask registers.  */
     173              :   ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
     174              :   MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
     175              :   /* REX2 registers */
     176              :   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
     177              :   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
     178              :   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
     179              :   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
     180              : };
     181              : 
     182              : /* The "default" register map used in 32-bit mode.  */
     183              : 
     184              : unsigned int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
     185              : {
     186              :   /* general regs */
     187              :   0, 2, 1, 3, 6, 7, 4, 5,
     188              :   /* fp regs */
     189              :   12, 13, 14, 15, 16, 17, 18, 19,
     190              :   /* arg, flags, fpsr, frame */
     191              :   IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
     192              :   IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
     193              :   /* SSE */
     194              :   21, 22, 23, 24, 25, 26, 27, 28,
     195              :   /* MMX */
     196              :   29, 30, 31, 32, 33, 34, 35, 36,
     197              :   /* extended integer registers */
     198              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     199              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     200              :   /* extended sse registers */
     201              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     202              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     203              :   /* AVX-512 registers 16-23 */
     204              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     205              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     206              :   /* AVX-512 registers 24-31 */
     207              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     208              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     209              :   /* Mask registers.  NOTE(review): the REX2 registers have no entries here, so they are implicitly 0 rather than INVALID_REGNUM.  */
     210              :   93, 94, 95, 96, 97, 98, 99, 100
     211              : };
     212              : 
     213              : /* The "default" register map used in 64bit mode.  */
     214              : 
     215              : unsigned int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
     216              : {
     217              :   /* general regs */
     218              :   0, 1, 2, 3, 4, 5, 6, 7,
     219              :   /* fp regs */
     220              :   33, 34, 35, 36, 37, 38, 39, 40,
     221              :   /* arg, flags, fpsr, frame */
     222              :   IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
     223              :   IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
     224              :   /* SSE */
     225              :   17, 18, 19, 20, 21, 22, 23, 24,
     226              :   /* MMX */
     227              :   41, 42, 43, 44, 45, 46, 47, 48,
     228              :   /* extended integer registers */
     229              :   8, 9, 10, 11, 12, 13, 14, 15,
     230              :   /* extended SSE registers */
     231              :   25, 26, 27, 28, 29, 30, 31, 32,
     232              :   /* AVX-512 registers 16-23 */
     233              :   67, 68, 69, 70, 71, 72, 73, 74,
     234              :   /* AVX-512 registers 24-31 */
     235              :   75, 76, 77, 78, 79, 80, 81, 82,
     236              :   /* Mask registers */
     237              :   118, 119, 120, 121, 122, 123, 124, 125,
     238              :   /* rex2 extend integer registers */
     239              :   130, 131, 132, 133, 134, 135, 136, 137,
     240              :   138, 139, 140, 141, 142, 143, 144, 145
     241              : };
     242              : 
     243              : /* Define the register numbers to be used in Dwarf debugging information.
     244              :    The SVR4 reference port C compiler uses the following register numbers
     245              :    in its Dwarf output code:
     246              :         0 for %eax (gcc regno = 0)
     247              :         1 for %ecx (gcc regno = 2)
     248              :         2 for %edx (gcc regno = 1)
     249              :         3 for %ebx (gcc regno = 3)
     250              :         4 for %esp (gcc regno = 7)
     251              :         5 for %ebp (gcc regno = 6)
     252              :         6 for %esi (gcc regno = 4)
     253              :         7 for %edi (gcc regno = 5)
     254              :    The following three DWARF register numbers are never generated by
     255              :    the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
     256              :    believed these numbers have these meanings.
     257              :         8  for %eip    (no gcc equivalent)
     258              :         9  for %eflags (gcc regno = 17)
     259              :         10 for %trapno (no gcc equivalent)
     260              :    It is not at all clear how we should number the FP stack registers
     261              :    for the x86 architecture.  If the version of SDB on x86/svr4 were
     262              :    a bit less brain dead with respect to floating-point then we would
     263              :    have a precedent to follow with respect to DWARF register numbers
     264              :    for x86 FP registers, but the SDB on x86/svr4 was so completely
     265              :    broken with respect to FP registers that it is hardly worth thinking
     266              :    of it as something to strive for compatibility with.
     267              :    The version of x86/svr4 SDB I had does (partially)
     268              :    seem to believe that DWARF register number 11 is associated with
     269              :    the x86 register %st(0), but that's about all.  Higher DWARF
     270              :    register numbers don't seem to be associated with anything in
     271              :    particular, and even for DWARF regno 11, SDB only seemed to under-
     272              :    stand that it should say that a variable lives in %st(0) (when
     273              :    asked via an `=' command) if we said it was in DWARF regno 11,
     274              :    but SDB still printed garbage when asked for the value of the
     275              :    variable in question (via a `/' command).
     276              :    (Also note that the labels SDB printed for various FP stack regs
     277              :    when doing an `x' command were all wrong.)
     278              :    Note that these problems generally don't affect the native SVR4
     279              :    C compiler because it doesn't allow the use of -O with -g and
     280              :    because when it is *not* optimizing, it allocates a memory
     281              :    location for each floating-point variable, and the memory
     282              :    location is what gets described in the DWARF AT_location
     283              :    attribute for the variable in question.
     284              :    Regardless of the severe mental illness of the x86/svr4 SDB, we
     285              :    do something sensible here and we use the following DWARF
     286              :    register numbers.  Note that these are all stack-top-relative
     287              :    numbers.
     288              :         11 for %st(0) (gcc regno = 8)
     289              :         12 for %st(1) (gcc regno = 9)
     290              :         13 for %st(2) (gcc regno = 10)
     291              :         14 for %st(3) (gcc regno = 11)
     292              :         15 for %st(4) (gcc regno = 12)
     293              :         16 for %st(5) (gcc regno = 13)
     294              :         17 for %st(6) (gcc regno = 14)
     295              :         18 for %st(7) (gcc regno = 15)
     296              : */
     297              : unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
     298              : {
     299              :   /* general regs */
     300              :   0, 2, 1, 3, 6, 7, 5, 4,
     301              :   /* fp regs */
     302              :   11, 12, 13, 14, 15, 16, 17, 18,
     303              :   /* arg, flags, fpsr, frame */
     304              :   IGNORED_DWARF_REGNUM, 9,
     305              :   IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
     306              :   /* SSE registers */
     307              :   21, 22, 23, 24, 25, 26, 27, 28,
     308              :   /* MMX registers */
     309              :   29, 30, 31, 32, 33, 34, 35, 36,
     310              :   /* extended integer registers */
     311              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     312              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     313              :   /* extended sse registers */
     314              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     315              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     316              :   /* AVX-512 registers 16-23 */
     317              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     318              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     319              :   /* AVX-512 registers 24-31 */
     320              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     321              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     322              :   /* Mask registers.  NOTE(review): the REX2 registers have no entries here, so they are implicitly 0 rather than INVALID_REGNUM.  */
     323              :   93, 94, 95, 96, 97, 98, 99, 100
     324              : };
     325              : 
     326              : /* Define parameter passing and return registers.  */
     327              : 
     328              : static int const x86_64_int_parameter_registers[6] =
     329              : {
     330              :   DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
     331              : };
     332              : 
     333              : static int const x86_64_ms_abi_int_parameter_registers[4] =
     334              : {
     335              :   CX_REG, DX_REG, R8_REG, R9_REG
     336              : };
     337              : 
     338              : /* Similar to Clang's preserve_none function parameter passing.
     339              :    NB: Use DI_REG and SI_REG, see ix86_function_arg_regno_p.  */
     340              : 
     341              : static int const x86_64_preserve_none_int_parameter_registers[6] =
     342              : {
     343              :   R12_REG, R13_REG, R14_REG, R15_REG, DI_REG, SI_REG
     344              : };
     345              : 
     346              : static int const x86_64_int_return_registers[2] =
     347              : {
     348              :   AX_REG, DX_REG
     349              : };
     350              : 
     351              : /* Define the structure for the machine field in struct function.  */
     352              : 
     353              : struct GTY(()) stack_local_entry {
     354              :   unsigned short mode;
     355              :   unsigned short n;
     356              :   rtx rtl;
     357              :   struct stack_local_entry *next;
     358              : };
     359              : 
     360              : /* Which cpu are we scheduling for.  */
     361              : enum attr_cpu ix86_schedule;
     362              : 
     363              : /* Which cpu are we optimizing for.  */
     364              : enum processor_type ix86_tune;
     365              : 
     366              : /* Which instruction set architecture to use.  */
     367              : enum processor_type ix86_arch;
     368              : 
     369              : /* True if processor has SSE prefetch instruction.  */
     370              : unsigned char ix86_prefetch_sse;
     371              : 
     372              : /* Preferred alignment for stack boundary in bits.  */
     373              : unsigned int ix86_preferred_stack_boundary;
     374              : 
     375              : /* Alignment for incoming stack boundary in bits specified at
     376              :    command line.  */
     377              : unsigned int ix86_user_incoming_stack_boundary;
     378              : 
     379              : /* Default alignment for incoming stack boundary in bits.  */
     380              : unsigned int ix86_default_incoming_stack_boundary;
     381              : 
     382              : /* Alignment for incoming stack boundary in bits.  */
     383              : unsigned int ix86_incoming_stack_boundary;
     384              : 
     385              : /* True if there is no direct access to extern symbols.  */
     386              : bool ix86_has_no_direct_extern_access;
     387              : 
     388              : /* Calling abi specific va_list type nodes.  */
     389              : tree sysv_va_list_type_node;
     390              : tree ms_va_list_type_node;
     391              : 
     392              : /* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
     393              : char internal_label_prefix[16];
     394              : int internal_label_prefix_len;
     395              : 
     396              : /* Fence to use after loop using movnt.  */
     397              : tree x86_mfence;
     398              : 
     399              : /* Register class used for passing a given 64-bit part of an argument.
     400              :    These represent the classes documented by the psABI, with the exception
     401              :    of the SSESF and SSEDF classes, which are basically the SSE class, except
     402              :    that GCC uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
     403              : 
     404              :    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
     405              :    whenever possible (the upper half does contain padding).  */
     406              : enum x86_64_reg_class
     407              :   {
     408              :     X86_64_NO_CLASS,
     409              :     X86_64_INTEGER_CLASS,
     410              :     X86_64_INTEGERSI_CLASS,
     411              :     X86_64_SSE_CLASS,
     412              :     X86_64_SSEHF_CLASS,
     413              :     X86_64_SSESF_CLASS,
     414              :     X86_64_SSEDF_CLASS,
     415              :     X86_64_SSEUP_CLASS,
     416              :     X86_64_X87_CLASS,
     417              :     X86_64_X87UP_CLASS,
     418              :     X86_64_COMPLEX_X87_CLASS,
     419              :     X86_64_MEMORY_CLASS
     420              :   };
     421              : 
     422              : #define MAX_CLASSES 8
     423              : 
     424              : /* Table of constants used by fldpi, fldln2, etc....  */
     425              : static REAL_VALUE_TYPE ext_80387_constants_table [5];
     426              : static bool ext_80387_constants_init;
     427              : 
     428              : 
     429              : static rtx ix86_function_value (const_tree, const_tree, bool);
     430              : static bool ix86_function_value_regno_p (const unsigned int);
     431              : static unsigned int ix86_function_arg_boundary (machine_mode,
     432              :                                                 const_tree);
     433              : static rtx ix86_static_chain (const_tree, bool);
     434              : static int ix86_function_regparm (const_tree, const_tree);
     435              : static void ix86_compute_frame_layout (void);
     436              : static tree ix86_canonical_va_list_type (tree);
     437              : static unsigned int split_stack_prologue_scratch_regno (void);
     438              : static bool i386_asm_output_addr_const_extra (FILE *, rtx);
     439              : 
     440              : static bool ix86_can_inline_p (tree, tree);
     441              : static unsigned int ix86_minimum_incoming_stack_boundary (bool);
     442              : 
     443              : typedef enum ix86_flags_cc
     444              : {
     445              :   X86_CCO = 0, X86_CCNO, X86_CCB, X86_CCNB,
     446              :   X86_CCE, X86_CCNE, X86_CCBE, X86_CCNBE,
     447              :   X86_CCS, X86_CCNS, X86_CCP, X86_CCNP,
     448              :   X86_CCL, X86_CCNL, X86_CCLE, X86_CCNLE
     449              : } ix86_cc;
     450              : 
     451              : static const char *ix86_ccmp_dfv_mapping[] =
     452              : {
     453              :   "{dfv=of}", "{dfv=}", "{dfv=cf}", "{dfv=}",
     454              :   "{dfv=zf}", "{dfv=}", "{dfv=cf, zf}", "{dfv=}",
     455              :   "{dfv=sf}", "{dfv=}", "{dfv=cf}", "{dfv=}",
     456              :   "{dfv=sf}", "{dfv=sf, of}", "{dfv=sf, of, zf}", "{dfv=sf, of}"
     457              : };
     458              : 
     459              : 
     460              : /* Whether -mtune= or -march= were specified */
     461              : int ix86_tune_defaulted;
     462              : int ix86_arch_specified;
     463              : 
     464              : /* Return true if a red zone is in use.  We can't use the red zone when
     465              :    there are local indirect jumps, like "indirect_jump" or "tablejump",
     466              :    which jump to another place in the function, since the "call" in the
     467              :    indirect thunk pushes the return address onto the stack, destroying
     468              :    the red zone; such jumps are tolerated when indirect_branch_type is indirect_branch_keep.  The red zone is also unused without TARGET_RED_ZONE or with TARGET_64BIT_MS_ABI.
     469              : 
     470              :    NB: Don't use the red zone for functions with no_caller_saved_registers
     471              :    and 32 GPRs or 16 XMM registers, since the 128-byte red zone is too small
     472              :    for 31 GPRs or 15 GPRs + 16 XMM registers.
     473              : 
     474              :    TODO: If we can reserve the first 2 WORDs in the red zone, one for PUSH
     475              :    and another for CALL, we can allow local indirect jumps with the
     476              :    indirect thunk.  */
     477              : 
     478              : bool
     479      9933591 : ix86_using_red_zone (void)
     480              : {
     481      9933591 :   return (TARGET_RED_ZONE
     482      8991252 :           && !TARGET_64BIT_MS_ABI
     483      8688543 :           && ((!TARGET_APX_EGPR && !TARGET_SSE)
     484      8665538 :               || (cfun->machine->call_saved_registers
     485      8665538 :                   != TYPE_NO_CALLER_SAVED_REGISTERS))
     486     18622073 :           && (!cfun->machine->has_local_indirect_jump
     487        47318 :               || cfun->machine->indirect_branch_type == indirect_branch_keep));
     488              : }
     489              : 
     490              : /* Return true if profiling code should be emitted before the
     491              :    prologue, i.e. when flag_fentry is nonzero; otherwise return false.
     492              :    Note: For x86 with "hotfix" it is sorried.  */
     493              : static bool
     494      4516396 : ix86_profile_before_prologue (void)
     495              : {
     496      4516396 :   return flag_fentry != 0;
     497              : }
     498              : 
     499              : /* Update register usage after having seen the compiler flags.  */
     500              : 
     501              : static void
     502       841208 : ix86_conditional_register_usage (void)
     503              : {
     504       841208 :   int i, c_mask;
     505              : 
     506              :   /* If there are no caller-saved registers, preserve all registers
     507              :      except fixed_regs and registers used for the function return value,
     508              :      since aggregate_value_p checks call_used_regs[regno] on the return
     509              :      value.  */
     510       841208 :   if (cfun
     511        69212 :       && (cfun->machine->call_saved_registers
     512        69212 :           == TYPE_NO_CALLER_SAVED_REGISTERS))
     513       462489 :     for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     514       457516 :       if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
     515       422283 :         call_used_regs[i] = 0;
     516              : 
     517              :   /* For 32-bit targets, make the REX integer, REX SSE and extended REX SSE registers inaccessible.  */
     518       841208 :   if (! TARGET_64BIT)
     519              :     {
     520       134550 :       for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
     521       119600 :         CLEAR_HARD_REG_BIT (accessible_reg_set, i);
     522       134550 :       for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     523       119600 :         CLEAR_HARD_REG_BIT (accessible_reg_set, i);
     524       254150 :       for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
     525       239200 :         CLEAR_HARD_REG_BIT (accessible_reg_set, i);
     526              :     }
     527              : 
     528              :   /* See the definition of CALL_USED_REGISTERS in i386.h; c_mask is tested against the conditional entries below.  */
     529       841208 :   c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
     530              : 
     531       841208 :   CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
     532              : 
     533     78232344 :   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     534              :     {
     535              :       /* Set/reset conditionally defined registers from the
     536              :          CALL_USED_REGISTERS initializer: an entry greater than 1 is reduced to 0 or 1 depending on whether it shares a bit with c_mask.  */
     537     77391136 :       if (call_used_regs[i] > 1)
     538     13379705 :         call_used_regs[i] = !!(call_used_regs[i] & c_mask);
     539              : 
     540              :       /* Build the CLOBBERED_REGS register set from the registers of
     541              :          GENERAL_REGS that are call used.  */
     542     77391136 :       if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
     543     77391136 :           && call_used_regs[i])
     544     23428097 :         SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
     545              :     }
     546              : 
     547              :   /* If MMX is disabled, make the MMX registers inaccessible.  */
     548       841208 :   if (! TARGET_MMX)
     549       400526 :     accessible_reg_set &= ~reg_class_contents[MMX_REGS];
     550              : 
     551              :   /* If SSE is disabled, make all SSE registers inaccessible.  */
     552       841208 :   if (! TARGET_SSE)
     553       394552 :     accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];
     554              : 
     555              :   /* If neither TARGET_80387 nor TARGET_FLOAT_RETURNS_IN_80387 is set, make the FLOAT_REGS registers inaccessible.  */
     556       841208 :   if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
     557       395772 :     accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];
     558              : 
     559              :   /* If AVX512F is disabled, make the extended REX SSE registers and the mask registers inaccessible.  */
     560       841208 :   if (! TARGET_AVX512F)
     561              :     {
     562     10204131 :       for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
     563      9603888 :         CLEAR_HARD_REG_BIT (accessible_reg_set, i);
     564              : 
     565      1200486 :       accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
     566              :     }
     567              : 
     568              :   /* If APX EGPR is disabled or this is not a 64-bit target, make the REX2 integer registers inaccessible.  */
     569       841208 :   if (! (TARGET_APX_EGPR && TARGET_64BIT))
     570              :     {
     571     14289095 :       for (i = FIRST_REX2_INT_REG; i <= LAST_REX2_INT_REG; i++)
     572     13448560 :         CLEAR_HARD_REG_BIT (accessible_reg_set, i);
     573              :     }
     574       841208 : }
     575              : 
     576              : /* Canonicalize a comparison from one we don't have to one we do have.
                      :    *CODE, *OP0 and *OP1 are updated in place.  Every rewrite below is
                      :    guarded by !OP0_PRESERVE_VALUE, so nothing is changed when that flag
                      :    is true.  At most one rewrite is applied per call: each one returns
                      :    as soon as it has updated the operands.  */
     577              : 
     578              : static void
     579     23954601 : ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
     580              :                               bool op0_preserve_value)
     581              : {
     582              :   /* The order of operands in x87 ficom compare is forced by combine in
     583              :      simplify_comparison () function. Float operator is treated as RTX_OBJ
     584              :      with a precedence over other operators and is always put in the first
     585              :      place. Swap condition and operands to match ficom instruction.  */
     586     23954601 :   if (!op0_preserve_value
     587     23156822 :       && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
     588              :     {
     589           14 :       enum rtx_code scode = swap_condition ((enum rtx_code) *code);
     590              : 
     591              :       /* We are called only for compares that are split to SAHF instruction.
     592              :          Ensure that we have setcc/jcc insn for the swapped condition.  */
     593           14 :       if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
     594              :         {
     595            6 :           std::swap (*op0, *op1);
     596            6 :           *code = (int) scode;
     597            6 :           return;
     598              :         }
     599              :     }
     600              : 
     601              :   /* SUB (a, b) underflows precisely when a < b.  Convert
     602              :      (compare (minus (a b)) a) to (compare (a b))
     603              :      to match *sub<mode>_3 pattern.  */
     604     23156816 :   if (!op0_preserve_value
     605     23156816 :       && (*code == GTU || *code == LEU)
     606      1791497 :       && GET_CODE (*op0) == MINUS
     607        77766 :       && rtx_equal_p (XEXP (*op0, 0), *op1))
     608              :     {
     609          487 :       *op1 = XEXP (*op0, 1);
     610          487 :       *op0 = XEXP (*op0, 0);
     611          487 :       *code = (int) swap_condition ((enum rtx_code) *code);
     612          487 :       return;
     613              :     }
     614              : 
     615              :   /* Swap operands of GTU comparison to canonicalize
     616              :      addcarry/subborrow comparison.  */
     617     23954108 :   if (!op0_preserve_value
     618     23156329 :       && *code == GTU
     619       820375 :       && GET_CODE (*op0) == PLUS
     620       322268 :       && ix86_carry_flag_operator (XEXP (*op0, 0), VOIDmode)
     621        44135 :       && GET_CODE (XEXP (*op0, 1)) == ZERO_EXTEND
     622     23994141 :       && GET_CODE (*op1) == ZERO_EXTEND)
     623              :     {
     624        36734 :       std::swap (*op0, *op1);
     625        36734 :       *code = (int) swap_condition ((enum rtx_code) *code);
     626        36734 :       return;
     627              :     }
     628              : }
     629              : 
     630              : /* Hook to determine if one function can safely inline another.
                      :    CALLER and CALLEE are the two function declarations.  Each one's
                      :    target options are read from DECL_FUNCTION_SPECIFIC_TARGET, with
                      :    target_option_default_node used when none is set; if both end up
                      :    with the same node the answer is true without further checks.
                      :    Otherwise the checks below run in order and the first one that
                      :    matches decides the result.  */
     631              : 
     632              : static bool
     633      9635341 : ix86_can_inline_p (tree caller, tree callee)
     634              : {
     635      9635341 :   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
     636      9635341 :   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
     637              : 
     638              :   /* Changes to these flags can be tolerated for always_inline callees.
     639              :      Let's hope the user knows what they are doing.  */
     640      9635341 :   unsigned HOST_WIDE_INT always_inline_safe_mask
     641              :          = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
     642              :             | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
     643              :             | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
     644              :             | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
     645              :             | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
     646              :             | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
     647              :             | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
     648              : 
     649              : 
     650      9635341 :   if (!callee_tree)
     651      9038303 :     callee_tree = target_option_default_node;
     652      9635341 :   if (!caller_tree)
     653      9038368 :     caller_tree = target_option_default_node;
     654      9635341 :   if (callee_tree == caller_tree)
     655              :     return true;
     656              : 
     657         5292 :   struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
     658         5292 :   struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
     659         5292 :   bool ret = false;
     660         5292 :   bool always_inline
     661         5292 :     = (DECL_DISREGARD_INLINE_LIMITS (callee)
     662         9939 :        && lookup_attribute ("always_inline",
     663         4647 :                             DECL_ATTRIBUTES (callee)));
     664              : 
     665              :   /* If callee only uses GPRs, ignore MASK_80387.  */
     666         5292 :   if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
     667         1030 :     always_inline_safe_mask |= MASK_80387;
     668              : 
     669         5292 :   cgraph_node *callee_node = cgraph_node::get (callee);
     670              :   /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
     671              :      function can inline a SSE2 function but a SSE2 function can't inline
     672              :      a SSE4 function.  */
     673         5292 :   if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
     674              :        != callee_opts->x_ix86_isa_flags)
     675         5056 :       || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
     676              :           != callee_opts->x_ix86_isa_flags2))
     677              :     ret = false;
     678              : 
     679              :   /* See if we have the same non-isa options.  */
     680         5019 :   else if ((!always_inline
     681          388 :             && caller_opts->x_target_flags != callee_opts->x_target_flags)
     682         4975 :            || (caller_opts->x_target_flags & ~always_inline_safe_mask)
     683         4975 :                != (callee_opts->x_target_flags & ~always_inline_safe_mask))
     684              :     ret = false;
     685              : 
     686         4975 :   else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
     687              :            /* If the callee doesn't use FP expressions differences in
     688              :               ix86_fpmath can be ignored.  We are called from FEs
     689              :               for multi-versioning call optimization, so beware of
     690              :               ipa_fn_summaries not available.  */
     691         1247 :            && (! ipa_fn_summaries
     692         1247 :                || ipa_fn_summaries->get (callee_node) == NULL
     693         1247 :                || ipa_fn_summaries->get (callee_node)->fp_expressions))
     694              :     ret = false;
     695              : 
     696              :   /* At this point we cannot identify whether arch or tune setting
     697              :      comes from target attribute or not. So the most conservative way
     698              :      is to allow the callee that uses default arch and tune string to
     699              :      be inlined.  */
     700         4701 :   else if (!strcmp (callee_opts->x_ix86_arch_string, "x86-64")
     701         1430 :            && !strcmp (callee_opts->x_ix86_tune_string, "generic"))
     702              :     ret = true;
     703              : 
     704              :   /* See if arch, tune, etc. are the same.  As the ISA flags check above
     705              :      already verifies that callee's ISA is a subset of caller's, do not
     706              :      block an always_inline callee even if it has a different arch.  */
     707         3279 :   else if (!always_inline && caller_opts->arch != callee_opts->arch)
     708              :     ret = false;
     709              : 
     710           15 :   else if (!always_inline && caller_opts->tune != callee_opts->tune)
     711              :     ret = false;
     712              : 
     713         3279 :   else if (!always_inline
     714           15 :            && caller_opts->branch_cost != callee_opts->branch_cost)
     715              :     ret = false;
     716              : 
     717              :   else
     718      9634750 :     ret = true;
     719              : 
     720              :   return ret;
     721              : }
     722              : 
     723              : /* Return true if this goes in large data/bss.
                      :    The result is always false unless ix86_cmodel is CM_MEDIUM,
                      :    CM_MEDIUM_PIC, CM_LARGE or CM_LARGE_PIC.  EXP may be NULL_TREE, in
                      :    which case the result is also false.  A variable with an explicit
                      :    section name is judged by that name alone; anything else is judged
                      :    by the size of its type against ix86_section_threshold.  */
     724              : 
     725              : static bool
     726     78359778 : ix86_in_large_data_p (tree exp)
     727              : {
     728     78359778 :   if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC
     729     78359540 :       && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC)
     730              :     return false;
     731              : 
     732         1147 :   if (exp == NULL_TREE)
     733              :     return false;
     734              : 
     735              :   /* Functions are never large data.  */
     736         1147 :   if (TREE_CODE (exp) == FUNCTION_DECL)
     737              :     return false;
     738              : 
     739              :   /* Automatic variables are never large data.  */
     740          279 :   if (VAR_P (exp) && !is_global_var (exp))
     741              :     return false;
     742              : 
     743          279 :   if (VAR_P (exp) && DECL_SECTION_NAME (exp))
     744              :     {
     745           51 :       const char *section = DECL_SECTION_NAME (exp);
     746           51 :       if (strcmp (section, ".ldata") == 0
     747           51 :           || startswith (section, ".ldata.")
     748           51 :           || strcmp (section, ".lbss") == 0
     749           51 :           || startswith (section, ".lbss.")
     750           99 :           || startswith (section, ".gnu.linkonce.lb."))
     751              :         return true;
     752              :       return false;
     753              :     }
     754              :   else
     755              :     {
     756          228 :       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
     757              : 
     758              :       /* If this is an incomplete type with size 0, then we can't put it
     759              :          in data because it might be too big when completed.  Also,
     760              :          int_size_in_bytes returns -1 if size can vary or is larger than
     761              :          an integer, in which case it is also safer to assume that it goes
     762              :          in large data.  */
     763          228 :       if (size <= 0 || size > ix86_section_threshold)
     764              :         return true;
     765              :     }
     766              : 
     767              :   return false;
     768              : }
     769              : 
     770              : /* i386-specific section flag to mark large sections.  */
     771              : #define SECTION_LARGE SECTION_MACH_DEP
     772              : 
     773              : /* Switch to the appropriate section for output of DECL.
     774              :    DECL is either a `VAR_DECL' node or a constant of some sort.
     775              :    RELOC indicates whether forming the initial value of DECL requires
     776              :    link-time relocations.  ALIGN is only passed through to
                      :    default_elf_select_section, which handles every case that is not
                      :    given a large-model section name here.  */
     777              : 
     778              : ATTRIBUTE_UNUSED static section *
     779      1667108 : x86_64_elf_select_section (tree decl, int reloc,
     780              :                            unsigned HOST_WIDE_INT align)
     781              : {
     782      1667108 :   if (ix86_in_large_data_p (decl))
     783              :     {
     784            6 :       const char *sname = NULL;
     785            6 :       unsigned int flags = SECTION_WRITE | SECTION_LARGE;
     786            6 :       switch (categorize_decl_for_section (decl, reloc))
     787              :         {
     788            1 :         case SECCAT_DATA:
     789            1 :           sname = ".ldata";
     790            1 :           break;
     791            0 :         case SECCAT_DATA_REL:
     792            0 :           sname = ".ldata.rel";
     793            0 :           break;
     794            0 :         case SECCAT_DATA_REL_LOCAL:
     795            0 :           sname = ".ldata.rel.local";
     796            0 :           break;
     797            0 :         case SECCAT_DATA_REL_RO:
     798            0 :           sname = ".ldata.rel.ro";
     799            0 :           break;
     800            0 :         case SECCAT_DATA_REL_RO_LOCAL:
     801            0 :           sname = ".ldata.rel.ro.local";
     802            0 :           break;
     803            0 :         case SECCAT_BSS:
     804            0 :           sname = ".lbss";
     805            0 :           flags |= SECTION_BSS;
     806            0 :           break;
     807              :         case SECCAT_RODATA:
     808              :         case SECCAT_RODATA_MERGE_STR:
     809              :         case SECCAT_RODATA_MERGE_STR_INIT:
     810              :         case SECCAT_RODATA_MERGE_CONST:
     811              :           sname = ".lrodata";
     812              :           flags &= ~SECTION_WRITE;
     813              :           break;
     814            0 :         case SECCAT_SRODATA:
     815            0 :         case SECCAT_SDATA:
     816            0 :         case SECCAT_SBSS:
     817            0 :           gcc_unreachable ();
     818              :         case SECCAT_TEXT:
     819              :         case SECCAT_TDATA:
     820              :         case SECCAT_TBSS:
     821              :           /* We don't split these for medium model.  Place them into
     822              :              default sections and hope for best.  */
     823              :           break;
     824              :         }
     825            1 :       if (sname)
     826              :         {
     827              :           /* We might get called with string constants, but get_named_section
     828              :              doesn't like them as they are not DECLs.  Also, we need to set
     829              :              flags in that case.  */
     830            6 :           if (!DECL_P (decl))
     831            3 :             return get_section (sname, flags, NULL);
     832            3 :           return get_named_section (decl, sname, reloc);
     833              :         }
     834              :     }
     835      1667102 :   return default_elf_select_section (decl, reloc, align);
     836              : }
     837              : 
     838              : /* Select a set of attributes for section NAME based on the properties
     839              :    of DECL and whether or not RELOC indicates that DECL's initializer
     840              :    might contain runtime relocations.  The result starts from
                      :    default_section_type_flags and is then adjusted: SECTION_LARGE is
                      :    added for large data, SECTION_RELRO for the two .ldata.rel.ro names
                      :    when DECL is NULL_TREE, and SECTION_BSS for the .lbss names.  */
     841              : 
     842              : static unsigned int ATTRIBUTE_UNUSED
     843     64885167 : x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
     844              : {
     845     64885167 :   unsigned int flags = default_section_type_flags (decl, name, reloc);
     846              : 
     847     64885167 :   if (ix86_in_large_data_p (decl))
     848           10 :     flags |= SECTION_LARGE;
     849              : 
     850     64885167 :   if (decl == NULL_TREE
     851          375 :       && (strcmp (name, ".ldata.rel.ro") == 0
     852          375 :           || strcmp (name, ".ldata.rel.ro.local") == 0))
     853            0 :     flags |= SECTION_RELRO;
     854              : 
     855     64885167 :   if (strcmp (name, ".lbss") == 0
     856     64885163 :       || startswith (name, ".lbss.")
     857    129770327 :       || startswith (name, ".gnu.linkonce.lb."))
     858              :     {
     859            7 :       flags |= SECTION_BSS;
     860              :       /* Clear SECTION_NOTYPE so .lbss etc. are marked @nobits in
     861              :          default_elf_asm_named_section.  */
     862            7 :       flags &= ~SECTION_NOTYPE;
     863              :     }
     864              : 
     865     64885167 :   return flags;
     866              : }
     867              : 
     868              : /* Build up a unique section name and assign it to DECL with
     869              :    set_decl_section_name.
     870              :    RELOC indicates whether the initial value of DECL requires
     871              :    link-time relocations.  For large data the name has the form
                      :    [.gnu.linkonce]PREFIX.NAME, where NAME is DECL's assembler name after
                      :    targetm.strip_name_encoding; every other case is left to
                      :    default_unique_section.  */
     872              : 
     873              : static void ATTRIBUTE_UNUSED
     874      1799939 : x86_64_elf_unique_section (tree decl, int reloc)
     875              : {
     876      1799939 :   if (ix86_in_large_data_p (decl))
     877              :     {
     878            3 :       const char *prefix = NULL;
     879              :       /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
     880            3 :       bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
     881              : 
     882            3 :       switch (categorize_decl_for_section (decl, reloc))
     883              :         {
     884            0 :         case SECCAT_DATA:
     885            0 :         case SECCAT_DATA_REL:
     886            0 :         case SECCAT_DATA_REL_LOCAL:
     887            0 :         case SECCAT_DATA_REL_RO:
     888            0 :         case SECCAT_DATA_REL_RO_LOCAL:
     889            0 :           prefix = one_only ? ".ld" : ".ldata";
     890              :           break;
     891            3 :         case SECCAT_BSS:
     892            3 :           prefix = one_only ? ".lb" : ".lbss";
     893              :           break;
     894              :         case SECCAT_RODATA:
     895              :         case SECCAT_RODATA_MERGE_STR:
     896              :         case SECCAT_RODATA_MERGE_STR_INIT:
     897              :         case SECCAT_RODATA_MERGE_CONST:
     898              :           prefix = one_only ? ".lr" : ".lrodata";
     899              :           break;
     900            0 :         case SECCAT_SRODATA:
     901            0 :         case SECCAT_SDATA:
     902            0 :         case SECCAT_SBSS:
     903            0 :           gcc_unreachable ();
     904              :         case SECCAT_TEXT:
     905              :         case SECCAT_TDATA:
     906              :         case SECCAT_TBSS:
     907              :           /* We don't split these for medium model.  Place them into
     908              :              default sections and hope for best.  */
     909              :           break;
     910              :         }
     911            3 :       if (prefix)
     912              :         {
     913            3 :           const char *name, *linkonce;
     914            3 :           char *string;
     915              : 
     916            3 :           name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
     917            3 :           name = targetm.strip_name_encoding (name);
     918              : 
     919              :           /* If we're using one_only, then there needs to be a .gnu.linkonce
     920              :              prefix to the section name.  */
     921            3 :           linkonce = one_only ? ".gnu.linkonce" : "";
     922              : 
     923            3 :           string = ACONCAT ((linkonce, prefix, ".", name, NULL));
     924              : 
     925            3 :           set_decl_section_name (decl, string);
     926            3 :           return;
     927              :         }
     928              :     }
     929      1799936 :   default_unique_section (decl, reloc);
     930              : }
     931              : 
     932              : /* Return true if TYPE has no_callee_saved_registers or preserve_none
     933              :    attribute.  Both attributes are looked up in TYPE_ATTRIBUTES (TYPE).  */
     934              : 
     935              : bool
     936      7551079 : ix86_type_no_callee_saved_registers_p (const_tree type)
     937              : {
     938     15102158 :   return (lookup_attribute ("no_callee_saved_registers",
     939      7551079 :                             TYPE_ATTRIBUTES (type)) != NULL
     940     15102025 :           || lookup_attribute ("preserve_none",
     941      7550946 :                                TYPE_ATTRIBUTES (type)) != NULL);
     942              : }
     943              : 
     944              : #ifdef COMMON_ASM_OP
     945              : 
     946              : #ifndef LARGECOMM_SECTION_ASM_OP
     947              : #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
     948              : #endif
     949              : 
     950              : /* This says how to output assembler code to declare an
     951              :    uninitialized external linkage data object.
     952              : 
     953              :    For medium and large model x86-64 we need to use
     954              :    LARGECOMM_SECTION_ASM_OP opcode for large objects, i.e. those whose
                      :    SIZE exceeds ix86_section_threshold; such objects are also switched
                      :    to a named ".lbss" section first.  All other objects use
                      :    COMMON_ASM_OP.  The directive is followed by NAME, SIZE and ALIGN
                      :    divided by BITS_PER_UNIT.  */
     955              : void
     956       172936 : x86_elf_aligned_decl_common (FILE *file, tree decl,
     957              :                         const char *name, unsigned HOST_WIDE_INT size,
     958              :                         unsigned align)
     959              : {
     960       172936 :   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
     961       172930 :        || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
     962            7 :       && size > (unsigned int)ix86_section_threshold)
     963              :     {
     964            1 :       switch_to_section (get_named_section (decl, ".lbss", 0));
     965            1 :       fputs (LARGECOMM_SECTION_ASM_OP, file);
     966              :     }
     967              :   else
     968       172935 :     fputs (COMMON_ASM_OP, file);
     969       172936 :   assemble_name (file, name);
     970       172936 :   fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
     971              :            size, align / BITS_PER_UNIT);
     972       172936 : }
     973              : #endif
     974              : 
     975              : /* Utility function for targets to use in implementing
     976              :    ASM_OUTPUT_ALIGNED_BSS.  Under the medium and large code models an
                      :    object whose SIZE exceeds ix86_section_threshold is placed in a
                      :    named ".lbss" section; everything else goes to bss_section.  The
                      :    length passed to ASM_OUTPUT_SKIP is 1 when SIZE is zero.  */
     977              : 
     978              : void
     979       769881 : x86_output_aligned_bss (FILE *file, tree decl, const char *name,
     980              :                         unsigned HOST_WIDE_INT size, unsigned align)
     981              : {
     982       769881 :   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
     983       769871 :        || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
     984           42 :       && size > (unsigned int)ix86_section_threshold)
     985            3 :     switch_to_section (get_named_section (decl, ".lbss", 0));
     986              :   else
     987       769878 :     switch_to_section (bss_section);
     988       925781 :   ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
     989              : #ifdef ASM_DECLARE_OBJECT_NAME
     990       769881 :   last_assemble_variable_decl = decl;
     991       769881 :   ASM_DECLARE_OBJECT_NAME (file, name, decl);
     992              : #else
     993              :   /* Standard thing is just output label for the object.  */
     994              :   ASM_OUTPUT_LABEL (file, name);
     995              : #endif /* ASM_DECLARE_OBJECT_NAME */
     996       769881 :   ASM_OUTPUT_SKIP (file, size ? size : 1);
     997       769881 : }
     998              : 
     999              : /* Decide whether we must probe the stack before any space allocation
    1000              :    on this target.  It's essentially TARGET_STACK_PROBE except when
    1001              :    -fstack-check causes the stack to be already probed differently:
                      :    the result is false when flag_stack_check is
                      :    STATIC_BUILTIN_STACK_CHECK and TARGET_STACK_PROBE otherwise.  */
    1002              : 
    1003              : bool
    1004       871471 : ix86_target_stack_probe (void)
    1005              : {
    1006              :   /* Do not probe the stack twice if static stack checking is enabled.  */
    1007       871471 :   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    1008              :     return false;
    1009              : 
    1010       871471 :   return TARGET_STACK_PROBE;
    1011              : }
    1012              : 
    1013              : /* Decide whether we can make a sibling call to a function.  DECL is the
    1014              :    declaration of the function being targeted by the call and EXP is the
    1015              :    CALL_EXPR representing the call.  */
    1016              : 
    1017              : static bool
    1018       135517 : ix86_function_ok_for_sibcall (tree decl, tree exp)
    1019              : {
    1020       135517 :   tree type, decl_or_type;
    1021       135517 :   rtx a, b;
    1022       135517 :   bool bind_global = decl && !targetm.binds_local_p (decl);
    1023              : 
    1024       135517 :   if (ix86_function_naked (current_function_decl))
    1025              :     return false;
    1026              : 
    1027              :   /* Sibling call isn't OK if there are no caller-saved registers
    1028              :      since all registers must be preserved before return.  */
    1029       135515 :   if (cfun->machine->call_saved_registers
    1030       135515 :       == TYPE_NO_CALLER_SAVED_REGISTERS)
    1031              :     return false;
    1032              : 
    1033              :   /* If we are generating position-independent code, we cannot sibcall
    1034              :      optimize direct calls to global functions, as the PLT requires
    1035              :      %ebx be live. (Darwin does not have a PLT.)  */
    1036       135486 :   if (!TARGET_MACHO
    1037       135486 :       && !TARGET_64BIT
    1038        11331 :       && flag_pic
    1039         8405 :       && flag_plt
    1040         8405 :       && bind_global)
    1041              :     return false;
    1042              : 
    1043              :   /* If we need to align the outgoing stack, then sibcalling would
    1044              :      unalign the stack, which may break the called function.  */
    1045       130839 :   if (ix86_minimum_incoming_stack_boundary (true)
    1046       130839 :       < PREFERRED_STACK_BOUNDARY)
    1047              :     return false;
    1048              : 
    1049       130258 :   if (decl)
    1050              :     {
    1051       119385 :       decl_or_type = decl;
    1052       119385 :       type = TREE_TYPE (decl);
    1053              :     }
    1054              :   else
    1055              :     {
    1056              :       /* We're looking at the CALL_EXPR, we need the type of the function.  */
    1057        10873 :       type = CALL_EXPR_FN (exp);                /* pointer expression */
    1058        10873 :       type = TREE_TYPE (type);                  /* pointer type */
    1059        10873 :       type = TREE_TYPE (type);                  /* function type */
    1060        10873 :       decl_or_type = type;
    1061              :     }
    1062              : 
    1063              :   /* Sibling call isn't OK if callee has no callee-saved registers
    1064              :      and the calling function has callee-saved registers.  */
    1065       130258 :   if ((cfun->machine->call_saved_registers
    1066       130258 :        != TYPE_NO_CALLEE_SAVED_REGISTERS)
    1067       130258 :       && cfun->machine->call_saved_registers != TYPE_PRESERVE_NONE
    1068       130258 :       && ix86_type_no_callee_saved_registers_p (type))
    1069              :     return false;
    1070              : 
    1071              :   /* If outgoing reg parm stack space changes, we cannot do sibcall.  */
    1072       130242 :   if ((OUTGOING_REG_PARM_STACK_SPACE (type)
    1073       130242 :        != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
    1074       259738 :       || (REG_PARM_STACK_SPACE (decl_or_type)
    1075       129496 :           != REG_PARM_STACK_SPACE (current_function_decl)))
    1076              :     {
    1077          746 :       maybe_complain_about_tail_call (exp,
    1078              :                                       "inconsistent size of stack space"
    1079              :                                       " allocated for arguments which are"
    1080              :                                       " passed in registers");
    1081          746 :       return false;
    1082              :     }
    1083              : 
    1084              :   /* Check that the return value locations are the same.  Like
    1085              :      if we are returning floats on the 80387 register stack, we cannot
    1086              :      make a sibcall from a function that doesn't return a float to a
    1087              :      function that does or, conversely, from a function that does return
    1088              :      a float to a function that doesn't; the necessary stack adjustment
    1089              :      would not be executed.  This is also the place we notice
    1090              :      differences in the return value ABI.  Note that it is ok for one
    1091              :      of the functions to have void return type as long as the return
    1092              :      value of the other is passed in a register.  */
    1093       129496 :   a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
    1094       129496 :   b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
    1095       129496 :                            cfun->decl, false);
    1096       129496 :   if (STACK_REG_P (a) || STACK_REG_P (b))
    1097              :     {
    1098         1020 :       if (!rtx_equal_p (a, b))
    1099              :         return false;
    1100              :     }
    1101       128476 :   else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    1102              :     ;
    1103        23786 :   else if (!rtx_equal_p (a, b))
    1104              :     return false;
    1105              : 
    1106       129115 :   if (TARGET_64BIT)
    1107              :     {
    1108              :       /* The SYSV ABI has more call-clobbered registers;
    1109              :          disallow sibcalls from MS to SYSV.  */
    1110       122431 :       if (cfun->machine->call_abi == MS_ABI
    1111       122431 :           && ix86_function_type_abi (type) == SYSV_ABI)
    1112              :         return false;
    1113              :     }
    1114              :   else
    1115              :     {
    1116              :       /* If this call is indirect, we'll need to be able to use a
    1117              :          call-clobbered register for the address of the target function.
    1118              :          Make sure that all such registers are not used for passing
    1119              :          parameters.  Note that DLLIMPORT functions and call to global
    1120              :          function via GOT slot are indirect.  */
    1121         6684 :       if (!decl
    1122         4770 :           || (bind_global && flag_pic && !flag_plt)
    1123              :           || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
    1124         4770 :           || flag_force_indirect_call)
    1125              :         {
    1126              :           /* Check if regparm >= 3 since arg_reg_available is set to
    1127              :              false if regparm == 0.  If regparm is 1 or 2, there is
    1128              :              always a call-clobbered register available.
    1129              : 
    1130              :              ??? The symbol indirect call doesn't need a call-clobbered
    1131              :              register.  But we don't know if this is a symbol indirect
    1132              :              call or not here.  */
    1133         1914 :           if (ix86_function_regparm (type, decl) >= 3
    1134         1914 :               && !cfun->machine->arg_reg_available)
    1135              :             return false;
    1136              :         }
    1137              :     }
    1138              : 
    1139       129115 :   if (decl && ix86_use_pseudo_pic_reg ())
    1140              :     {
    1141              :       /* When PIC register is used, it must be restored after ifunc
    1142              :          function returns.  */
    1143         2059 :        cgraph_node *node = cgraph_node::get (decl);
    1144         2059 :        if (node && node->ifunc_resolver)
    1145              :          return false;
    1146              :     }
    1147              : 
    1148              :   /* Disable sibcall if callee has indirect_return attribute and
    1149              :      caller doesn't since callee will return to the caller's caller
    1150              :      via an indirect jump.  */
    1151       129115 :   if (((flag_cf_protection & (CF_RETURN | CF_BRANCH))
    1152              :        == (CF_RETURN | CF_BRANCH))
    1153        50032 :       && lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (type))
    1154       129119 :       && !lookup_attribute ("indirect_return",
    1155            4 :                             TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))))
    1156              :     return false;
    1157              : 
    1158              :   /* Otherwise okay.  That also includes certain types of indirect calls.  */
    1159              :   return true;
    1160              : }
    1161              : 
    1162              : /* Determine the calling convention of function type TYPE.  The result
                      :    is a mask of IX86_CALLCVT_* bits; 64-bit targets always yield
                      :    IX86_CALLCVT_CDECL.  On 32-bit the "cdecl", "stdcall", "fastcall"
                      :    and "thiscall" attributes are tried in that order (first match wins),
                      :    then the regparm/sseregparm modifier bits are added.  */
    1163              : 
    1164              : unsigned int
    1165      6280580 : ix86_get_callcvt (const_tree type)
    1166              : {
    1167      6280580 :   unsigned int ret = 0;
    1168      6280580 :   bool is_stdarg;
    1169      6280580 :   tree attrs;
    1170              : 
    1171      6280580 :   if (TARGET_64BIT)
    1172              :     return IX86_CALLCVT_CDECL;
    1173              : 
    1174      3270782 :   attrs = TYPE_ATTRIBUTES (type);
    1175      3270782 :   if (attrs != NULL_TREE)
    1176              :     {
    1177        67754 :       if (lookup_attribute ("cdecl", attrs))
    1178              :         ret |= IX86_CALLCVT_CDECL;
    1179        67754 :       else if (lookup_attribute ("stdcall", attrs))
    1180              :         ret |= IX86_CALLCVT_STDCALL;
    1181        67754 :       else if (lookup_attribute ("fastcall", attrs))
    1182              :         ret |= IX86_CALLCVT_FASTCALL;
    1183        67745 :       else if (lookup_attribute ("thiscall", attrs))
    1184              :         ret |= IX86_CALLCVT_THISCALL;
    1185              : 
    1186              :       /* Regparm isn't allowed for thiscall and fastcall, so the
                      :          modifier bits are only looked up for the other conventions.  */
    1187              :       if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
    1188              :         {
    1189        67745 :           if (lookup_attribute ("regparm", attrs))
    1190        15834 :             ret |= IX86_CALLCVT_REGPARM;
    1191        67745 :           if (lookup_attribute ("sseregparm", attrs))
    1192            0 :             ret |= IX86_CALLCVT_SSEREGPARM;
    1193              :         }
    1194              : 
    1195        67754 :       if (IX86_BASE_CALLCVT(ret) != 0)
    1196            9 :         return ret;  /* A base convention was named explicitly (assumes IX86_BASE_CALLCVT masks the base bits -- confirm).  */
    1197              :     }
    1198              : 
    1199      3270773 :   is_stdarg = stdarg_p (type);
    1200      3270773 :   if (TARGET_RTD && !is_stdarg)
    1201            0 :     return IX86_CALLCVT_STDCALL | ret;  /* RTD in effect and TYPE is not variadic.  */
    1202              : 
    1203      3270773 :   if (ret != 0
    1204      3270773 :       || is_stdarg
    1205      3245909 :       || TREE_CODE (type) != METHOD_TYPE
    1206      3407406 :       || ix86_function_type_abi (type) != MS_ABI)
    1207      3270773 :     return IX86_CALLCVT_CDECL | ret;
    1208              : 
    1209              :   return IX86_CALLCVT_THISCALL;  /* Non-variadic METHOD_TYPE under MS_ABI with no convention bits set.  */
    1210              : }
    1211              : 
    1212              : /* Return 0 if the attributes for two types are incompatible, 1 if they
    1213              :    are compatible, and 2 if they are nearly compatible (which causes a
    1214              :    warning to be generated).  This implementation only ever returns 0 or
                      :    1: types that are neither FUNCTION_TYPE nor METHOD_TYPE are always
                      :    compatible, and function types must agree on calling convention,
                      :    regparm count, the no-callee-saved-registers property and (64-bit
                      :    only) the preserve_none attribute.  */
    1215              : 
    1216              : static int
    1217      1522424 : ix86_comp_type_attributes (const_tree type1, const_tree type2)
    1218              : {
    1219      1522424 :   unsigned int ccvt1, ccvt2;
    1220              : 
    1221      1522424 :   if (TREE_CODE (type1) != FUNCTION_TYPE
    1222      1522424 :       && TREE_CODE (type1) != METHOD_TYPE)
    1223              :     return 1;
    1224              : 
    1225      1516029 :   ccvt1 = ix86_get_callcvt (type1);
    1226      1516029 :   ccvt2 = ix86_get_callcvt (type2);
    1227      1516029 :   if (ccvt1 != ccvt2)
    1228              :     return 0;
    1229      3009918 :   if (ix86_function_regparm (type1, NULL)
    1230      1504959 :       != ix86_function_regparm (type2, NULL))
    1231              :     return 0;
    1232              : 
    1233      1467164 :   if (ix86_type_no_callee_saved_registers_p (type1)
    1234       733582 :       != ix86_type_no_callee_saved_registers_p (type2))
    1235              :     return 0;
    1236              : 
    1237              :   /* The preserve_none attribute selects a different calling convention,
    1238              :      but only for 64-bit.
                      :      NOTE(review): the two lookup_attribute results are compared
                      :      directly, i.e. as attribute nodes rather than as "present or
                      :      absent" -- confirm that two types each carrying their own
                      :      preserve_none node cannot reach here.  */
    1239       733454 :   if (TARGET_64BIT
    1240      1466848 :       && (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type1))
    1241       733394 :           != lookup_attribute ("preserve_none",
    1242       733394 :                                TYPE_ATTRIBUTES (type2))))
    1243              :     return 0;
    1244              : 
    1245              :   return 1;
    1246              : }
    1247              : 
    1248              : /* Return the regparm value for a function with the indicated TYPE and DECL.
    1249              :    DECL may be NULL when calling function indirectly
    1250              :    or considering a libcall.
                      :
                      :    64-bit targets get the ABI maximum (X86_64_REGPARM_MAX for SYSV_ABI,
                      :    otherwise X86_64_MS_REGPARM_MAX).  On 32-bit, the argument of an
                      :    explicit regparm attribute is returned as-is, fastcall yields 2 and
                      :    thiscall yields 1; otherwise the result starts at ix86_regparm and
                      :    may be raised for local functions whose signature can change.  */
    1251              : 
    1252              : static int
    1253      4281768 : ix86_function_regparm (const_tree type, const_tree decl)
    1254              : {
    1255      4281768 :   tree attr;
    1256      4281768 :   int regparm;
    1257      4281768 :   unsigned int ccvt;
    1258              : 
    1259      4281768 :   if (TARGET_64BIT)
    1260      3009798 :     return (ix86_function_type_abi (type) == SYSV_ABI
    1261      3009798 :             ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
    1262      1271970 :   ccvt = ix86_get_callcvt (type);
    1263      1271970 :   regparm = ix86_regparm;
    1264              : 
    1265      1271970 :   if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    1266              :     {
    1267         2020 :       attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
    1268         2020 :       if (attr)
    1269              :         {
    1270         2020 :           regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    1271         2020 :           return regparm;
    1272              :         }
    1273              :     }
    1274      1269950 :   else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    1275              :     return 2;
    1276      1269950 :   else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    1277              :     return 1;
    1278              : 
    1279              :   /* Use register calling convention for local functions when possible.  */
    1280      1269950 :   if (decl
    1281      1205654 :       && TREE_CODE (decl) == FUNCTION_DECL)
    1282              :     {
    1283      1195571 :       cgraph_node *target = cgraph_node::get (decl);
    1284      1195571 :       if (target)
    1285      1188065 :         target = target->function_symbol ();
    1286              : 
    1287              :       /* Caller and callee must agree on the calling convention, so
    1288              :          checking here just optimize means that with
    1289              :          __attribute__((optimize (...))) caller could use regparm convention
    1290              :          and callee not, or vice versa.  Instead look at whether the callee
    1291              :          is optimized or not.  */
    1292      1188065 :       if (target && opt_for_fn (target->decl, optimize)
    1293      2375238 :           && !(profile_flag && !flag_fentry))
    1294              :         {
    1295      1187173 :           if (target->local && target->can_change_signature)
    1296              :             {
    1297       140264 :               int local_regparm, globals = 0, regno;
    1298              : 
    1299              :               /* Make sure no regparm register is taken by a
    1300              :                  fixed register variable: the scan stops at the first
                      :                  fixed register, so LOCAL_REGPARM ends up as the count
                      :                  of leading non-fixed registers (at most REGPARM_MAX).  */
    1301       140264 :               for (local_regparm = 0; local_regparm < REGPARM_MAX;
    1302              :                    local_regparm++)
    1303       105198 :                 if (fixed_regs[local_regparm])
    1304              :                   break;
    1305              : 
    1306              :               /* We don't want to use regparm(3) for nested functions as
    1307              :                  these use a static chain pointer in the third argument.  */
    1308        35066 :               if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
    1309              :                 local_regparm = 2;
    1310              : 
    1311              :               /* Save a register for the split stack.  */
    1312        35066 :               if (flag_split_stack)
    1313              :                 {
    1314        20696 :                   if (local_regparm == 3)
    1315              :                     local_regparm = 2;
    1316          707 :                   else if (local_regparm == 2
    1317          707 :                            && DECL_STATIC_CHAIN (target->decl))
    1318              :                     local_regparm = 1;
    1319              :                 }
    1320              : 
    1321              :               /* Each fixed register usage increases register pressure,
    1322              :                  so less registers should be used for argument passing.
    1323              :                  This functionality can be overridden by an explicit
    1324              :                  regparm value.  */
    1325       245462 :               for (regno = AX_REG; regno <= DI_REG; regno++)
    1326       210396 :                 if (fixed_regs[regno])
    1327            0 :                   globals++;
    1328              : 
    1329        35066 :               local_regparm
    1330        35066 :                 = globals < local_regparm ? local_regparm - globals : 0;
    1331              : 
    1332        35066 :               if (local_regparm > regparm)  /* Only ever raise the default, never lower it.  */
    1333      4281768 :                 regparm = local_regparm;
    1334              :             }
    1335              :         }
    1336              :     }
    1337              : 
    1338              :   return regparm;
    1339              : }
    1340              : 
    1341              : /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
    1342              :    DFmode (2) arguments in SSE registers for a function with the
    1343              :    indicated TYPE and DECL.  DECL may be NULL when calling function
    1344              :    indirectly or considering a libcall.  Return -1 if any FP parameter
    1345              :    should be rejected by error.  This is used in the situation where we
    1346              :    imply the SSE calling convention but the function is called from
    1347              :    another function with SSE disabled.  Otherwise return 0.
                      :
                      :    WARN enables the diagnostics: with it set, an sseregparm request
                      :    without SSE is reported through error (), and the -1 result is
                      :    produced for a local callee when SSE is disabled here.  Must only
                      :    be called for 32-bit targets (asserted below).  */
    1348              : 
    1349              : static int
    1350      1077632 : ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
    1351              : {
    1352      1077632 :   gcc_assert (!TARGET_64BIT);
    1353              : 
    1354              :   /* Use SSE registers to pass SFmode and DFmode arguments if requested
    1355              :      by the sseregparm attribute.  */
    1356      1077632 :   if (TARGET_SSEREGPARM
    1357      1077632 :       || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    1358              :     {
    1359            0 :       if (!TARGET_SSE)
    1360              :         {
    1361            0 :           if (warn)
    1362              :             {
    1363            0 :               if (decl)
    1364            0 :                 error ("calling %qD with attribute sseregparm without "
    1365              :                        "SSE/SSE2 enabled", decl);
    1366              :               else
    1367            0 :                 error ("calling %qT with attribute sseregparm without "
    1368              :                        "SSE/SSE2 enabled", type);
    1369              :             }
    1370            0 :           return 0;  /* Requested but SSE is unavailable: 0 whether or not it was diagnosed.  */
    1371              :         }
    1372              : 
    1373              :       return 2;
    1374              :     }
    1375              : 
    1376      1077632 :   if (!decl)
    1377              :     return 0;
    1378              : 
    1379       979041 :   cgraph_node *target = cgraph_node::get (decl);
    1380       979041 :   if (target)
    1381       971542 :     target = target->function_symbol ();
    1382              : 
    1383              :   /* For local functions, pass up to SSE_REGPARM_MAX SFmode
    1384              :      (and DFmode for SSE2) arguments in SSE registers.  */
    1385       971542 :   if (target
    1386              :       /* TARGET_SSE_MATH */
    1387       971542 :       && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
    1388         1296 :       && opt_for_fn (target->decl, optimize)
    1389       972838 :       && !(profile_flag && !flag_fentry))
    1390              :     {
    1391         1296 :       if (target->local && target->can_change_signature)
    1392              :         {
    1393              :           /* Refuse to produce wrong code when local function with SSE enabled
    1394              :              is called from SSE disabled function.
    1395              :              FIXME: We need a way to detect these cases cross-ltrans partition
    1396              :              and avoid using SSE calling conventions on local functions called
    1397              :              from function with SSE disabled.  For now at least delay the
    1398              :              warning until we know we are going to produce wrong code.
    1399              :              See PR66047  */
    1400            0 :           if (!TARGET_SSE && warn)
    1401              :             return -1;
    1402            0 :           return TARGET_SSE2_P (target_opts_for_fn (target->decl)
    1403            0 :                                 ->x_ix86_isa_flags) ? 2 : 1;
    1404              :         }
    1405              :     }
    1406              : 
    1407              :   return 0;
    1408              : }
    1409              : 
    1410              : /* Return true if EAX is live at the start of the function.  Used by
    1411              :    ix86_expand_prologue to determine if we need special help before
    1412              :    calling allocate_stack_worker.  */
    1413              : 
    1414              : static bool
    1415         7090 : ix86_eax_live_at_start_p (void)
    1416              : {
    1417              :   /* Cheat.  Don't bother working forward from ix86_function_regparm
    1418              :      to the function type to whether an actual argument is located in
    1419              :      eax.  Instead just look at cfg info, which is still close enough
    1420              :      to correct at this point.  This gives false positives for broken
    1421              :      functions that might use uninitialized data that happens to be
    1422              :      allocated in eax, but who cares?
                      :
                      :      Concretely: test whether register number 0 is in the live-out
                      :      set of the current function's entry block.  */
    1423         7090 :   return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
    1424              : }
    1425              : 
    1426              : static bool  /* Return true if the callee must NOT pop the hidden aggregate-return pointer for FNTYPE.  */
    1427       159978 : ix86_keep_aggregate_return_pointer (tree fntype)
    1428              : {
    1429       159978 :   tree attr;
    1430              : 
    1431       159978 :   if (!TARGET_64BIT)
    1432              :     {
    1433       159978 :       attr = lookup_attribute ("callee_pop_aggregate_return",
    1434       159978 :                                TYPE_ATTRIBUTES (fntype));
    1435       159978 :       if (attr)
    1436            0 :         return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);  /* Attribute argument 0 means "keep".  */
    1437              : 
    1438              :       /* For 32-bit MS-ABI the default is to keep aggregate
    1439              :          return pointer.  */
    1440       159978 :       if (ix86_function_type_abi (fntype) == MS_ABI)
    1441              :         return true;
    1442              :     }
    1443              :   return KEEP_AGGREGATE_RETURN_POINTER != 0;  /* Otherwise (including every 64-bit case) use the configured default.  */
    1444              : }
    1445              : 
    1446              : /* Value is the number of bytes of arguments automatically
    1447              :    popped when returning from a subroutine call.
    1448              :    FUNDECL is the declaration node of the function (as a tree),
    1449              :    FUNTYPE is the data type of the function (as a tree),
    1450              :    or for a library call it is an identifier node for the subroutine name.
    1451              :    SIZE is the number of bytes of arguments passed on the stack.
    1452              : 
    1453              :    On the 80386, the RTD insn may be used to pop them if the number
    1454              :      of args is fixed, but if the number is variable then the caller
    1455              :      must pop them all.  RTD can't be used for library calls now
    1456              :      because the library is compiled with the Unix compiler.
    1457              :    Use of RTD is a selectable option, since it is incompatible with
    1458              :    standard Unix calling sequences.  If the option is not selected,
    1459              :    the caller must always pop the args.
    1460              : 
    1461              :    The attribute stdcall is equivalent to RTD on a per module basis.
                      :
                      :    Summary of results: 0 on 64-bit; SIZE for non-variadic stdcall,
                      :    fastcall and thiscall functions; the size of a Pmode pointer when
                      :    only the hidden aggregate-return pointer is popped; 0 otherwise.  */
    1462              : 
    1463              : static poly_int64
    1464      7568092 : ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
    1465              : {
    1466      7568092 :   unsigned int ccvt;
    1467              : 
    1468              :   /* None of the 64-bit ABIs pop arguments.  */
    1469      7568092 :   if (TARGET_64BIT)
    1470      6694480 :     return 0;
    1471              : 
    1472       873612 :   ccvt = ix86_get_callcvt (funtype);
    1473              : 
    1474       873612 :   if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
    1475              :                | IX86_CALLCVT_THISCALL)) != 0
    1476       873612 :       && ! stdarg_p (funtype))
    1477            3 :     return size;
    1478              : 
    1479              :   /* Lose any fake structure return argument if it is passed on the stack.
                      :      Presumably it is on the stack exactly when no argument registers
                      :      are in use, hence the NREGS == 0 test -- confirm.  */
    1480       873609 :   if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
    1481       873609 :       && !ix86_keep_aggregate_return_pointer (funtype))
    1482              :     {
    1483       159978 :       int nregs = ix86_function_regparm (funtype, fundecl);
    1484       159978 :       if (nregs == 0)
    1485       459066 :         return GET_MODE_SIZE (Pmode);
    1486              :     }
    1487              : 
    1488       720587 :   return 0;
    1489              : }
    1490              : 
    1491              : /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  Return false if
                      :    some hard-register operand of INSN fits none of the preferred
                      :    alternatives; operands that are not hard registers are not checked.
                      :    INSN must already be recognized (INSN_CODE >= 0 is asserted).  */
    1492              : 
    1493              : static bool
    1494      9980433 : ix86_legitimate_combined_insn (rtx_insn *insn)
    1495              : {
    1496      9980433 :   int i;
    1497              : 
    1498              :   /* Check operand constraints in case hard registers were propagated
    1499              :      into insn pattern.  This check prevents combine pass from
    1500              :      generating insn patterns with invalid hard register operands.
    1501              :      These invalid insns can eventually confuse reload to error out
    1502              :      with a spill failure.  See also PRs 46829 and 46843.  */
    1503              : 
    1504      9980433 :   gcc_assert (INSN_CODE (insn) >= 0);
    1505              : 
    1506      9980433 :   extract_insn (insn);
    1507      9980433 :   preprocess_constraints (insn);
    1508              : 
    1509      9980433 :   int n_operands = recog_data.n_operands;
    1510      9980433 :   int n_alternatives = recog_data.n_alternatives;
    1511     34138056 :   for (i = 0; i < n_operands; i++)
    1512              :     {
    1513     24161138 :       rtx op = recog_data.operand[i];
    1514     24161138 :       machine_mode mode = GET_MODE (op);  /* Mode is taken before any unary operator or SUBREG is stripped.  */
    1515     24161138 :       const operand_alternative *op_alt;
    1516     24161138 :       int offset = 0;
    1517     24161138 :       bool win;
    1518     24161138 :       int j;
    1519              : 
    1520              :       /* A unary operator may be accepted by the predicate, but it
    1521              :          is irrelevant for matching constraints.  */
    1522     24161138 :       if (UNARY_P (op))
    1523        48422 :         op = XEXP (op, 0);
    1524              : 
    1525     24161138 :       if (SUBREG_P (op))
    1526              :         {
    1527       875049 :           if (REG_P (SUBREG_REG (op))
    1528       875049 :               && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
    1529           54 :             offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
    1530           54 :                                           GET_MODE (SUBREG_REG (op)),
    1531           54 :                                           SUBREG_BYTE (op),
    1532           54 :                                           GET_MODE (op));
    1533       875049 :           op = SUBREG_REG (op);
    1534              :         }
    1535              : 
    1536     24161138 :       if (!(REG_P (op) && HARD_REGISTER_P (op)))
    1537     23863146 :         continue;  /* Only hard registers are validated here.  */
    1538              : 
    1539       297992 :       op_alt = recog_op_alt;
    1540              : 
    1541              :       /* Operand has no constraints, anything is OK.  */
    1542       297992 :       win = !n_alternatives;
    1543              : 
    1544       297992 :       alternative_mask preferred = get_preferred_alternatives (insn);
    1545       823977 :       for (j = 0; j < n_alternatives; j++, op_alt += n_operands)  /* OP_ALT steps by N_OPERANDS entries per alternative.  */
    1546              :         {
    1547       522389 :           if (!TEST_BIT (preferred, j))
    1548       140690 :             continue;
    1549       381699 :           if (op_alt[i].anything_ok
    1550       203051 :               || (op_alt[i].matches != -1
    1551        33735 :                   && operands_match_p
    1552        33735 :                   (recog_data.operand[i],
    1553        33735 :                    recog_data.operand[op_alt[i].matches]))
    1554       580627 :               || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
    1555              :             {
    1556              :               win = true;
    1557              :               break;
    1558              :             }
    1559              :         }
    1560              : 
    1561       297992 :       if (!win)
    1562              :         return false;
    1563              :     }
    1564              : 
    1565              :   return true;
    1566              : }
    1567              : 
    1568              : /* Implement the TARGET_ASAN_SHADOW_OFFSET hook by returning
                      :    SUBTARGET_SHADOW_OFFSET.  */
    1569              : 
    1570              : static unsigned HOST_WIDE_INT
    1571         4814 : ix86_asan_shadow_offset (void)
    1572              : {
    1573         4814 :   return SUBTARGET_SHADOW_OFFSET;
    1574              : }
    1575              : 
    1576              : /* Argument support functions.  */
    1577              : 
    1578              : /* Return true when register REGNO may be used to pass function
                      :    parameters.  The first SSE_REGPARM_MAX SSE registers qualify whenever
                      :    SSE is enabled; 32-bit additionally accepts the first REGPARM_MAX
                      :    registers and, with MMX, the first MMX_REGPARM_MAX MMX registers;
                      :    64-bit consults the integer parameter register table for the ABI.  */
    1579              : bool
    1580   1483838402 : ix86_function_arg_regno_p (int regno)
    1581              : {
    1582   1483838402 :   int i;
    1583   1483838402 :   enum calling_abi call_abi;
    1584   1483838402 :   const int *parm_regs;
    1585              : 
    1586   1480391343 :   if (TARGET_SSE && SSE_REGNO_P (regno)
    1587   2454078416 :       && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
    1588              :     return true;
    1589              : 
    1590   1364010278 :    if (!TARGET_64BIT)
    1591    129246272 :      return (regno < REGPARM_MAX
    1592    129246272 :              || (TARGET_MMX && MMX_REGNO_P (regno)
    1593     11616272 :                  && regno < FIRST_MMX_REG + MMX_REGPARM_MAX));
    1594              : 
    1595              :   /* TODO: The function should depend on current function ABI but
    1596              :      builtins.cc would need updating then. Therefore we use the
    1597              :      default ABI.
                      :      NOTE(review): ix86_cfun_abi () is what is actually called here;
                      :      whether that is "the default ABI" depends on its definition --
                      :      this comment may be stale, confirm.  */
    1598   1234764006 :   call_abi = ix86_cfun_abi ();
    1599              : 
    1600              :   /* RAX is used as hidden argument to va_arg functions.  */
    1601   1234764006 :   if (call_abi == SYSV_ABI && regno == AX_REG)
    1602              :     return true;
    1603              : 
    1604   1220494877 :   if (cfun
    1605   1220494545 :       && cfun->machine->call_saved_registers == TYPE_PRESERVE_NONE)
    1606              :     parm_regs = x86_64_preserve_none_int_parameter_registers;
    1607   1220475953 :   else if (call_abi == MS_ABI)
    1608              :     parm_regs = x86_64_ms_abi_int_parameter_registers;
    1609              :   else
    1610   1184480401 :     parm_regs = x86_64_int_parameter_registers;
    1611              : 
                      :   /* NOTE(review): the bound below depends only on CALL_ABI, even when
                      :      the preserve_none table was selected above -- confirm that table
                      :      has at least that many entries for both ABIs.  */
    1612  16334981790 :   for (i = 0; i < (call_abi == MS_ABI
    1613   8167490895 :                    ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    1614   7034293727 :     if (regno == parm_regs[i])
    1615              :       return true;
    1616              :   return false;
    1617              : }
    1618              : 
    1619              : /* Return true if we do not know how to pass ARG solely in registers:
                      :    either must_pass_in_stack_var_size_or_pad says so, or ARG is a 32-bit
                      :    TImode argument with a known, non-vector type.  */
    1620              : 
    1621              : static bool
    1622    403722770 : ix86_must_pass_in_stack (const function_arg_info &arg)
    1623              : {
    1624    403722770 :   if (must_pass_in_stack_var_size_or_pad (arg))
    1625              :     return true;
    1626              : 
    1627              :   /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
    1628              :      The layout_type routine is crafty and tries to trick us into passing
    1629              :      currently unsupported vector types on the stack by using TImode.  */
    1630      1772560 :   return (!TARGET_64BIT && arg.mode == TImode
    1631    403722733 :           && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
    1632              : }
    1633              : 
    1634              : /* Return the size, in bytes, of the stack area reserved for arguments
    1635              :    passed in registers, which depends on the ABI used by FNDECL: 32 for
    1636              :    the 64-bit MS ABI, 0 otherwise.  FNDECL may be a FUNCTION_DECL, or
                      :    anything else (including NULL_TREE), in which case it is handed to
                      :    ix86_function_type_abi as a function type.  */
    1637              : int
    1638     10654422 : ix86_reg_parm_stack_space (const_tree fndecl)
    1639              : {
    1640     10654422 :   enum calling_abi call_abi = SYSV_ABI;  /* Always overwritten by one of the two branches below.  */
    1641     10654422 :   if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    1642     10344022 :     call_abi = ix86_function_abi (fndecl);
    1643              :   else
    1644       310400 :     call_abi = ix86_function_type_abi (fndecl);
    1645     10654422 :   if (TARGET_64BIT && call_abi == MS_ABI)
    1646       119312 :     return 32;
    1647              :   return 0;
    1648              : }
    1649              : 
    1650              : /* We add this as a workaround in order to use libc_has_function
    1651              :    hook in i386.md.  FN_CLASS is forwarded to targetm.libc_has_function
                      :    with NULL_TREE as the second argument.  */
    1652              : bool
    1653            0 : ix86_libc_has_function (enum function_class fn_class)
    1654              : {
    1655            0 :   return targetm.libc_has_function (fn_class, NULL_TREE);
    1656              : }
    1657              : 
    1658              : /* Return the calling ABI (SYSV_ABI or MS_ABI) used by function type
    1659              :    FNTYPE.  The default is ix86_abi; an "ms_abi" or "sysv_abi" attribute
                      :    on FNTYPE switches to the other ABI.  A null FNTYPE, or one without
                      :    attributes, yields the default.  */
    1660              : enum calling_abi
    1661    439153382 : ix86_function_type_abi (const_tree fntype)
    1662              : {
    1663    439153382 :   enum calling_abi abi = ix86_abi;
    1664              : 
    1665    439153382 :   if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
    1666              :     return abi;
    1667              : 
    1668     17527157 :   if (abi == SYSV_ABI
    1669     17527157 :       && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
    1670              :     {
    1671      2628110 :       static int warned;  /* Latches so the X32 error below is issued at most once.  */
    1672      2628110 :       if (TARGET_X32 && !warned)
    1673              :         {
    1674            1 :           error ("X32 does not support %<ms_abi%> attribute");
    1675            1 :           warned = 1;
    1676              :         }
    1677              : 
    1678              :       abi = MS_ABI;  /* Still switched to MS_ABI even after the X32 error.  */
    1679              :     }
    1680     14899047 :   else if (abi == MS_ABI
    1681     14899047 :            && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
    1682              :     abi = SYSV_ABI;
    1683              : 
    1684              :   return abi;
    1685              : }
    1686              : /* Return the calling ABI of FNDECL's type, or the default ix86_abi if FNDECL is null.  */
    1687              : enum calling_abi
    1688    217225685 : ix86_function_abi (const_tree fndecl)
    1689              : {
    1690    217225685 :   return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
    1691              : }
    1692              : 
    1693              : /* Return the calling ABI (SYSV_ABI or MS_ABI) stored in cfun->machine->call_abi,
    1694              :    or the default ix86_abi when cfun is null.  */
    1695              : enum calling_abi
    1696   2086117287 : ix86_cfun_abi (void)
    1697              : {
    1698   2086117287 :   return cfun ? cfun->machine->call_abi : ix86_abi;
    1699              : }
    1700              : /* Return true if FN has the ms_hook_prologue attribute and no enclosing function; a nested FN gets an error instead.  */
    1701              : bool
    1702      5015351 : ix86_function_ms_hook_prologue (const_tree fn)
    1703              : {
    1704      5015351 :   if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    1705              :     {
    1706            8 :       if (decl_function_context (fn) != NULL_TREE)
    1707            0 :         error_at (DECL_SOURCE_LOCATION (fn),
    1708              :                   "%<ms_hook_prologue%> attribute is not compatible "
    1709              :                   "with nested function");
    1710              :       else
    1711              :         return true;
    1712              :     }
    1713              :   return false;
    1714              : }
    1715              : /* Return true if FN is non-null and carries the "naked" attribute.  */
    1716              : bool
    1717    115320726 : ix86_function_naked (const_tree fn)
    1718              : {
    1719    115320726 :   if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
    1720              :     return true;
    1721              : 
    1722              :   return false;
    1723              : }
    1724              : 
    1725              : /* Write the extra assembler code needed to declare a function properly: emit
    1726              :    label FNAME for DECL to OUT_FILE, plus ms_hook_prologue filler and marker bytes.  */
    1727              : void
    1728      1557440 : ix86_asm_output_function_label (FILE *out_file, const char *fname,
    1729              :                                 tree decl)
    1730              : {
    1731      1557440 :   bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
    1732              : 
    1733      1557440 :   if (cfun)
    1734      1553834 :     cfun->machine->function_label_emitted = true;
    1735              :   /* Before the label: one ASM_LONG of 0xcccccccc per 4 bytes of FILLER_COUNT.  */
    1736      1557440 :   if (is_ms_hook)
    1737              :     {
    1738            2 :       int i, filler_count = (TARGET_64BIT ? 32 : 16);
    1739            2 :       unsigned int filler_cc = 0xcccccccc;
    1740              : 
    1741           18 :       for (i = 0; i < filler_count; i += 4)
    1742           16 :         fprintf (out_file, ASM_LONG " %#x\n", filler_cc);
    1743              :     }
    1744              : 
    1745              : #ifdef SUBTARGET_ASM_UNWIND_INIT
    1746              :   SUBTARGET_ASM_UNWIND_INIT (out_file);
    1747              : #endif
    1748              : 
    1749      1557440 :   assemble_function_label_raw (out_file, fname);
    1750              : 
    1751              :   /* Output magic byte marker, if hot-patch attribute is set.  */
    1752      1557440 :   if (is_ms_hook)
    1753              :     {
    1754            2 :       if (TARGET_64BIT)
    1755              :         {
    1756              :           /* leaq [%rsp + 0], %rsp  */
    1757            2 :           fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
    1758              :                  out_file);
    1759              :         }
    1760              :       else
    1761              :         {
    1762              :           /* movl.s %edi, %edi
    1763              :              push   %ebp
    1764              :              movl.s %esp, %ebp */
    1765            0 :           fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", out_file);
    1766              :         }
    1767              :     }
    1768      1557440 : }
    1769              : 
    1770              : /* Output a user-defined label, PREFIX followed by LABEL, to FILE.  In AT&T
    1771              :    syntax, registers are prefixed with %, so labels require no punctuation.
    1772              :    In Intel syntax, registers are unprefixed, so labels may clash with
    1773              :    registers or other operators, and are emitted inside double quotes.  */
    1774              : void
    1775     35028410 : ix86_asm_output_labelref (FILE *file, const char *prefix, const char *label)
    1776              : {
    1777     35028410 :   if (ASSEMBLER_DIALECT == ASM_ATT)
    1778     35027309 :     fprintf (file, "%s%s", prefix, label);
    1779              :   else
    1780         1101 :     fprintf (file, "\"%s%s\"", prefix, label);
    1781     35028410 : }
    1782              : 
    1783              : /* Implementation of the call ABI switching target hook.  Record the calling
    1784              :    ABI of FNDECL in cfun->machine->call_abi; the call register sets follow
    1785              :    from it.  See also ix86_conditional_register_usage for more details.  */
    1786              : void
    1787    196794778 : ix86_call_abi_override (const_tree fndecl)
    1788              : {
    1789    196794778 :   cfun->machine->call_abi = ix86_function_abi (fndecl);
    1790    196794778 : }
    1791              : 
    1792              : /* Return true if a pseudo register should be created and used to hold the GOT
    1793              :    address for PIC code: flag_pic is set, and not 64-bit with CM_SMALL_PIC or TARGET_PECOFF.  */
    1794              : bool
    1795    170527898 : ix86_use_pseudo_pic_reg (void)
    1796              : {
    1797    170527898 :   if ((TARGET_64BIT
    1798    159479963 :        && (ix86_cmodel == CM_SMALL_PIC
    1799              :            || TARGET_PECOFF))
    1800    164845732 :       || !flag_pic)
    1801    165729926 :     return false;
    1802              :   return true;
    1803              : }
    1804              : 
    1805              : /* Initialize the large model PIC register, using register number TMP_REGNO
    1806              :    as a temporary.  The label emitted here ends up as a NOTE_INSN_DELETED_LABEL note.  */
    1807              : static void
    1808           56 : ix86_init_large_pic_reg (unsigned int tmp_regno)
    1809              : {
    1810           56 :   rtx_code_label *label;
    1811           56 :   rtx tmp_reg;
    1812              : 
    1813           56 :   gcc_assert (Pmode == DImode);
    1814           56 :   label = gen_label_rtx ();
    1815           56 :   emit_label (label);
    1816           56 :   LABEL_PRESERVE_P (label) = 1;
    1817           56 :   tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
    1818           56 :   gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
    1819           56 :   emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
    1820              :                                 label));
    1821           56 :   emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
    1822           56 :   emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
    1823           56 :   const char *name = LABEL_NAME (label);
    1824           56 :   PUT_CODE (label, NOTE);
    1825           56 :   NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
    1826           56 :   NOTE_DELETED_LABEL_NAME (label) = name;
    1827           56 : }
    1828              : 
    1829              : /* Create and initialize the PIC register if ix86_use_pseudo_pic_reg says one is required.  */
    1830              : static void
    1831      1488370 : ix86_init_pic_reg (void)
    1832              : {
    1833      1488370 :   edge entry_edge;
    1834      1488370 :   rtx_insn *seq;
    1835              : 
    1836      1488370 :   if (!ix86_use_pseudo_pic_reg ())
    1837              :     return;
    1838              :   /* Build the initialization insns as a separate sequence.  */
    1839        40470 :   start_sequence ();
    1840              : 
    1841        40470 :   if (TARGET_64BIT)
    1842              :     {
    1843           69 :       if (ix86_cmodel == CM_LARGE_PIC)
    1844           53 :         ix86_init_large_pic_reg (R11_REG);
    1845              :       else
    1846           16 :         emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
    1847              :     }
    1848              :   else
    1849              :     {
    1850              :       /*  If there is a future mcount call in the function, it is more profitable
    1851              :           to emit SET_GOT into the ABI-defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
    1852        40401 :       rtx reg = crtl->profile
    1853        40401 :                 ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
    1854        40401 :                 : pic_offset_table_rtx;
    1855        40401 :       rtx_insn *insn = emit_insn (gen_set_got (reg));
    1856        40401 :       RTX_FRAME_RELATED_P (insn) = 1;
    1857        40401 :       if (crtl->profile)
    1858            0 :         emit_move_insn (pic_offset_table_rtx, reg);
    1859        40401 :       add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
    1860              :     }
    1861              : 
    1862        40470 :   seq = end_sequence ();
    1863              :   /* Insert the sequence on the single edge leaving the entry block.  */
    1864        40470 :   entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
    1865        40470 :   insert_insn_on_edge (seq, entry_edge);
    1866        40470 :   commit_one_edge_insertion (entry_edge);
    1867              : }
    1868              : 
    1869              : /* Initialize a variable CUM of type CUMULATIVE_ARGS
    1870              :    for a call to a function whose data type is FNTYPE.
    1871              :    For a library call, FNTYPE is 0.  FNDECL, when non-null, supplies the call
    1872              :    ABI and the type checked for preserve_none; CALLER is copied to CUM->caller.  */
    1873              : void
    1874     10378754 : init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
    1875              :                       tree fntype,      /* tree ptr for function type */
    1876              :                       rtx libname,      /* SYMBOL_REF of library name or 0 */
    1877              :                       tree fndecl,
    1878              :                       int caller)
    1879              : {
    1880     10378754 :   struct cgraph_node *local_info_node = NULL;
    1881     10378754 :   struct cgraph_node *target = NULL;
    1882              : 
    1883              :   /* Set silent_p to false to raise an error for invalid calls when
    1884              :      expanding function body.  */
    1885     10378754 :   cfun->machine->silent_p = false;
    1886              : 
    1887     10378754 :   memset (cum, 0, sizeof (*cum));
    1888              : 
    1889     10378754 :   tree preserve_none_type;
    1890     10378754 :   if (fndecl)
    1891              :     {
    1892     10039654 :       target = cgraph_node::get (fndecl);
    1893     10039654 :       if (target)
    1894              :         {
    1895      9894558 :           target = target->function_symbol ();
    1896      9894558 :           local_info_node = cgraph_node::local_info_node (target->decl);
    1897      9894558 :           cum->call_abi = ix86_function_abi (target->decl);
    1898      9894558 :           preserve_none_type = TREE_TYPE (target->decl);
    1899              :         }
    1900              :       else
    1901              :         {
    1902       145096 :           cum->call_abi = ix86_function_abi (fndecl);
    1903       145096 :           preserve_none_type = TREE_TYPE (fndecl);
    1904              :         }
    1905              :     }
    1906              :   else
    1907              :     {
    1908       339100 :       cum->call_abi = ix86_function_type_abi (fntype);
    1909       339100 :       preserve_none_type = fntype;
    1910              :     }
    1911     10378754 :   cum->preserve_none_abi
    1912     10378754 :     = (preserve_none_type
    1913     20639813 :        && (lookup_attribute ("preserve_none",
    1914     10261059 :                              TYPE_ATTRIBUTES (preserve_none_type))
    1915              :            != nullptr));
    1916              : 
    1917     10378754 :   cum->caller = caller;
    1918              : 
    1919              :   /* Set up the number of registers to use for passing arguments.  */
    1920     10378754 :   cum->nregs = ix86_regparm;
    1921     10378754 :   if (TARGET_64BIT)
    1922              :     {
    1923      9342343 :       cum->nregs = (cum->call_abi == SYSV_ABI
    1924      9342343 :                    ? X86_64_REGPARM_MAX
    1925              :                    : X86_64_MS_REGPARM_MAX);
    1926              :     }
    1927     10378754 :   if (TARGET_SSE)
    1928              :     {
    1929     10369644 :       cum->sse_nregs = SSE_REGPARM_MAX;
    1930     10369644 :       if (TARGET_64BIT)
    1931              :         {
    1932      9333353 :           cum->sse_nregs = (cum->call_abi == SYSV_ABI
    1933      9333353 :                            ? X86_64_SSE_REGPARM_MAX
    1934              :                            : X86_64_MS_SSE_REGPARM_MAX);
    1935              :         }
    1936              :     }
    1937     10378754 :   if (TARGET_MMX)
    1938     11202389 :     cum->mmx_nregs = MMX_REGPARM_MAX;
    1939     10378754 :   cum->warn_avx512f = true;
    1940     10378754 :   cum->warn_avx = true;
    1941     10378754 :   cum->warn_sse = true;
    1942     10378754 :   cum->warn_mmx = true;
    1943              : 
    1944              :   /* Because type might mismatch in between caller and callee, we need to
    1945              :      use actual type of function for local calls.
    1946              :      FIXME: cgraph_analyze can be told to actually record if function uses
    1947              :      va_start so for local functions maybe_vaarg can be made aggressive
    1948              :      helping K&R code.
    1949              :      FIXME: once the type system is fixed, we won't need this code anymore.  */
    1950     10378754 :   if (local_info_node && local_info_node->local
    1951       421790 :       && local_info_node->can_change_signature)
    1952       398574 :     fntype = TREE_TYPE (target->decl);
    1953     10378754 :   cum->stdarg = stdarg_p (fntype);
    1954     20757508 :   cum->maybe_vaarg = (fntype
    1955     10849418 :                       ? (!prototype_p (fntype) || stdarg_p (fntype))
    1956       117695 :                       : !libname);
    1957              : 
    1958     10378754 :   cum->decl = fndecl;
    1959              : 
    1960     10378754 :   cum->warn_empty = !warn_abi || cum->stdarg;
    1961     10378754 :   if (!cum->warn_empty && fntype)
    1962              :     {
    1963      2601309 :       function_args_iterator iter;
    1964      2601309 :       tree argtype;
    1965      2601309 :       bool seen_empty_type = false;
    1966      7215694 :       FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
    1967              :         {
    1968      7215631 :           if (argtype == error_mark_node || VOID_TYPE_P (argtype))
    1969              :             break;
    1970      4634376 :           if (TYPE_EMPTY_P (argtype))
    1971              :             seen_empty_type = true;
    1972      4562635 :           else if (seen_empty_type)
    1973              :             {
    1974        19991 :               cum->warn_empty = true;
    1975        19991 :               break;
    1976              :             }
    1977              :         }
    1978              :     }
    1979              : 
    1980     10378754 :   if (!TARGET_64BIT)
    1981              :     {
    1982              :       /* If there are variable arguments, then we won't pass anything
    1983              :          in registers in 32-bit mode. */
    1984      1036411 :       if (stdarg_p (fntype))
    1985              :         {
    1986         9087 :           cum->nregs = 0;
    1987              :           /* Since in 32-bit, variable arguments are always passed on
    1988              :              stack, there is scratch register available for indirect
    1989              :              sibcall.  */
    1990         9087 :           cfun->machine->arg_reg_available = true;
    1991         9087 :           cum->sse_nregs = 0;
    1992         9087 :           cum->mmx_nregs = 0;
    1993         9087 :           cum->warn_avx512f = false;
    1994         9087 :           cum->warn_avx = false;
    1995         9087 :           cum->warn_sse = false;
    1996         9087 :           cum->warn_mmx = false;
    1997         9087 :           return;
    1998              :         }
    1999              : 
    2000              :       /* Use ecx and edx registers if function has fastcall attribute,
    2001              :          else look for regparm information.  */
    2002      1027324 :       if (fntype)
    2003              :         {
    2004      1014138 :           unsigned int ccvt = ix86_get_callcvt (fntype);
    2005      1014138 :           if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    2006              :             {
    2007            0 :               cum->nregs = 1;
    2008            0 :               cum->fastcall = 1; /* Same first register as in fastcall.  */
    2009              :             }
    2010      1014138 :           else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    2011              :             {
    2012            4 :               cum->nregs = 2;
    2013            4 :               cum->fastcall = 1;
    2014              :             }
    2015              :           else
    2016      1014134 :             cum->nregs = ix86_function_regparm (fntype, fndecl);
    2017              :         }
    2018              : 
    2019              :       /* Set up the number of SSE registers used for passing SFmode
    2020              :          and DFmode arguments.  Warn for mismatching ABI.  */
    2021      1027324 :       cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    2022              :     }
    2023              : 
    2024     10369667 :   cfun->machine->arg_reg_available = (cum->nregs > 0);
    2025              : }
    2026              : 
    2027              : /* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
    2028              :    But in the case of vector types, it is some vector mode.
    2029              : 
    2030              :    When we have only some of our vector isa extensions enabled, then there
    2031              :    are some modes for which vector_mode_supported_p is false.  For these
    2032              :    modes, the generic vector support in gcc will choose some non-vector mode
    2033              :    in order to implement the type.  By computing the natural mode, we'll
    2034              :    select the proper ABI location for the operand and not depend on whatever
    2035              :    the middle-end decides to do with these vector types.
    2036              : 
    2037              :    The middle-end can't deal with the vector types > 16 bytes.  In this
    2038              :    case, we return the original mode and warn ABI change if CUM isn't
    2039              :    NULL.
    2040              : 
    2041              :    If IN_RETURN is true, warn ABI change if the vector mode isn't
    2042              :    available for function return value.  */
    2043              : 
    2044              : static machine_mode
    2045    228200560 : type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
    2046              :                    bool in_return)
    2047              : {
    2048    228200560 :   machine_mode mode = TYPE_MODE (type);
    2049              : 
    2050    228200560 :   if (VECTOR_TYPE_P (type) && !VECTOR_MODE_P (mode))
    2051              :     {
    2052       467243 :       HOST_WIDE_INT size = int_size_in_bytes (type);
    2053       467243 :       if ((size == 8 || size == 16 || size == 32 || size == 64)
    2054              :           /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
    2055       467243 :           && TYPE_VECTOR_SUBPARTS (type) > 1)
    2056              :         {
    2057       430671 :           machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
    2058              : 
    2059              :           /* There are no XFmode vector modes ...  */
    2060       430671 :           if (innermode == XFmode)
    2061              :             return mode;
    2062              : 
    2063              :           /* ... and no decimal float vector modes.  */
    2064       430118 :           if (DECIMAL_FLOAT_MODE_P (innermode))
    2065              :             return mode;
    2066              : 
    2067       429825 :           if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type)))
    2068              :             mode = MIN_MODE_VECTOR_FLOAT;
    2069              :           else
    2070       360005 :             mode = MIN_MODE_VECTOR_INT;
    2071              : 
    2072              :           /* Get the mode which has this inner mode and number of units.  */
    2073      9086658 :           FOR_EACH_MODE_FROM (mode, mode)
    2074     18908281 :             if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
    2075     10251448 :                 && GET_MODE_INNER (mode) == innermode)
    2076              :               {
    2077       429825 :                 if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
    2078              :                   {
    2079       293564 :                     static bool warnedavx512f;
    2080       293564 :                     static bool warnedavx512f_ret;
    2081              : 
    2082       293564 :                     if (cum && cum->warn_avx512f && !warnedavx512f)
    2083              :                       {
    2084         1361 :                         if (warning (OPT_Wpsabi, "AVX512F vector argument "
    2085              :                                      "without AVX512F enabled changes the ABI"))
    2086            2 :                           warnedavx512f = true;
    2087              :                       }
    2088       292203 :                     else if (in_return && !warnedavx512f_ret)
    2089              :                       {
    2090       283582 :                         if (warning (OPT_Wpsabi, "AVX512F vector return "
    2091              :                                      "without AVX512F enabled changes the ABI"))
    2092            4 :                           warnedavx512f_ret = true;
    2093              :                       }
    2094              : 
    2095       293564 :                     return TYPE_MODE (type);
    2096              :                   }
    2097       136261 :                 else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
    2098              :                   {
    2099       135712 :                     static bool warnedavx;
    2100       135712 :                     static bool warnedavx_ret;
    2101              : 
    2102       135712 :                     if (cum && cum->warn_avx && !warnedavx)
    2103              :                       {
    2104          770 :                         if (warning (OPT_Wpsabi, "AVX vector argument "
    2105              :                                      "without AVX enabled changes the ABI"))
    2106            5 :                           warnedavx = true;
    2107              :                       }
    2108       134942 :                     else if (in_return && !warnedavx_ret)
    2109              :                       {
    2110       120871 :                         if (warning (OPT_Wpsabi, "AVX vector return "
    2111              :                                      "without AVX enabled changes the ABI"))
    2112           10 :                           warnedavx_ret = true;
    2113              :                       }
    2114              : 
    2115       135712 :                     return TYPE_MODE (type);
    2116              :                   }
    2117          549 :                 else if (((size == 8 && TARGET_64BIT) || size == 16)
    2118          546 :                          && !TARGET_SSE
    2119          140 :                          && !TARGET_IAMCU)
    2120              :                   {
    2121          140 :                     static bool warnedsse;
    2122          140 :                     static bool warnedsse_ret;
    2123              : 
    2124          140 :                     if (cum && cum->warn_sse && !warnedsse)
    2125              :                       {
    2126           19 :                         if (warning (OPT_Wpsabi, "SSE vector argument "
    2127              :                                      "without SSE enabled changes the ABI"))
    2128            6 :                           warnedsse = true;
    2129              :                       }
    2130          121 :                     else if (!TARGET_64BIT && in_return && !warnedsse_ret)
    2131              :                       {
    2132            0 :                         if (warning (OPT_Wpsabi, "SSE vector return "
    2133              :                                      "without SSE enabled changes the ABI"))
    2134            0 :                           warnedsse_ret = true;
    2135              :                       }
    2136              :                   }
    2137          409 :                 else if ((size == 8 && !TARGET_64BIT)
    2138            0 :                          && (!cfun
    2139            0 :                              || cfun->machine->func_type == TYPE_NORMAL)
    2140            0 :                          && !TARGET_MMX
    2141            0 :                          && !TARGET_IAMCU)
    2142              :                   {
    2143            0 :                     static bool warnedmmx;
    2144            0 :                     static bool warnedmmx_ret;
    2145              : 
    2146            0 :                     if (cum && cum->warn_mmx && !warnedmmx)
    2147              :                       {
    2148            0 :                         if (warning (OPT_Wpsabi, "MMX vector argument "
    2149              :                                      "without MMX enabled changes the ABI"))
    2150            0 :                           warnedmmx = true;
    2151              :                       }
    2152            0 :                     else if (in_return && !warnedmmx_ret)
    2153              :                       {
    2154            0 :                         if (warning (OPT_Wpsabi, "MMX vector return "
    2155              :                                      "without MMX enabled changes the ABI"))
    2156            0 :                           warnedmmx_ret = true;
    2157              :                       }
    2158              :                   }
    2159          549 :                 return mode;
    2160              :               }
    2161              : 
    2162            0 :           gcc_unreachable ();
    2163              :         }
    2164              :     }
    2165              : 
    2166              :   return mode;
    2167              : }
    2168              : 
    2169              : /* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
    2170              :    this may not agree with the mode that the type system has chosen for the
    2171              :    register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
    2172              :    go ahead and use it.  Otherwise we have to build a PARALLEL instead,
    2173              :    holding a single MODE register at offset zero.  */
    2174              : static rtx
    2175     36477301 : gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
    2176              :                      unsigned int regno)
    2177              : {
    2178     36477301 :   rtx tmp;
    2179              : 
    2180     36477301 :   if (orig_mode != BLKmode)
    2181     36477273 :     tmp = gen_rtx_REG (orig_mode, regno);
    2182              :   else
    2183              :     {
    2184           28 :       tmp = gen_rtx_REG (mode, regno);
    2185           28 :       tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
    2186           28 :       tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    2187              :     }
    2188              : 
    2189     36477301 :   return tmp;
    2190              : }
    2191              : 
    2192              : /* x86-64 register passing implementation.  See x86-64 ABI for details.  The
    2193              :    goal of this code is to classify each eightbyte of an incoming argument by
    2194              :    register class and assign registers accordingly.  */
    2195              : 
    2196              : /* Return the union class of CLASS1 and CLASS2, applying the rules below
    2197              :    in order.  See the x86-64 PS ABI for details.  */
    2198              : 
    2199              : static enum x86_64_reg_class
    2200     55167844 : merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
    2201              : {
    2202              :   /* Rule #1: If both classes are equal, this is the resulting class.  */
    2203     53922334 :   if (class1 == class2)
    2204              :     return class1;
    2205              : 
    2206              :   /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
    2207              :      the other class.  */
    2208     47769811 :   if (class1 == X86_64_NO_CLASS)
    2209              :     return class2;
    2210     48595766 :   if (class2 == X86_64_NO_CLASS)
    2211              :     return class1;
    2212              : 
    2213              :   /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
    2214      1657209 :   if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    2215              :     return X86_64_MEMORY_CLASS;
    2216              : 
    2217              :   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  INTEGERSI merged with SSESF or SSEHF stays INTEGERSI.  */
    2218      1509699 :   if ((class1 == X86_64_INTEGERSI_CLASS
    2219       189076 :        && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS))
    2220      1508493 :       || (class2 == X86_64_INTEGERSI_CLASS
    2221       916644 :           && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS)))
    2222              :     return X86_64_INTEGERSI_CLASS;
    2223      1504556 :   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
    2224       384758 :       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    2225              :     return X86_64_INTEGER_CLASS;
    2226              : 
    2227              :   /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
    2228              :      MEMORY is used.  */
    2229        91086 :   if (class1 == X86_64_X87_CLASS
    2230              :       || class1 == X86_64_X87UP_CLASS
    2231        91086 :       || class1 == X86_64_COMPLEX_X87_CLASS
    2232              :       || class2 == X86_64_X87_CLASS
    2233        90181 :       || class2 == X86_64_X87UP_CLASS
    2234        59516 :       || class2 == X86_64_COMPLEX_X87_CLASS)
    2235        31570 :     return X86_64_MEMORY_CLASS;
    2236              : 
    2237              :   /* Rule #6: Otherwise class SSE is used.  */
    2238              :   return X86_64_SSE_CLASS;
    2239              : }
    2240              : 
    2241              : /* Classify the argument of type TYPE and mode MODE.
    2242              :    CLASSES will be filled with the register class used to pass each word
    2243              :    of the operand.  The number of words is returned.  In case the parameter
    2244              :    should be passed in memory, 0 is returned.  As a special case for zero
    2245              :    sized containers, classes[0] will be NO_CLASS and 1 is returned.
    2246              : 
    2247              :    BIT_OFFSET is used internally for handling records and specifies the
    2248              :    offset of the operand in bits, modulo 512 to avoid overflow cases.
    2249              : 
    2250              :    ZERO_WIDTH_BITFIELDS is in/out: unless it is 2 on entry, zero-width bit-fields are skipped and it is set to 1 when one is skipped (fields with DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD set are always skipped).
    2251              :    See the x86-64 psABI for details.  */
    2252              : 
    2253              : static int
    2254    389660458 : classify_argument (machine_mode mode, const_tree type,
    2255              :                    enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset,
    2256              :                    int &zero_width_bitfields)
    2257              : {
    2258    389660458 :   HOST_WIDE_INT bytes
    2259    773161324 :     = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
    2260    389660458 :   int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
    2261              : 
    2262              :   /* Variable sized entities are always passed/returned in memory.  */
    2263    389660458 :   if (bytes < 0)
    2264              :     return 0;
    2265              : 
    2266    389659259 :   if (mode != VOIDmode)
    2267              :     {
    2268              :       /* The value of "named" doesn't matter.  */
    2269    388548682 :       function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
    2270    388548682 :       if (targetm.calls.must_pass_in_stack (arg))
    2271           37 :         return 0;
    2272              :     }
    2273              : 
    2274    389659222 :   if (type && (AGGREGATE_TYPE_P (type)
    2275    353818133 :                || (BITINT_TYPE_P (type) && words > 1)))
    2276              :     {
    2277     36954181 :       int i;
    2278     36954181 :       tree field;
    2279     36954181 :       enum x86_64_reg_class subclasses[MAX_CLASSES];
    2280              : 
    2281              :       /* On x86-64 we pass structures larger than 64 bytes on the stack.  */
    2282     36954181 :       if (bytes > 64)
    2283              :         return 0;
    2284              : 
    2285     92998629 :       for (i = 0; i < words; i++)
    2286     56876046 :         classes[i] = X86_64_NO_CLASS;
    2287              : 
    2288              :       /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
    2289              :          signal memory class, so handle it as a special case.  */
    2290     36122583 :       if (!words)
    2291              :         {
    2292        82718 :           classes[0] = X86_64_NO_CLASS;
    2293        82718 :           return 1;
    2294              :         }
    2295              : 
    2296              :       /* Classify each field of record and merge classes.  */
    2297     36039865 :       switch (TREE_CODE (type))
    2298              :         {
    2299     34010132 :         case RECORD_TYPE:
    2300              :           /* And now merge the fields of structure.  */
    2301    916029451 :           for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    2302              :             {
    2303    882538627 :               if (TREE_CODE (field) == FIELD_DECL)
    2304              :                 {
    2305     50342019 :                   int num;
    2306              : 
    2307     50342019 :                   if (TREE_TYPE (field) == error_mark_node)
    2308            4 :                     continue;
    2309              : 
    2310              :                   /* Bitfields are always classified as integer.  Handle them
    2311              :                      early, since later code would consider them to be
    2312              :                      misaligned integers.  */
    2313     50342015 :                   if (DECL_BIT_FIELD (field))
    2314              :                     {
    2315      1254729 :                       if (integer_zerop (DECL_SIZE (field)))
    2316              :                         {
    2317        12839 :                           if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
    2318         8021 :                             continue;
    2319         4818 :                           if (zero_width_bitfields != 2)
    2320              :                             {
    2321         4284 :                               zero_width_bitfields = 1;
    2322         4284 :                               continue;
    2323              :                             }
    2324              :                         }
    2325      1242424 :                       for (i = (int_bit_position (field)
    2326      1242424 :                                 + (bit_offset % 64)) / 8 / 8;
    2327      2487934 :                            i < ((int_bit_position (field) + (bit_offset % 64))
    2328      2487934 :                                 + tree_to_shwi (DECL_SIZE (field))
    2329      2487934 :                                 + 63) / 8 / 8; i++)
    2330      1245510 :                         classes[i]
    2331      2491020 :                           = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
    2332              :                     }
    2333              :                   else
    2334              :                     {
    2335     49087286 :                       int pos;
    2336              : 
    2337     49087286 :                       type = TREE_TYPE (field);
    2338              : 
    2339              :                       /* Flexible array member is ignored.  */
    2340     49087286 :                       if (TYPE_MODE (type) == BLKmode
    2341       651016 :                           && TREE_CODE (type) == ARRAY_TYPE
    2342       168531 :                           && TYPE_SIZE (type) == NULL_TREE
    2343         2007 :                           && TYPE_DOMAIN (type) != NULL_TREE
    2344     49088528 :                           && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
    2345              :                               == NULL_TREE))
    2346              :                         {
    2347         1242 :                           static bool warned;
    2348              : 
    2349         1242 :                           if (!warned && warn_psabi)
    2350              :                             {
    2351            3 :                               warned = true;
    2352            3 :                               inform (input_location,
    2353              :                                       "the ABI of passing struct with"
    2354              :                                       " a flexible array member has"
    2355              :                                       " changed in GCC 4.4");
    2356              :                             }
    2357         1242 :                           continue;
    2358         1242 :                         }
    2359     49086044 :                       num = classify_argument (TYPE_MODE (type), type,
    2360              :                                                subclasses,
    2361     49086044 :                                                (int_bit_position (field)
    2362     49086044 :                                                 + bit_offset) % 512,
    2363              :                                                zero_width_bitfields);
    2364     49086044 :                       if (!num)
    2365              :                         return 0;
    2366     48566736 :                       pos = (int_bit_position (field)
    2367     48566736 :                              + (bit_offset % 64)) / 8 / 8;
    2368    100524438 :                       for (i = 0; i < num && (i + pos) < words; i++)
    2369     51957702 :                         classes[i + pos]
    2370     51957702 :                           = merge_classes (subclasses[i], classes[i + pos]);
    2371              :                     }
    2372              :                 }
    2373              :             }
    2374              :           break;
    2375              : 
    2376       445240 :         case ARRAY_TYPE:
    2377              :           /* Arrays are handled as small records.  */
    2378       445240 :           {
    2379       445240 :             int num;
    2380       445240 :             num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
    2381       445240 :                                      TREE_TYPE (type), subclasses, bit_offset,
    2382              :                                      zero_width_bitfields);
    2383       445240 :             if (!num)
    2384              :               return 0;
    2385              : 
    2386              :             /* The partial classes are now full classes.  */
    2387       429371 :             if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
    2388        13868 :               subclasses[0] = X86_64_SSE_CLASS;
    2389       429371 :             if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2)
    2390         5126 :               subclasses[0] = X86_64_SSE_CLASS;
    2391       429371 :             if (subclasses[0] == X86_64_INTEGERSI_CLASS
    2392       161349 :                 && !((bit_offset % 64) == 0 && bytes == 4))
    2393       130127 :               subclasses[0] = X86_64_INTEGER_CLASS;
    2394              : 
    2395      1326278 :             for (i = 0; i < words; i++)
    2396       896907 :               classes[i] = subclasses[i % num];
    2397              : 
    2398              :             break;
    2399              :           }
    2400       273008 :         case UNION_TYPE:
    2401       273008 :         case QUAL_UNION_TYPE:
    2402              :           /* Unions are similar to RECORD_TYPE, but every member is classified at
    2403              :              BIT_OFFSET itself and merged starting from classes[0].  */
    2404      3038701 :           for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    2405              :             {
    2406      2800958 :               if (TREE_CODE (field) == FIELD_DECL)
    2407              :                 {
    2408      1237591 :                   int num;
    2409              : 
    2410      1237591 :                   if (TREE_TYPE (field) == error_mark_node)
    2411           10 :                     continue;
    2412              : 
    2413      1237581 :                   num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
    2414      1237581 :                                            TREE_TYPE (field), subclasses,
    2415              :                                            bit_offset, zero_width_bitfields);
    2416      1237581 :                   if (!num)
    2417              :                     return 0;
    2418      3166948 :                   for (i = 0; i < num && i < words; i++)
    2419      1964632 :                     classes[i] = merge_classes (subclasses[i], classes[i]);
    2420              :                 }
    2421              :             }
    2422              :           break;
    2423              : 
    2424      1311485 :         case BITINT_TYPE:
    2425      1311485 :         case ENUMERAL_TYPE:
    2426              :           /* _BitInt(N) for N > 64 is passed as structure containing
    2427              :              (N + 63) / 64 64-bit elements.  */
    2428      1311485 :           if (words > 2)
    2429              :             return 0;
    2430        75441 :           classes[0] = classes[1] = X86_64_INTEGER_CLASS;
    2431        75441 :           return 2;
    2432              : 
    2433            0 :         default:
    2434            0 :           gcc_unreachable ();
    2435              :         }
    2436              : 
    2437     34157938 :       if (words > 2)
    2438              :         {
    2439              :           /* When size > 16 bytes, if the first one isn't
    2440              :              X86_64_SSE_CLASS or any other ones aren't
    2441              :              X86_64_SSEUP_CLASS, everything should be passed in
    2442              :              memory.  */
    2443      1661806 :           if (classes[0] != X86_64_SSE_CLASS)
    2444              :             return 0;
    2445              : 
    2446       197316 :           for (i = 1; i < words; i++)
    2447       179129 :             if (classes[i] != X86_64_SSEUP_CLASS)
    2448              :               return 0;
    2449              :         }
    2450              : 
    2451              :       /* Final merger cleanup.  */
    2452     76518474 :       for (i = 0; i < words; i++)
    2453              :         {
    2454              :           /* If one class is MEMORY, everything should be passed in
    2455              :              memory.  */
    2456     44037382 :           if (classes[i] == X86_64_MEMORY_CLASS)
    2457              :             return 0;
    2458              : 
    2459              :           /* The X86_64_SSEUP_CLASS should be always preceded by
    2460              :              X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
    2461     44006525 :           if (classes[i] == X86_64_SSEUP_CLASS
    2462       207011 :               && classes[i - 1] != X86_64_SSE_CLASS
    2463        76546 :               && classes[i - 1] != X86_64_SSEUP_CLASS)
    2464              :             {
    2465              :               /* The first one should never be X86_64_SSEUP_CLASS.  NOTE(review): classes[i - 1] was already read by the condition above.  */
    2466         1916 :               gcc_assert (i != 0);
    2467         1916 :               classes[i] = X86_64_SSE_CLASS;
    2468              :             }
    2469              : 
    2470              :           /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
    2471              :              everything should be passed in memory.  */
    2472     44006525 :           if (classes[i] == X86_64_X87UP_CLASS
    2473       178062 :               && (classes[i - 1] != X86_64_X87_CLASS))
    2474              :             {
    2475         2370 :               static bool warned;
    2476              : 
    2477              :               /* The first one should never be X86_64_X87UP_CLASS.  NOTE(review): classes[i - 1] was already read by the condition above.  */
    2478         2370 :               gcc_assert (i != 0);
    2479         2370 :               if (!warned && warn_psabi)
    2480              :                 {
    2481            1 :                   warned = true;
    2482            1 :                   inform (input_location,
    2483              :                           "the ABI of passing union with %<long double%>"
    2484              :                           " has changed in GCC 4.4");
    2485              :                 }
    2486         2370 :               return 0;
    2487              :             }
    2488              :         }
    2489              :       return words;
    2490              :     }
    2491              : 
    2492              :   /* Compute alignment needed.  We align all types to natural boundaries with
    2493              :      the exception of XFmode (128 bits) and XCmode (256 bits); complex modes then use half of the value.  */
    2494    352705041 :   if (mode != VOIDmode && mode != BLKmode)
    2495              :     {
    2496    351130470 :       int mode_alignment = GET_MODE_BITSIZE (mode);
    2497              : 
    2498    351130470 :       if (mode == XFmode)
    2499              :         mode_alignment = 128;
    2500    344256065 :       else if (mode == XCmode)
    2501       548711 :         mode_alignment = 256;
    2502    351130470 :       if (COMPLEX_MODE_P (mode))
    2503      2304594 :         mode_alignment /= 2;
    2504              :       /* Misaligned fields are always returned in memory.  */
    2505    351130470 :       if (bit_offset % mode_alignment)
    2506              :         return 0;
    2507              :     }
    2508              : 
    2509              :   /* For vector modes whose unit size equals the whole size (other than V1DImode and V1TImode), just use the element mode.  */
    2510    352697408 :   if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
    2511    446184200 :       && GET_MODE_UNIT_SIZE (mode) == bytes)
    2512         6315 :     mode = GET_MODE_INNER (mode);
    2513              : 
    2514              :   /* Classification of atomic types.  */
    2515    352697408 :   switch (mode)
    2516              :     {
    2517       207933 :     case E_SDmode:
    2518       207933 :     case E_DDmode:
    2519       207933 :       classes[0] = X86_64_SSE_CLASS;
    2520       207933 :       return 1;
    2521        98708 :     case E_TDmode:
    2522        98708 :       classes[0] = X86_64_SSE_CLASS;
    2523        98708 :       classes[1] = X86_64_SSEUP_CLASS;
    2524        98708 :       return 2;
    2525    229723207 :     case E_DImode:
    2526    229723207 :     case E_SImode:
    2527    229723207 :     case E_HImode:
    2528    229723207 :     case E_QImode:
    2529    229723207 :     case E_CSImode:
    2530    229723207 :     case E_CHImode:
    2531    229723207 :     case E_CQImode:
    2532    229723207 :       {
    2533    229723207 :         int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
    2534              : 
    2535              :         /* Analyze last 128 bits only.  */
    2536    229723207 :         size = (size - 1) & 0x7f;
    2537              : 
    2538    229723207 :         if (size < 32)
    2539              :           {
    2540    102235422 :             classes[0] = X86_64_INTEGERSI_CLASS;
    2541    102235422 :             return 1;
    2542              :           }
    2543    127487785 :         else if (size < 64)
    2544              :           {
    2545    116734219 :             classes[0] = X86_64_INTEGER_CLASS;
    2546    116734219 :             return 1;
    2547              :           }
    2548     10753566 :         else if (size < 64+32)
    2549              :           {
    2550      3994645 :             classes[0] = X86_64_INTEGER_CLASS;
    2551      3994645 :             classes[1] = X86_64_INTEGERSI_CLASS;
    2552      3994645 :             return 2;
    2553              :           }
    2554      6758921 :         else if (size < 64+64)
    2555              :           {
    2556      6758921 :             classes[0] = classes[1] = X86_64_INTEGER_CLASS;
    2557      6758921 :             return 2;
    2558              :           }
    2559              :         else
    2560              :           gcc_unreachable ();
    2561              :       }
    2562      2459196 :     case E_CDImode:
    2563      2459196 :     case E_TImode:
    2564      2459196 :       classes[0] = classes[1] = X86_64_INTEGER_CLASS;
    2565      2459196 :       return 2;
    2566            0 :     case E_COImode:
    2567            0 :     case E_OImode:
    2568              :       /* OImode shouldn't be used directly.  */
    2569            0 :       gcc_unreachable ();
    2570              :     case E_CTImode:
    2571              :       return 0;
    2572       863975 :     case E_HFmode:
    2573       863975 :     case E_BFmode:
    2574       863975 :       if (!(bit_offset % 64))
    2575       861425 :         classes[0] = X86_64_SSEHF_CLASS;
    2576              :       else
    2577         2550 :         classes[0] = X86_64_SSE_CLASS;
    2578              :       return 1;
    2579      9723457 :     case E_SFmode:
    2580      9723457 :       if (!(bit_offset % 64))
    2581      9670278 :         classes[0] = X86_64_SSESF_CLASS;
    2582              :       else
    2583        53179 :         classes[0] = X86_64_SSE_CLASS;
    2584              :       return 1;
    2585      4258433 :     case E_DFmode:
    2586      4258433 :       classes[0] = X86_64_SSEDF_CLASS;
    2587      4258433 :       return 1;
    2588      6873689 :     case E_XFmode:
    2589      6873689 :       classes[0] = X86_64_X87_CLASS;
    2590      6873689 :       classes[1] = X86_64_X87UP_CLASS;
    2591      6873689 :       return 2;
    2592      1309162 :     case E_TFmode:
    2593      1309162 :       classes[0] = X86_64_SSE_CLASS;
    2594      1309162 :       classes[1] = X86_64_SSEUP_CLASS;
    2595      1309162 :       return 2;
    2596        77920 :     case E_HCmode:
    2597        77920 :     case E_BCmode:
    2598        77920 :       classes[0] = X86_64_SSE_CLASS;
    2599        77920 :       if (!(bit_offset % 64))
    2600              :         return 1;
    2601              :       else
    2602              :         {
    2603           98 :           classes[1] = X86_64_SSEHF_CLASS;
    2604           98 :           return 2;
    2605              :         }
    2606       690840 :     case E_SCmode:
    2607       690840 :       classes[0] = X86_64_SSE_CLASS;
    2608       690840 :       if (!(bit_offset % 64))
    2609              :         return 1;
    2610              :       else
    2611              :         {
    2612         1119 :           static bool warned;
    2613              : 
    2614         1119 :           if (!warned && warn_psabi)
    2615              :             {
    2616            2 :               warned = true;
    2617            2 :               inform (input_location,
    2618              :                       "the ABI of passing structure with %<complex float%>"
    2619              :                       " member has changed in GCC 4.4");
    2620              :             }
    2621         1119 :           classes[1] = X86_64_SSESF_CLASS;
    2622         1119 :           return 2;
    2623              :         }
    2624       700857 :     case E_DCmode:
    2625       700857 :       classes[0] = X86_64_SSEDF_CLASS;
    2626       700857 :       classes[1] = X86_64_SSEDF_CLASS;
    2627       700857 :       return 2;
    2628       548711 :     case E_XCmode:
    2629       548711 :       classes[0] = X86_64_COMPLEX_X87_CLASS;
    2630       548711 :       return 1;
    2631              :     case E_TCmode:
    2632              :       /* This mode is larger than 16 bytes.  */
    2633              :       return 0;
    2634     25339404 :     case E_V8SFmode:
    2635     25339404 :     case E_V8SImode:
    2636     25339404 :     case E_V32QImode:
    2637     25339404 :     case E_V16HFmode:
    2638     25339404 :     case E_V16BFmode:
    2639     25339404 :     case E_V16HImode:
    2640     25339404 :     case E_V4DFmode:
    2641     25339404 :     case E_V4DImode:
    2642     25339404 :       classes[0] = X86_64_SSE_CLASS;
    2643     25339404 :       classes[1] = X86_64_SSEUP_CLASS;
    2644     25339404 :       classes[2] = X86_64_SSEUP_CLASS;
    2645     25339404 :       classes[3] = X86_64_SSEUP_CLASS;
    2646     25339404 :       return 4;
    2647     27478225 :     case E_V8DFmode:
    2648     27478225 :     case E_V16SFmode:
    2649     27478225 :     case E_V32HFmode:
    2650     27478225 :     case E_V32BFmode:
    2651     27478225 :     case E_V8DImode:
    2652     27478225 :     case E_V16SImode:
    2653     27478225 :     case E_V32HImode:
    2654     27478225 :     case E_V64QImode:
    2655     27478225 :       classes[0] = X86_64_SSE_CLASS;
    2656     27478225 :       classes[1] = X86_64_SSEUP_CLASS;
    2657     27478225 :       classes[2] = X86_64_SSEUP_CLASS;
    2658     27478225 :       classes[3] = X86_64_SSEUP_CLASS;
    2659     27478225 :       classes[4] = X86_64_SSEUP_CLASS;
    2660     27478225 :       classes[5] = X86_64_SSEUP_CLASS;
    2661     27478225 :       classes[6] = X86_64_SSEUP_CLASS;
    2662     27478225 :       classes[7] = X86_64_SSEUP_CLASS;
    2663     27478225 :       return 8;
    2664     37370843 :     case E_V4SFmode:
    2665     37370843 :     case E_V4SImode:
    2666     37370843 :     case E_V16QImode:
    2667     37370843 :     case E_V8HImode:
    2668     37370843 :     case E_V8HFmode:
    2669     37370843 :     case E_V8BFmode:
    2670     37370843 :     case E_V2DFmode:
    2671     37370843 :     case E_V2DImode:
    2672     37370843 :       classes[0] = X86_64_SSE_CLASS;
    2673     37370843 :       classes[1] = X86_64_SSEUP_CLASS;
    2674     37370843 :       return 2;
    2675      3266734 :     case E_V1TImode:
    2676      3266734 :     case E_V1DImode:
    2677      3266734 :     case E_V2SFmode:
    2678      3266734 :     case E_V2SImode:
    2679      3266734 :     case E_V4HImode:
    2680      3266734 :     case E_V4HFmode:
    2681      3266734 :     case E_V4BFmode:
    2682      3266734 :     case E_V2HFmode:
    2683      3266734 :     case E_V2BFmode:
    2684      3266734 :     case E_V8QImode:
    2685      3266734 :       classes[0] = X86_64_SSE_CLASS;
    2686      3266734 :       return 1;
    2687              :     case E_BLKmode:
    2688              :     case E_VOIDmode:
    2689              :       return 0;
    2690        45148 :     default:
    2691        45148 :       gcc_assert (VECTOR_MODE_P (mode));
    2692              : 
    2693        45148 :       if (bytes > 16)
    2694              :         return 0;
    2695              : 
    2696        60568 :       gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
    2697              : 
    2698        60568 :       if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
    2699        29850 :         classes[0] = X86_64_INTEGERSI_CLASS;
    2700              :       else
    2701          434 :         classes[0] = X86_64_INTEGER_CLASS;
    2702        30284 :       classes[1] = X86_64_INTEGER_CLASS;
    2703        30284 :       return 1 + (bytes > 8);
    2704              :     }
    2705              : }
    2706              : 
    2707              : /* Wrapper around classify_argument with the extra zero_width_bitfields
    2708              :    argument, to diagnose GCC 12.1 ABI differences for C: when a zero-width bit-field was skipped, classify again without skipping and inform once if the outcome differs.  The first result is always returned.  */
    2709              : 
    2710              : static int
    2711    338891059 : classify_argument (machine_mode mode, const_tree type,
    2712              :                    enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
    2713              : {
    2714    338891059 :   int zero_width_bitfields = 0;
    2715    338891059 :   static bool warned = false;
    2716    338891059 :   int n = classify_argument (mode, type, classes, bit_offset,
    2717              :                              zero_width_bitfields);
    2718    338891059 :   if (!zero_width_bitfields || warned || !warn_psabi)
    2719              :     return n;
    2720          534 :   enum x86_64_reg_class alt_classes[MAX_CLASSES];
    2721          534 :   zero_width_bitfields = 2;
    2722          534 :   if (classify_argument (mode, type, alt_classes, bit_offset,
    2723              :                          zero_width_bitfields) != n)
    2724            0 :     zero_width_bitfields = 3;
    2725              :   else
    2726         1286 :     for (int i = 0; i < n; i++)
    2727          760 :       if (classes[i] != alt_classes[i])
    2728              :         {
    2729            8 :           zero_width_bitfields = 3;
    2730            8 :           break;
    2731              :         }
    2732          534 :   if (zero_width_bitfields == 3)
    2733              :     {
    2734            8 :       warned = true;
    2735            8 :       const char *url
    2736              :         = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
    2737              : 
    2738            8 :       inform (input_location,
    2739              :               "the ABI of passing C structures with zero-width bit-fields"
    2740              :               " has changed in GCC %{12.1%}", url);
    2741              :     }
    2742              :   return n;
    2743              : }
    2744              : 
    2745              : /* Examine the argument and set *INT_NREGS and *SSE_NREGS to the number of
    2746              :    registers required in each class.  Return true iff parameter should be passed in memory.  */
    2747              : 
    2748              : static bool
    2749    229306067 : examine_argument (machine_mode mode, const_tree type, bool in_return,
    2750              :                   int *int_nregs, int *sse_nregs)
    2751              : {
    2752    229306067 :   enum x86_64_reg_class regclass[MAX_CLASSES];
    2753    229306067 :   int n = classify_argument (mode, type, regclass, 0);
    2754              : 
    2755    229306067 :   *int_nregs = 0;
    2756    229306067 :   *sse_nregs = 0;
    2757              : 
    2758    229306067 :   if (!n)
    2759              :     return true;
    2760    664510507 :   for (n--; n >= 0; n--)
    2761    440734285 :     switch (regclass[n])
    2762              :       {
    2763    152252834 :       case X86_64_INTEGER_CLASS:
    2764    152252834 :       case X86_64_INTEGERSI_CLASS:
    2765    152252834 :         (*int_nregs)++;
    2766    152252834 :         break;
    2767     74459857 :       case X86_64_SSE_CLASS:
    2768     74459857 :       case X86_64_SSEHF_CLASS:
    2769     74459857 :       case X86_64_SSESF_CLASS:
    2770     74459857 :       case X86_64_SSEDF_CLASS:
    2771     74459857 :         (*sse_nregs)++;
    2772     74459857 :         break;
    2773              :       case X86_64_NO_CLASS:
    2774              :       case X86_64_SSEUP_CLASS:
    2775              :         break;
    2776      9386961 :       case X86_64_X87_CLASS:
    2777      9386961 :       case X86_64_X87UP_CLASS:
    2778      9386961 :       case X86_64_COMPLEX_X87_CLASS:
    2779      9386961 :         if (!in_return)
    2780              :           return true;
    2781              :         break;
    2782            0 :       case X86_64_MEMORY_CLASS:
    2783            0 :         gcc_unreachable ();
    2784              :       }
    2785              : 
    2786              :   return false;
    2787              : }
    2788              : 
    2789              : /* Construct container for the argument used by GCC interface.  See
    2790              :    FUNCTION_ARG for the detailed description.
                      :
                      :    MODE and TYPE are handed to examine_argument and classify_argument
                      :    to find out which register classes the value needs; its size is
                      :    taken from TYPE when MODE is BLKmode and from MODE otherwise.
                      :    ORIG_MODE is only forwarded to gen_reg_or_parallel.  IN_RETURN is
                      :    true when the container describes a return value rather than an
                      :    argument.  NINTREGS and NSSEREGS bound the number of integer and
                      :    SSE registers that may be used, INTREG points to the integer
                      :    register numbers to hand out and SSE_REGNO is the index of the
                      :    first SSE register, translated with GET_SSE_REGNO.
                      :
                      :    Return a REG, or a PARALLEL of (register, byte offset) EXPR_LISTs,
                      :    describing where the value lives.  Return NULL when
                      :    examine_argument returns true, when more registers are needed
                      :    than allowed, when a required register set is disabled (after
                      :    issuing an error unless cfun->machine->silent_p is set), or when
                      :    the value turns out to be empty.  */
    2791              : 
    2792              : static rtx
    2793    111198730 : construct_container (machine_mode mode, machine_mode orig_mode,
    2794              :                      const_tree type, bool in_return, int nintregs,
    2795              :                      int nsseregs, const int *intreg, int sse_regno)
    2796              : {
    2797              :   /* The following variables hold the static issued_error state: each
                      :      one is set once its error has been reported, so that later
                      :      calls do not repeat the same diagnostic.  */
    2798    111198730 :   static bool issued_sse_arg_error;
    2799    111198730 :   static bool issued_sse_ret_error;
    2800    111198730 :   static bool issued_x87_ret_error;
    2801              : 
    2802    111198730 :   machine_mode tmpmode;
    2803    111198730 :   int bytes
    2804    221715444 :     = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
    2805    111198730 :   enum x86_64_reg_class regclass[MAX_CLASSES];
    2806    111198730 :   int n;
    2807    111198730 :   int i;
    2808    111198730 :   int nexps = 0;
    2809    111198730 :   int needed_sseregs, needed_intregs;
    2810    111198730 :   rtx exp[MAX_CLASSES];
    2811    111198730 :   rtx ret;
    2812              : 
                          /* A true result from examine_argument means no container is
                             built; otherwise it has filled in the register counts.  */
    2813    111198730 :   if (examine_argument (mode, type, in_return, &needed_intregs,
    2814              :                         &needed_sseregs))
    2815              :     return NULL;
    2816              : 
                          /* Give up when the value needs more registers than allowed.  */
    2817    110690236 :   if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    2818              :     return NULL;
    2819              : 
    2820              :   /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
    2821              :      some less clueful developer tries to use floating-point anyway.
                      :      SSE2 must be enabled as well when VALID_SSE2_TYPE_MODE (mode)
                      :      holds.  */
    2822    109585063 :   if (needed_sseregs
    2823     36796220 :       && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
    2824              :     {
    2825              :       /* Return early if we shouldn't raise an error for invalid
    2826              :          calls.  */
    2827           71 :       if (cfun != NULL && cfun->machine->silent_p)
    2828              :         return NULL;
    2829           39 :       if (in_return)
    2830              :         {
    2831           34 :           if (!issued_sse_ret_error)
    2832              :             {
    2833           16 :               if (VALID_SSE2_TYPE_MODE (mode))
    2834            5 :                 error ("SSE register return with SSE2 disabled");
    2835              :               else
    2836           11 :                 error ("SSE register return with SSE disabled");
    2837           16 :               issued_sse_ret_error = true;
    2838              :             }
    2839              :         }
    2840            5 :       else if (!issued_sse_arg_error)
    2841              :         {
    2842            5 :           if (VALID_SSE2_TYPE_MODE (mode))
    2843            0 :             error ("SSE register argument with SSE2 disabled");
    2844              :           else
    2845            5 :             error ("SSE register argument with SSE disabled");
    2846            5 :           issued_sse_arg_error = true;
    2847              :         }
    2848           39 :       return NULL;
    2849              :     }
    2850              : 
    2851    109584992 :   n = classify_argument (mode, type, regclass, 0);
    2852    109584992 :   gcc_assert (n);
    2853              : 
    2854              :   /* Likewise, error if the ABI requires us to return values in the
    2855              :      x87 registers and the user specified -mno-80387.  */
    2856    109584992 :   if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
    2857      1424666 :     for (i = 0; i < n; i++)
    2858       751944 :       if (regclass[i] == X86_64_X87_CLASS
    2859              :           || regclass[i] == X86_64_X87UP_CLASS
    2860       751944 :           || regclass[i] == X86_64_COMPLEX_X87_CLASS)
    2861              :         {
    2862              :           /* Return early if we shouldn't raise an error for invalid
    2863              :              calls.  */
    2864           16 :           if (cfun != NULL && cfun->machine->silent_p)
    2865              :             return NULL;
    2866           13 :           if (!issued_x87_ret_error)
    2867              :             {
    2868            8 :               error ("x87 register return with x87 disabled");
    2869            8 :               issued_x87_ret_error = true;
    2870              :             }
    2871           13 :           return NULL;
    2872              :         }
    2873              : 
    2874              :   /* First construct simple cases.  Avoid SCmode and HCmode, since we
    2875              :      want to use single register to pass this type.  An SSE class
                      :      with BLKmode is not handled here and falls through to the code
                      :      below.  */
    2876    109584976 :   if (n == 1 && mode != SCmode && mode != HCmode)
    2877     72684381 :     switch (regclass[0])
    2878              :       {
    2879     66631201 :       case X86_64_INTEGER_CLASS:
    2880     66631201 :       case X86_64_INTEGERSI_CLASS:
    2881     66631201 :         return gen_rtx_REG (mode, intreg[0]);
    2882      5853225 :       case X86_64_SSE_CLASS:
    2883      5853225 :       case X86_64_SSEHF_CLASS:
    2884      5853225 :       case X86_64_SSESF_CLASS:
    2885      5853225 :       case X86_64_SSEDF_CLASS:
    2886      5853225 :         if (mode != BLKmode)
    2887     11705642 :           return gen_reg_or_parallel (mode, orig_mode,
    2888     11705642 :                                       GET_SSE_REGNO (sse_regno));
    2889              :         break;
    2890       170953 :       case X86_64_X87_CLASS:
    2891       170953 :       case X86_64_COMPLEX_X87_CLASS:
    2892       170953 :         return gen_rtx_REG (mode, FIRST_STACK_REG);
    2893              :       case X86_64_NO_CLASS:
    2894              :         /* Zero sized array, struct or class.  */
    2895              :         return NULL;
    2896            0 :       default:
    2897            0 :         gcc_unreachable ();
    2898              :       }
                          /* A non-BLKmode value classified as one SSE class followed only
                             by SSEUP classes (2, 4 or 8 classes in total) is described by
                             the single register GET_SSE_REGNO (sse_regno).  */
    2899     36900999 :   if (n == 2
    2900     19103430 :       && regclass[0] == X86_64_SSE_CLASS
    2901     12897286 :       && regclass[1] == X86_64_SSEUP_CLASS
    2902     12892131 :       && mode != BLKmode)
    2903     25784262 :     return gen_reg_or_parallel (mode, orig_mode,
    2904     25784262 :                                 GET_SSE_REGNO (sse_regno));
    2905     24008868 :   if (n == 4
    2906      8430456 :       && regclass[0] == X86_64_SSE_CLASS
    2907      8430456 :       && regclass[1] == X86_64_SSEUP_CLASS
    2908      8430456 :       && regclass[2] == X86_64_SSEUP_CLASS
    2909      8430456 :       && regclass[3] == X86_64_SSEUP_CLASS
    2910      8430456 :       && mode != BLKmode)
    2911     16857534 :     return gen_reg_or_parallel (mode, orig_mode,
    2912     16857534 :                                 GET_SSE_REGNO (sse_regno));
    2913     15580101 :   if (n == 8
    2914      9129315 :       && regclass[0] == X86_64_SSE_CLASS
    2915      9129315 :       && regclass[1] == X86_64_SSEUP_CLASS
    2916      9129315 :       && regclass[2] == X86_64_SSEUP_CLASS
    2917      9129315 :       && regclass[3] == X86_64_SSEUP_CLASS
    2918      9129315 :       && regclass[4] == X86_64_SSEUP_CLASS
    2919      9129315 :       && regclass[5] == X86_64_SSEUP_CLASS
    2920      9129315 :       && regclass[6] == X86_64_SSEUP_CLASS
    2921      9129315 :       && regclass[7] == X86_64_SSEUP_CLASS
    2922      9129315 :       && mode != BLKmode)
    2923     18254358 :     return gen_reg_or_parallel (mode, orig_mode,
    2924     18254358 :                                 GET_SSE_REGNO (sse_regno));
                          /* X87 followed by X87UP is an XFmode value in FIRST_STACK_REG.  */
    2925      6452922 :   if (n == 2
    2926      6211299 :       && regclass[0] == X86_64_X87_CLASS
    2927      2242042 :       && regclass[1] == X86_64_X87UP_CLASS)
    2928      2242042 :     return gen_rtx_REG (XFmode, FIRST_STACK_REG);
    2929              : 
                          /* Two INTEGER classes that were assigned consecutive register
                             numbers can be described as one wide register starting at
                             intreg[0].  */
    2930      4210880 :   if (n == 2
    2931      3969257 :       && regclass[0] == X86_64_INTEGER_CLASS
    2932      3547567 :       && regclass[1] == X86_64_INTEGER_CLASS
    2933      3539224 :       && (mode == CDImode || mode == TImode || mode == BLKmode)
    2934      3539224 :       && intreg[0] + 1 == intreg[1])
    2935              :     {
    2936      3220156 :       if (mode == BLKmode)
    2937              :         {
    2938              :           /* Use TImode for BLKmode values in 2 integer registers.  The
                      :              register is wrapped in a one-element PARALLEL at byte
                      :              offset 0.  */
    2939       515956 :           exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
    2940       257978 :                                       gen_rtx_REG (TImode, intreg[0]),
    2941              :                                       GEN_INT (0));
    2942       257978 :           ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
    2943       257978 :           XVECEXP (ret, 0, 0) = exp[0];
    2944       257978 :           return ret;
    2945              :         }
    2946              :       else
    2947      2962178 :         return gen_rtx_REG (mode, intreg[0]);
    2948              :     }
    2949              : 
    2950              :   /* Otherwise figure out the entries of the PARALLEL.  Each entry is an
                      :      EXPR_LIST pairing a register with the byte offset (8 times the
                      :      class index) of the 8-byte chunk it holds.  INTREG and SSE_REGNO
                      :      are advanced as registers are handed out.  */
    2951      2730549 :   for (i = 0; i < n; i++)
    2952              :     {
    2953      1739825 :       int pos;
    2954              : 
    2955      1739825 :       switch (regclass[i])
    2956              :         {
    2957              :           case X86_64_NO_CLASS:
    2958              :             break;
    2959       994383 :           case X86_64_INTEGER_CLASS:
    2960       994383 :           case X86_64_INTEGERSI_CLASS:
    2961              :             /* Merge TImodes on aligned occasions here too.
                      :
                      :                If this chunk extends past the end of the value
                      :                (i * 8 + 8 > bytes), pick an integer mode that covers
                      :                only the remaining bits.  */
    2962       994383 :             if (i * 8 + 8 > bytes)
    2963              :               {
    2964         3235 :                 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
    2965         3235 :                 if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
    2966              :                   /* We've requested a size (TMPBITS is in bits, e.g. 24
    2967              :                      bits) we don't have mode for.  Use DImode.  */
    2968          357 :                   tmpmode = DImode;
    2969              :               }
    2970       991148 :             else if (regclass[i] == X86_64_INTEGERSI_CLASS)
    2971              :               tmpmode = SImode;
    2972              :             else
    2973       818640 :               tmpmode = DImode;
    2974      1988766 :             exp [nexps++]
    2975       994383 :               = gen_rtx_EXPR_LIST (VOIDmode,
    2976       994383 :                                    gen_rtx_REG (tmpmode, *intreg),
    2977       994383 :                                    GEN_INT (i*8));
    2978       994383 :             intreg++;
    2979       994383 :             break;
    2980          592 :           case X86_64_SSEHF_CLASS:
                                    /* BFmode values keep BFmode; any other mode in this
                                       class is described as HFmode.  */
    2981          592 :             tmpmode = (mode == BFmode ? BFmode : HFmode);
    2982         1184 :             exp [nexps++]
    2983         1184 :               = gen_rtx_EXPR_LIST (VOIDmode,
    2984              :                                    gen_rtx_REG (tmpmode,
    2985          592 :                                                 GET_SSE_REGNO (sse_regno)),
    2986          592 :                                    GEN_INT (i*8));
    2987          592 :             sse_regno++;
    2988          592 :             break;
    2989         3052 :           case X86_64_SSESF_CLASS:
    2990         6104 :             exp [nexps++]
    2991         6104 :               = gen_rtx_EXPR_LIST (VOIDmode,
    2992              :                                    gen_rtx_REG (SFmode,
    2993         3052 :                                                 GET_SSE_REGNO (sse_regno)),
    2994         3052 :                                    GEN_INT (i*8));
    2995         3052 :             sse_regno++;
    2996         3052 :             break;
    2997       482055 :           case X86_64_SSEDF_CLASS:
    2998       964110 :             exp [nexps++]
    2999       964110 :               = gen_rtx_EXPR_LIST (VOIDmode,
    3000              :                                    gen_rtx_REG (DFmode,
    3001       482055 :                                                 GET_SSE_REGNO (sse_regno)),
    3002       482055 :                                    GEN_INT (i*8));
    3003       482055 :             sse_regno++;
    3004       482055 :             break;
    3005       251537 :           case X86_64_SSE_CLASS:
                                    /* POS remembers the offset of this chunk, because I is
                                       advanced below to skip the SSEUP chunks that are
                                       covered by the same, wider register.  */
    3006       251537 :             pos = i;
    3007       251537 :             switch (n)
    3008              :               {
    3009              :               case 1:
    3010              :                 tmpmode = DImode;
    3011              :                 break;
    3012        10128 :               case 2:
    3013        10128 :                 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
    3014              :                   {
    3015            0 :                     tmpmode = TImode;
    3016            0 :                     i++;
    3017              :                   }
    3018              :                 else
    3019              :                   tmpmode = DImode;
    3020              :                 break;
    3021         1689 :               case 4:
    3022         1689 :                 gcc_assert (i == 0
    3023              :                             && regclass[1] == X86_64_SSEUP_CLASS
    3024              :                             && regclass[2] == X86_64_SSEUP_CLASS
    3025              :                             && regclass[3] == X86_64_SSEUP_CLASS);
    3026              :                 tmpmode = OImode;
    3027              :                 i += 3;
    3028              :                 break;
    3029         2136 :               case 8:
    3030         2136 :                 gcc_assert (i == 0
    3031              :                             && regclass[1] == X86_64_SSEUP_CLASS
    3032              :                             && regclass[2] == X86_64_SSEUP_CLASS
    3033              :                             && regclass[3] == X86_64_SSEUP_CLASS
    3034              :                             && regclass[4] == X86_64_SSEUP_CLASS
    3035              :                             && regclass[5] == X86_64_SSEUP_CLASS
    3036              :                             && regclass[6] == X86_64_SSEUP_CLASS
    3037              :                             && regclass[7] == X86_64_SSEUP_CLASS);
    3038              :                 tmpmode = XImode;
    3039              :                 i += 7;
    3040              :                 break;
    3041            0 :               default:
    3042            0 :                 gcc_unreachable ();
    3043              :               }
    3044       503074 :             exp [nexps++]
    3045       503074 :               = gen_rtx_EXPR_LIST (VOIDmode,
    3046              :                                    gen_rtx_REG (tmpmode,
    3047       251537 :                                                 GET_SSE_REGNO (sse_regno)),
    3048       251537 :                                    GEN_INT (pos*8));
    3049       251537 :             sse_regno++;
    3050       251537 :             break;
    3051            0 :           default:
    3052            0 :             gcc_unreachable ();
    3053              :         }
    3054              :     }
    3055              : 
    3056              :   /* Empty aligned struct, union or class: no entry was produced
                      :      above, so there is nothing to put into a PARALLEL.  */
    3057       990724 :   if (nexps == 0)
    3058              :     return NULL;
    3059              : 
    3060       990469 :   ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
    3061      2722088 :   for (i = 0; i < nexps; i++)
    3062      1731619 :     XVECEXP (ret, 0, i) = exp [i];
    3063              :   return ret;
    3064              : }
    3065              : 
    3066              : /* Update the data in CUM to advance over an argument of mode MODE
    3067              :    and data type TYPE.  (TYPE is null for libcalls where that information
    3068              :    may not be available.)  BYTES and WORDS are the size of the argument
                      :    in bytes and in words, as computed by ix86_function_arg_advance.
    3069              : 
    3070              :    Return a number of integer registers advanced over: WORDS when the
                      :    argument still fit into the remaining integer registers, and 0
                      :    otherwise, which includes every argument that is counted against
                      :    the SSE or MMX registers instead.  */
    3071              : 
    3072              : static int
    3073      2130430 : function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
    3074              :                          const_tree type, HOST_WIDE_INT bytes,
    3075              :                          HOST_WIDE_INT words)
    3076              : {
    3077      2130430 :   int res = 0;
    3078      2130430 :   bool error_p = false;
    3079              : 
    3080      2130430 :   if (TARGET_IAMCU)
    3081              :     {
    3082              :       /* Intel MCU psABI passes scalars and aggregates no larger than 8
    3083              :          bytes in registers.  Anything else returns RES, which is
                      :          still 0 at this point.  */
    3084            0 :       if (!VECTOR_MODE_P (mode) && bytes <= 8)
    3085            0 :         goto pass_in_reg;
    3086              :       return res;
    3087              :     }
    3088              : 
    3089      2130430 :   switch (mode)
    3090              :     {
    3091              :     default:
    3092              :       break;
    3093              : 
    3094        93818 :     case E_BLKmode:
    3095        93818 :       if (bytes < 0)
    3096              :         break;
    3097              :       /* FALLTHRU */
    3098              : 
    3099      2093611 :     case E_DImode:
    3100      2093611 :     case E_SImode:
    3101      2093611 :     case E_HImode:
    3102      2093611 :     case E_QImode:
    3103        93818 : pass_in_reg:
                              /* Account for WORDS integer registers.  RES is only set
                                 when the argument still fit, i.e. cum->nregs did not
                                 become negative.  */
    3104      2093611 :       cum->words += words;
    3105      2093611 :       cum->nregs -= words;
    3106      2093611 :       cum->regno += words;
    3107      2093611 :       if (cum->nregs >= 0)
    3108        47414 :         res = words;
                              /* No integer register is left: reset the counters and
                                 record that in cfun->machine->arg_reg_available.  */
    3109      2093611 :       if (cum->nregs <= 0)
    3110              :         {
    3111      2059447 :           cum->nregs = 0;
    3112      2059447 :           cfun->machine->arg_reg_available = false;
    3113      2059447 :           cum->regno = 0;
    3114              :         }
    3115              :       break;
    3116              : 
    3117            0 :     case E_OImode:
    3118              :       /* OImode shouldn't be used directly.  */
    3119            0 :       gcc_unreachable ();
    3120              : 
    3121         4743 :     case E_DFmode:
                              /* A float_in_sse of -1 is reported as an error at the end
                                 of this function.  DFmode is only counted against the SSE
                                 registers when float_in_sse is at least 2.  */
    3122         4743 :       if (cum->float_in_sse == -1)
    3123            0 :         error_p = true;
    3124         4743 :       if (cum->float_in_sse < 2)
    3125              :         break;
    3126              :       /* FALLTHRU */
    3127         1360 :     case E_SFmode:
                              /* SFmode needs float_in_sse to be at least 1.  */
    3128         1360 :       if (cum->float_in_sse == -1)
    3129            0 :         error_p = true;
    3130         1360 :       if (cum->float_in_sse < 1)
    3131              :         break;
    3132              :       /* FALLTHRU */
    3133              : 
    3134           52 :     case E_V16HFmode:
    3135           52 :     case E_V16BFmode:
    3136           52 :     case E_V8SFmode:
    3137           52 :     case E_V8SImode:
    3138           52 :     case E_V64QImode:
    3139           52 :     case E_V32HImode:
    3140           52 :     case E_V16SImode:
    3141           52 :     case E_V8DImode:
    3142           52 :     case E_V32HFmode:
    3143           52 :     case E_V32BFmode:
    3144           52 :     case E_V16SFmode:
    3145           52 :     case E_V8DFmode:
    3146           52 :     case E_V32QImode:
    3147           52 :     case E_V16HImode:
    3148           52 :     case E_V4DFmode:
    3149           52 :     case E_V4DImode:
    3150           52 :     case E_TImode:
    3151           52 :     case E_V16QImode:
    3152           52 :     case E_V8HImode:
    3153           52 :     case E_V4SImode:
    3154           52 :     case E_V2DImode:
    3155           52 :     case E_V8HFmode:
    3156           52 :     case E_V8BFmode:
    3157           52 :     case E_V4SFmode:
    3158           52 :     case E_V2DFmode:
                              /* Non-aggregate values of these modes use up one SSE
                                 register each; aggregates leave the counters alone.  */
    3159           52 :       if (!type || !AGGREGATE_TYPE_P (type))
    3160              :         {
    3161           52 :           cum->sse_words += words;
    3162           52 :           cum->sse_nregs -= 1;
    3163           52 :           cum->sse_regno += 1;
    3164           52 :           if (cum->sse_nregs <= 0)
    3165              :             {
    3166            4 :               cum->sse_nregs = 0;
    3167            4 :               cum->sse_regno = 0;
    3168              :             }
    3169              :         }
    3170              :       break;
    3171              : 
    3172           16 :     case E_V8QImode:
    3173           16 :     case E_V4HImode:
    3174           16 :     case E_V4HFmode:
    3175           16 :     case E_V4BFmode:
    3176           16 :     case E_V2SImode:
    3177           16 :     case E_V2SFmode:
    3178           16 :     case E_V1TImode:
    3179           16 :     case E_V1DImode:
                              /* Likewise for the MMX registers.  */
    3180           16 :       if (!type || !AGGREGATE_TYPE_P (type))
    3181              :         {
    3182           16 :           cum->mmx_words += words;
    3183           16 :           cum->mmx_nregs -= 1;
    3184           16 :           cum->mmx_regno += 1;
    3185           16 :           if (cum->mmx_nregs <= 0)
    3186              :             {
    3187            0 :               cum->mmx_nregs = 0;
    3188            0 :               cum->mmx_regno = 0;
    3189              :             }
    3190              :         }
    3191              :       break;
    3192              :     }
                          /* ERROR_P was set above when float_in_sse was -1.  float_in_sse
                             is cleared before diagnosing, presumably so that later
                             arguments do not trigger the same error again.  */
    3193      2065602 :   if (error_p)
    3194              :     {
    3195            0 :       cum->float_in_sse = 0;
    3196            0 :       error ("calling %qD with SSE calling convention without "
    3197              :              "SSE/SSE2 enabled", cum->decl);
    3198            0 :       sorry ("this is a GCC bug that can be worked around by adding "
    3199              :              "attribute used to function called");
    3200              :     }
    3201              : 
    3202              :   return res;
    3203              : }
    3204              : 
    /* Update the data in CUM to advance over an argument of mode MODE and
       data type TYPE.  ix86_function_arg_advance uses this variant for
       64-bit targets when the call ABI is not MS_ABI.  WORDS is the size
       of the argument in words and NAMED is false for unnamed arguments.

       Return the number of integer registers advanced over.  This is 0
       when the argument is accounted to cum->words instead, and also when
       it only uses SSE registers.  */
    3205              : static int
    3206     18945027 : function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
    3207              :                          const_tree type, HOST_WIDE_INT words, bool named)
    3208              : {
    3209     18945027 :   int int_nregs, sse_nregs;
    3210              : 
    3211              :   /* Unnamed 512 and 256bit vector mode parameters are passed on stack.
                      :      CUM is left untouched for them.  */
    3212     18945027 :   if (!named && (VALID_AVX512F_REG_MODE (mode)
    3213              :                  || VALID_AVX256_REG_MODE (mode)))
    3214              :     return 0;
    3215              : 
                          /* Use registers only when examine_argument accepts the argument
                             and both kinds of registers it needs are still available.  */
    3216     18944663 :   if (!examine_argument (mode, type, false, &int_nregs, &sse_nregs)
    3217     18944663 :       && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    3218              :     {
    3219     16666545 :       cum->nregs -= int_nregs;
    3220     16666545 :       cum->sse_nregs -= sse_nregs;
    3221     16666545 :       cum->regno += int_nregs;
    3222     16666545 :       cum->sse_regno += sse_nregs;
    3223     16666545 :       return int_nregs;
    3224              :     }
    3225              :   else
    3226              :     {
                              /* Otherwise count the argument in cum->words, after rounding
                                 cum->words up to the argument boundary expressed in
                                 words.  */
    3227      2278118 :       int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
    3228      2278118 :       cum->words = ROUND_UP (cum->words, align);
    3229      2278118 :       cum->words += words;
    3230      2278118 :       return 0;
    3231              :     }
    3232              : }
    3233              : 
    /* Update the data in CUM to advance over an argument of BYTES bytes
       and WORDS words.  ix86_function_arg_advance uses this variant for
       64-bit targets when the call ABI is MS_ABI.

       WORDS is always added to cum->words.  Return 1 if an integer
       register was still available and has been used up, 0 otherwise.  */
    3234              : static int
    3235       447161 : function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
    3236              :                             HOST_WIDE_INT words)
    3237              : {
    3238              :   /* Only sizes of 1, 2, 4 or 8 bytes are expected here.
                      :      Otherwise, this should be passed indirect.  */
    3239       447161 :   gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
    3240              : 
    3241       447161 :   cum->words += words;
    3242       447161 :   if (cum->nregs > 0)
    3243              :     {
    3244       289519 :       cum->nregs -= 1;
    3245       289519 :       cum->regno += 1;
    3246       289519 :       return 1;
    3247              :     }
    3248              :   return 0;
    3249              : }
    3250              : 
    3251              : /* Update the data in CUM to advance over argument ARG.
                      :
                      :    CUM_V wraps the CUMULATIVE_ARGS that is updated.  The work is
                      :    delegated to function_arg_advance_ms_64, function_arg_advance_64
                      :    or function_arg_advance_32, depending on TARGET_64BIT and on the
                      :    call ABI.  When the helper returns 0 and CUM describes the caller
                      :    side, cfun->machine->outgoing_args_on_stack is set.  */
    3252              : 
    3253              : static void
    3254     21522985 : ix86_function_arg_advance (cumulative_args_t cum_v,
    3255              :                            const function_arg_info &arg)
    3256              : {
    3257     21522985 :   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
    3258     21522985 :   machine_mode mode = arg.mode;
    3259     21522985 :   HOST_WIDE_INT bytes, words;
    3260     21522985 :   int nregs;
    3261              : 
    3262              :   /* The argument of interrupt handler is a special case and is
    3263              :      handled in ix86_function_arg.  Nothing is advanced here when
                      :      the current function's func_type is not TYPE_NORMAL and CUM is
                      :      not for the caller side.  */
    3264     21522985 :   if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
    3265              :     return;
    3266              : 
    3267     21522618 :   bytes = arg.promoted_size_in_bytes ();
    3268     21522618 :   words = CEIL (bytes, UNITS_PER_WORD);
    3269              : 
                          /* When the type is known, use its natural mode instead of
                             arg.mode.  */
    3270     21522618 :   if (arg.type)
    3271     21208438 :     mode = type_natural_mode (arg.type, NULL, false);
    3272              : 
    3273     21522618 :   if (TARGET_64BIT)
    3274              :     {
                              /* NOTE(review): CUM has already been dereferenced above, so
                                 the null test below does not look reachable with a null
                                 CUM -- confirm before relying on the ix86_abi fallback.  */
    3275     19392188 :       enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
    3276              : 
    3277     19392188 :       if (call_abi == MS_ABI)
    3278       447161 :         nregs = function_arg_advance_ms_64 (cum, bytes, words);
    3279              :       else
    3280     18945027 :         nregs = function_arg_advance_64 (cum, mode, arg.type, words,
    3281     18945027 :                                          arg.named);
    3282              :     }
    3283              :   else
    3284      2130430 :     nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);
    3285              : 
    3286     21522618 :   if (!nregs)
    3287              :     {
    3288              :       /* Track if there are outgoing arguments on stack.
                      :          NOTE(review): NREGS only counts integer registers, so an
                      :          argument that the helpers place purely in SSE or MMX
                      :          registers also takes this path -- confirm that this
                      :          conservative result is intended.  */
    3289      5712704 :       if (cum->caller)
    3290      2720340 :         cfun->machine->outgoing_args_on_stack = true;
    3291              :     }
    3292              : }
    3293              : 
    3294              : /* Define where to put the arguments to a function.
    3295              :    Value is zero to push the argument on the stack,
    3296              :    or a hard register in which to store the argument.
    3297              : 
    3298              :    MODE is the argument's machine mode.
    3299              :    TYPE is the data type of the argument (as a tree).
    3300              :     This is null for libcalls where that information may
    3301              :     not be available.
    3302              :    CUM is a variable of type CUMULATIVE_ARGS which gives info about
    3303              :     the preceding args and about the function being called.
    3304              :    NAMED is nonzero if this argument is a named parameter
    3305              :     (otherwise it is an extra parameter matching an ellipsis).  */
    3306              : 
    3307              : static rtx
    3308      2557294 : function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
    3309              :                  machine_mode orig_mode, const_tree type,
    3310              :                  HOST_WIDE_INT bytes, HOST_WIDE_INT words)
    3311              : {
    3312      2557294 :   bool error_p = false;
    3313              : 
    3314              :   /* Avoid the AL settings for the Unix64 ABI.  */
    3315      2557294 :   if (mode == VOIDmode)
    3316       742139 :     return constm1_rtx;
    3317              : 
    3318      1815155 :   if (TARGET_IAMCU)
    3319              :     {
    3320              :       /* Intel MCU psABI passes scalars and aggregates no larger than 8
    3321              :          bytes in registers.  */
    3322            0 :       if (!VECTOR_MODE_P (mode) && bytes <= 8)
    3323            0 :         goto pass_in_reg;
    3324              :       return NULL_RTX;
    3325              :     }
    3326              : 
    3327      1815155 :   switch (mode)
    3328              :     {
    3329              :     default:
    3330              :       break;
    3331              : 
    3332        77786 :     case E_BLKmode:
    3333        77786 :       if (bytes < 0)
    3334              :         break;
    3335              :       /* FALLTHRU */
    3336      1781621 :     case E_DImode:
    3337      1781621 :     case E_SImode:
    3338      1781621 :     case E_HImode:
    3339      1781621 :     case E_QImode:
    3340        77786 : pass_in_reg:
    3341      1781621 :       if (words <= cum->nregs)
    3342              :         {
    3343        45582 :           int regno = cum->regno;
    3344              : 
    3345              :           /* Fastcall allocates the first two DWORD (SImode) or
    3346              :             smaller arguments to ECX and EDX if it isn't an
    3347              :             aggregate type .  */
    3348        45582 :           if (cum->fastcall)
    3349              :             {
    3350            6 :               if (mode == BLKmode
    3351            6 :                   || mode == DImode
    3352            6 :                   || (type && AGGREGATE_TYPE_P (type)))
    3353              :                 break;
    3354              : 
    3355              :               /* ECX not EAX is the first allocated register.  */
    3356            6 :               if (regno == AX_REG)
    3357        45582 :                 regno = CX_REG;
    3358              :             }
    3359        45582 :           return gen_rtx_REG (mode, regno);
    3360              :         }
    3361              :       break;
    3362              : 
    3363         3353 :     case E_DFmode:
    3364         3353 :       if (cum->float_in_sse == -1)
    3365            0 :         error_p = true;
    3366         3353 :       if (cum->float_in_sse < 2)
    3367              :         break;
    3368              :       /* FALLTHRU */
    3369          960 :     case E_SFmode:
    3370          960 :       if (cum->float_in_sse == -1)
    3371            0 :         error_p = true;
    3372          960 :       if (cum->float_in_sse < 1)
    3373              :         break;
    3374              :       /* FALLTHRU */
    3375           12 :     case E_TImode:
    3376              :       /* In 32bit, we pass TImode in xmm registers.  */
    3377           12 :     case E_V16QImode:
    3378           12 :     case E_V8HImode:
    3379           12 :     case E_V4SImode:
    3380           12 :     case E_V2DImode:
    3381           12 :     case E_V8HFmode:
    3382           12 :     case E_V8BFmode:
    3383           12 :     case E_V4SFmode:
    3384           12 :     case E_V2DFmode:
    3385           12 :       if (!type || !AGGREGATE_TYPE_P (type))
    3386              :         {
    3387           12 :           if (cum->sse_nregs)
    3388           12 :             return gen_reg_or_parallel (mode, orig_mode,
    3389           12 :                                         cum->sse_regno + FIRST_SSE_REG);
    3390              :         }
    3391              :       break;
    3392              : 
    3393            0 :     case E_OImode:
    3394            0 :     case E_XImode:
    3395              :       /* OImode and XImode shouldn't be used directly.  */
    3396            0 :       gcc_unreachable ();
    3397              : 
    3398            9 :     case E_V64QImode:
    3399            9 :     case E_V32HImode:
    3400            9 :     case E_V16SImode:
    3401            9 :     case E_V8DImode:
    3402            9 :     case E_V32HFmode:
    3403            9 :     case E_V32BFmode:
    3404            9 :     case E_V16SFmode:
    3405            9 :     case E_V8DFmode:
    3406            9 :     case E_V16HFmode:
    3407            9 :     case E_V16BFmode:
    3408            9 :     case E_V8SFmode:
    3409            9 :     case E_V8SImode:
    3410            9 :     case E_V32QImode:
    3411            9 :     case E_V16HImode:
    3412            9 :     case E_V4DFmode:
    3413            9 :     case E_V4DImode:
    3414            9 :       if (!type || !AGGREGATE_TYPE_P (type))
    3415              :         {
    3416            9 :           if (cum->sse_nregs)
    3417            9 :             return gen_reg_or_parallel (mode, orig_mode,
    3418            9 :                                         cum->sse_regno + FIRST_SSE_REG);
    3419              :         }
    3420              :       break;
    3421              : 
    3422            8 :     case E_V8QImode:
    3423            8 :     case E_V4HImode:
    3424            8 :     case E_V4HFmode:
    3425            8 :     case E_V4BFmode:
    3426            8 :     case E_V2SImode:
    3427            8 :     case E_V2SFmode:
    3428            8 :     case E_V1TImode:
    3429            8 :     case E_V1DImode:
    3430            8 :       if (!type || !AGGREGATE_TYPE_P (type))
    3431              :         {
    3432            8 :           if (cum->mmx_nregs)
    3433            8 :             return gen_reg_or_parallel (mode, orig_mode,
    3434            8 :                                         cum->mmx_regno + FIRST_MMX_REG);
    3435              :         }
    3436              :       break;
    3437              :     }
    3438         4313 :   if (error_p)
    3439              :     {
    3440            0 :       cum->float_in_sse = 0;
    3441            0 :       error ("calling %qD with SSE calling convention without "
    3442              :              "SSE/SSE2 enabled", cum->decl);
    3443            0 :       sorry ("this is a GCC bug that can be worked around by adding "
    3444              :              "attribute used to function called");
    3445              :     }
    3446              : 
    3447              :   return NULL_RTX;
    3448              : }
    3449              : 
    3450              : static rtx
    3451     18655554 : function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
    3452              :                  machine_mode orig_mode, const_tree type, bool named)
    3453              : {
    3454              :   /* Handle a hidden AL argument containing the number of registers for
    3455              :      varargs x86-64 functions (-1 when CUM->maybe_vaarg is not set).  */
    3456     18655554 :   if (mode == VOIDmode)
    3457      5174802 :     return GEN_INT (cum->maybe_vaarg
    3458              :                     ? (cum->sse_nregs < 0
    3459              :                        ? X86_64_SSE_REGPARM_MAX
    3460              :                        : cum->sse_regno)
    3461              :                     : -1);
    3462              : 
    3463     13480752 :   switch (mode)
    3464              :     {
    3465              :     default:
    3466              :       break;
    3467              : 
    3468        90563 :     case E_V16HFmode:
    3469        90563 :     case E_V16BFmode:
    3470        90563 :     case E_V8SFmode:
    3471        90563 :     case E_V8SImode:
    3472        90563 :     case E_V32QImode:
    3473        90563 :     case E_V16HImode:
    3474        90563 :     case E_V4DFmode:
    3475        90563 :     case E_V4DImode:
    3476        90563 :     case E_V32HFmode:
    3477        90563 :     case E_V32BFmode:
    3478        90563 :     case E_V16SFmode:
    3479        90563 :     case E_V16SImode:
    3480        90563 :     case E_V64QImode:
    3481        90563 :     case E_V32HImode:
    3482        90563 :     case E_V8DFmode:
    3483        90563 :     case E_V8DImode:
    3484              :       /* Unnamed 256-bit and 512-bit vector mode parameters are passed on the stack.  */
    3485        90563 :       if (!named)
    3486              :         return NULL;
    3487              :       break;
    3488              :     }
    3489              : 
    3490     13480388 :   const int *parm_regs;
    3491     13480388 :   if (cum->preserve_none_abi)
    3492              :     parm_regs = x86_64_preserve_none_int_parameter_registers;
    3493              :   else
    3494     13480259 :     parm_regs = x86_64_int_parameter_registers;
    3495              : 
    3496     13480388 :   return construct_container (mode, orig_mode, type, false,
    3497     13480388 :                               cum->nregs, cum->sse_nregs,
    3498     13480388 :                               &parm_regs[cum->regno],
    3499     13480388 :                               cum->sse_regno);
    3500              : }
    3501              : 
    3502              : static rtx
    3503       296428 : function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
    3504              :                     machine_mode orig_mode, bool named, const_tree type,
    3505              :                     HOST_WIDE_INT bytes)
    3506              : {
    3507       296428 :   unsigned int regno;
    3508              : 
    3509              :   /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
    3510              :      We use the value -2 to specify that the current function call is MS ABI.  */
    3511       296428 :   if (mode == VOIDmode)
    3512        36295 :     return GEN_INT (-2);
    3513              : 
    3514              :   /* If we've run out of registers, it goes on the stack.  */
    3515       260133 :   if (cum->nregs == 0)
    3516              :     return NULL_RTX;
    3517              : 
    3518       176374 :   regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
    3519              : 
    3520              :   /* Only the floating point modes HFmode, SFmode and DFmode are passed in anything
    3521              :      but integer regs.  Larger floating point types are excluded as the Windows
    3522              :      ABI requires vreg args can be shadowed in GPRs (for red zone / varargs). */
    3523       176374 :   if (TARGET_SSE && (mode == HFmode || mode == SFmode || mode == DFmode))
    3524              :     {
    3525        38260 :       if (named)
    3526              :         {
    3527        38260 :           if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
    3528        37263 :             regno = cum->regno + FIRST_SSE_REG;
    3529              :         }
    3530              :       else
    3531              :         {
    3532            0 :           rtx t1, t2;
    3533              : 
    3534              :           /* Unnamed floating parameters are passed in both the
    3535              :              SSE and integer registers.  */
    3536            0 :           t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
    3537            0 :           t2 = gen_rtx_REG (mode, regno);
    3538            0 :           t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
    3539            0 :           t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
    3540            0 :           return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
    3541              :         }
    3542              :     }
    3543              :   /* Handle aggregate types passed in a register.  */
    3544       176374 :   if (orig_mode == BLKmode)
    3545              :     {
    3546            0 :       if (bytes > 0 && bytes <= 8)
    3547            0 :         mode = (bytes > 4 ? DImode : SImode);
    3548            0 :       if (mode == BLKmode)
    3549            0 :         mode = DImode;
    3550              :     }
    3551              : 
    3552       176374 :   return gen_reg_or_parallel (mode, orig_mode, regno);
    3553              : }
    3554              : 
    3555              : /* Return where to put an argument to a function.
    3556              :    Return zero to push the argument on the stack, or an rtx in which to store it.
    3557              : 
    3558              :    ARG describes the argument while CUM_V gives information about the
    3559              :    preceding args and about the function being called.  */
    3560              : 
    3561              : static rtx
    3562     21509463 : ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
    3563              : {
    3564     21509463 :   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
    3565     21509463 :   machine_mode mode = arg.mode;
    3566     21509463 :   HOST_WIDE_INT bytes, words;
    3567     21509463 :   rtx reg;
    3568              : 
    3569     21509463 :   if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
    3570              :     {
    3571          187 :       gcc_assert (arg.type != NULL_TREE);
    3572          187 :       if (POINTER_TYPE_P (arg.type))
    3573              :         {
    3574              :           /* This is the pointer argument.  */
    3575          122 :           gcc_assert (TYPE_MODE (arg.type) == ptr_mode);
    3576              :           /* It is at -WORD(AP) in the current frame in interrupt and
    3577              :              exception handlers.  */
    3578          122 :           reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
    3579              :         }
    3580              :       else
    3581              :         {
    3582           65 :           gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
    3583              :                       && TREE_CODE (arg.type) == INTEGER_TYPE
    3584              :                       && TYPE_MODE (arg.type) == word_mode);
    3585              :           /* The error code is the word-mode integer argument at
    3586              :              -2 * WORD(AP) in the current frame of the exception
    3587              :              handler.  */
    3588           65 :           reg = gen_rtx_MEM (word_mode,
    3589           65 :                              plus_constant (Pmode,
    3590              :                                             arg_pointer_rtx,
    3591           65 :                                             -2 * UNITS_PER_WORD));
    3592              :         }
    3593          187 :       return reg;
    3594              :     }
    3595              : 
    3596     21509276 :   bytes = arg.promoted_size_in_bytes ();
    3597     21509276 :   words = CEIL (bytes, UNITS_PER_WORD);
    3598              : 
    3599              :   /* To simplify the code below, represent vector types with a vector mode
    3600              :      even if MMX/SSE are not active.  */
    3601     21509276 :   if (arg.type && VECTOR_TYPE_P (arg.type))
    3602       172194 :     mode = type_natural_mode (arg.type, cum, false);
    3603              : 
    3604     21509276 :   if (TARGET_64BIT)
    3605              :     {
    3606     18951982 :       enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
    3607              : 
    3608     18951982 :       if (call_abi == MS_ABI)
    3609       296428 :         reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
    3610       296428 :                                   arg.type, bytes);
    3611              :       else
    3612     18655554 :         reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
    3613              :     }
    3614              :   else
    3615      2557294 :     reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);
    3616              : 
    3617              :   /* Track whether there are outgoing arguments on the stack.  */
    3618     21509276 :   if (reg == NULL_RTX && cum->caller)
    3619      2201158 :     cfun->machine->outgoing_args_on_stack = true;
    3620              : 
    3621              :   return reg;
    3622              : }
    3623              : 
    3624              : /* Return true when the argument described by ARG must be passed by
    3625              :    reference.  If true for an argument, a copy of that argument is
    3626              :    made in memory and a pointer to the argument is passed instead of
    3627              :    the argument itself.  The pointer is passed in whatever way is
    3628              :    appropriate for passing a pointer to that type.  */
    3629              : 
    3630              : static bool
    3631     21443588 : ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
    3632              : {
    3633     21443588 :   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
    3634              : 
    3635     21443588 :   if (TARGET_64BIT)
    3636              :     {
    3637     19323398 :       enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
    3638              : 
    3639              :       /* See Windows x64 Software Convention.  */
    3640     19323398 :       if (call_abi == MS_ABI)
    3641              :         {
    3642       441562 :           HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);
    3643              : 
    3644       441562 :           if (tree type = arg.type)
    3645              :             {
    3646              :               /* Arrays are passed by reference.  */
    3647       441562 :               if (TREE_CODE (type) == ARRAY_TYPE)
    3648              :                 return true;
    3649              : 
    3650       441562 :               if (RECORD_OR_UNION_TYPE_P (type))
    3651              :                 {
    3652              :                   /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
    3653              :                      are passed by reference.  */
    3654        15103 :                   msize = int_size_in_bytes (type);
    3655              :                 }
    3656              :             }
    3657              : 
    3658              :           /* __m128 is passed by reference, as is any size other than 1, 2, 4 or 8 bytes.  */
    3659       873131 :           return msize != 1 && msize != 2 && msize != 4 && msize != 8;
    3660              :         }
    3661     18881836 :       else if (arg.type && int_size_in_bytes (arg.type) == -1)
    3662              :         return true;
    3663              :     }
    3664              : 
    3665              :   return false;
    3666              : }
    3667              : 
    3668              : /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
    3669              :    passing ABI.  XXX: This function is obsolete and is only used for
    3670              :    checking psABI compatibility with previous versions of GCC.  */
    3671              : 
    3672              : static bool
    3673      1975539 : ix86_compat_aligned_value_p (const_tree type)
    3674              : {
    3675      1975539 :   machine_mode mode = TYPE_MODE (type);
    3676      1975539 :   if (((TARGET_SSE && SSE_REG_MODE_P (mode))
    3677      1975497 :        || mode == TDmode
    3678      1975497 :        || mode == TFmode
    3679              :        || mode == TCmode)
    3680      1975751 :       && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    3681              :     return true;
    3682      1975327 :   if (TYPE_ALIGN (type) < 128)
    3683              :     return false;
    3684              : 
    3685            0 :   if (AGGREGATE_TYPE_P (type))
    3686              :     {
    3687              :       /* Walk the aggregates recursively.  */
    3688            0 :       switch (TREE_CODE (type))
    3689              :         {
    3690            0 :         case RECORD_TYPE:
    3691            0 :         case UNION_TYPE:
    3692            0 :         case QUAL_UNION_TYPE:
    3693            0 :           {
    3694            0 :             tree field;
    3695              : 
    3696              :             /* Walk all the structure fields.  */
    3697            0 :             for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    3698              :               {
    3699            0 :                 if (TREE_CODE (field) == FIELD_DECL
    3700            0 :                     && ix86_compat_aligned_value_p (TREE_TYPE (field)))
    3701              :                   return true;
    3702              :               }
    3703              :             break;
    3704              :           }
    3705              : 
    3706            0 :         case ARRAY_TYPE:
    3707              :           /* Only relevant if some language passes arrays by value.  */
    3708            0 :           if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
    3709              :             return true;
    3710              :           break;
    3711              : 
    3712              :         default:
    3713              :           gcc_unreachable ();
    3714              :         }
    3715              :     }
    3716              :   return false;
    3717              : }
    3718              : 
    3719              : /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
    3720              :    XXX: This function is obsolete and is only used for checking psABI
    3721              :    compatibility with previous versions of GCC.  */
    3722              : 
    3723              : static unsigned int
    3724      5554748 : ix86_compat_function_arg_boundary (machine_mode mode,
    3725              :                                    const_tree type, unsigned int align)
    3726              : {
    3727              :   /* In 32-bit mode, only _Decimal128 and __float128 are aligned to their
    3728              :      natural boundaries.  */
    3729      5554748 :   if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
    3730              :     {
    3731              :       /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
    3732              :          make an exception for SSE modes since these require 128-bit
    3733              :          alignment.
    3734              : 
    3735              :          The handling here differs from field_alignment.  ICC aligns MMX
    3736              :          arguments to 4 byte boundaries, while structure fields are aligned
    3737              :          to 8 byte boundaries.  */
    3738      1987447 :       if (!type)
    3739              :         {
    3740        11908 :           if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
    3741      1987235 :             align = PARM_BOUNDARY;
    3742              :         }
    3743              :       else
    3744              :         {
    3745      1975539 :           if (!ix86_compat_aligned_value_p (type))
    3746      1987235 :             align = PARM_BOUNDARY;
    3747              :         }
    3748              :     }
    3749     10708329 :   if (align > BIGGEST_ALIGNMENT)
    3750           90 :     align = BIGGEST_ALIGNMENT;
    3751      5554748 :   return align;
    3752              : }
    3753              : 
    3754              : /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
    3755              :    passing ABI.  */
    3756              : 
    3757              : static bool
    3758      1978222 : ix86_contains_aligned_value_p (const_tree type)
    3759              : {
    3760      1978222 :   machine_mode mode = TYPE_MODE (type);
    3761              : 
    3762      1978222 :   if (mode == XFmode || mode == XCmode)
    3763              :     return false;
    3764              : 
    3765      1976081 :   if (TYPE_ALIGN (type) < 128)
    3766              :     return false;
    3767              : 
    3768         2895 :   if (AGGREGATE_TYPE_P (type))
    3769              :     {
    3770              :       /* Walk the aggregates recursively.  */
    3771            0 :       switch (TREE_CODE (type))
    3772              :         {
    3773            0 :         case RECORD_TYPE:
    3774            0 :         case UNION_TYPE:
    3775            0 :         case QUAL_UNION_TYPE:
    3776            0 :           {
    3777            0 :             tree field;
    3778              : 
    3779              :             /* Walk all the structure fields.  */
    3780            0 :             for (field = TYPE_FIELDS (type);
    3781            0 :                  field;
    3782            0 :                  field = DECL_CHAIN (field))
    3783              :               {
    3784            0 :                 if (TREE_CODE (field) == FIELD_DECL
    3785            0 :                     && ix86_contains_aligned_value_p (TREE_TYPE (field)))
    3786              :                   return true;
    3787              :               }
    3788              :             break;
    3789              :           }
    3790              : 
    3791            0 :         case ARRAY_TYPE:
    3792              :           /* Only relevant if some language passes arrays by value.  */
    3793            0 :           if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
    3794              :             return true;
    3795              :           break;
    3796              : 
    3797              :         default:
    3798              :           gcc_unreachable ();
    3799              :         }
    3800              :     }
    3801              :   else
    3802         2895 :     return TYPE_ALIGN (type) >= 128;
    3803              : 
    3804              :   return false;
    3805              : }
    3806              : 
    3807              : /* Return the alignment boundary, in bits, of an argument with the
    3808              :    specified MODE and TYPE.  */
    3809              : 
    3810              : static unsigned int
    3811     10962581 : ix86_function_arg_boundary (machine_mode mode, const_tree type)
    3812              : {
    3813     10962581 :   unsigned int align;
    3814     10962581 :   if (type)
    3815              :     {
    3816              :       /* Since the main variant type is used for the call, convert TYPE
    3817              :          to its main variant.  */
    3818     10922796 :       type = TYPE_MAIN_VARIANT (type);
    3819     10922796 :       align = TYPE_ALIGN (type);
    3820     10922796 :       if (TYPE_EMPTY_P (type))
    3821        25004 :         return PARM_BOUNDARY;
    3822              :     }
    3823              :   else
    3824        39785 :     align = GET_MODE_ALIGNMENT (mode);
    3825     12962922 :   if (align < PARM_BOUNDARY)
    3826      4120158 :     align = PARM_BOUNDARY;
    3827              :   else
    3828              :     {
    3829      6817419 :       static bool warned;
    3830      6817419 :       unsigned int saved_align = align;
    3831              : 
    3832      6817419 :       if (!TARGET_64BIT)
    3833              :         {
    3834              :           /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
    3835      2013984 :           if (!type)
    3836              :             {
    3837        35762 :               if (mode == XFmode || mode == XCmode)
    3838              :                 align = PARM_BOUNDARY;
    3839              :             }
    3840      1978222 :           else if (!ix86_contains_aligned_value_p (type))
    3841              :             align = PARM_BOUNDARY;
    3842              : 
    3843        38657 :           if (align < 128)
    3844      1987235 :             align = PARM_BOUNDARY;
    3845              :         }
    3846              : 
    3847      6817419 :       if (warn_psabi
    3848      5559584 :           && !warned
    3849     12372167 :           && align != ix86_compat_function_arg_boundary (mode, type,
    3850              :                                                          saved_align))
    3851              :         {
    3852           90 :           warned = true;
    3853           90 :           inform (input_location,
    3854              :                   "the ABI for passing parameters with %d-byte"
    3855              :                   " alignment has changed in GCC 4.6",
    3856              :                   align / BITS_PER_UNIT);
    3857              :         }
    3858              :     }
    3859              : 
    3860              :   return align;
    3861              : }
    3862              : 
    3863              : /* Return true if REGNO is a possible register number of a function value.  */
    3864              : 
    3865              : static bool
    3866      4689721 : ix86_function_value_regno_p (const unsigned int regno)
    3867              : {
    3868      4689721 :   switch (regno)
    3869              :     {
    3870              :     case AX_REG:
    3871              :       return true;
    3872       102091 :     case DX_REG:
    3873       102091 :       return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
    3874              : 
    3875              :       /* Complex values are returned in %st(0)/%st(1) pair.  */
    3876        25299 :     case ST0_REG:
    3877        25299 :     case ST1_REG:
    3878              :       /* TODO: The function should depend on current function ABI but
    3879              :        builtins.cc would need updating then. Therefore we use the
    3880              :        default ABI.  */
    3881        25299 :       if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
    3882              :         return false;
    3883        25299 :       return TARGET_FLOAT_RETURNS_IN_80387;
    3884              : 
    3885              :       /* Complex values are returned in %xmm0/%xmm1 pair.  */
    3886      1291414 :     case XMM0_REG:
    3887      1291414 :     case XMM1_REG:
    3888      1291414 :       return TARGET_SSE;
    3889              : 
    3890        10080 :     case MM0_REG:
    3891        10080 :       if (TARGET_MACHO || TARGET_64BIT)
    3892              :         return false;
    3893         2494 :       return TARGET_MMX;
    3894              :     }
    3895              : 
    3896              :   return false;
    3897              : }
    3898              : 
    3899              : /* Check whether the register REGNO should be zeroed on X86.
    3900              :    When ALL_SSE_ZEROED is true, all SSE registers have been zeroed
    3901              :    together, so there is no need to zero them again.
    3902              :    When NEED_ZERO_MMX is true, MMX registers should be cleared.  */
    3903              : 
    3904              : static bool
    3905         1377 : zero_call_used_regno_p (const unsigned int regno,
    3906              :                         bool all_sse_zeroed,
    3907              :                         bool need_zero_mmx)
    3908              : {
    3909          835 :   return GENERAL_REGNO_P (regno)
    3910          819 :          || (!all_sse_zeroed && SSE_REGNO_P (regno))
    3911          439 :          || MASK_REGNO_P (regno)
    3912         1800 :          || (need_zero_mmx && MMX_REGNO_P (regno));
    3913              : }
    3914              : 
    3915              : /* Return the machine_mode that is used to zero register REGNO.  */
    3916              : 
    3917              : static machine_mode
    3918          954 : zero_call_used_regno_mode (const unsigned int regno)
    3919              : {
    3920              :   /* NB: We only need to zero the lower 32 bits for integer registers
    3921              :      and the lower 128 bits for vector registers since the destinations are
    3922              :      zero-extended to the full register width.  */
    3923          954 :   if (GENERAL_REGNO_P (regno))
    3924              :     return SImode;
    3925              :   else if (SSE_REGNO_P (regno))
    3926          380 :     return V4SFmode;
    3927              :   else if (MASK_REGNO_P (regno))
    3928              :     return HImode;
    3929              :   else if (MMX_REGNO_P (regno))
    3930            0 :     return V2SImode;
    3931              :   else
    3932            0 :     gcc_unreachable ();
    3933              : }
    3934              : 
    3935              : /* Generate an rtx to zero all vector registers together if possible.  Return NULL
    3936              :    without AVX, or if an applicable SSE register is not in NEED_ZEROED_HARDREGS.  */
    3937              : 
    3938              : static rtx
    3939          131 : zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
    3940              : {
    3941          131 :   if (!TARGET_AVX)
    3942              :     return NULL;
    3943              : 
    3944          372 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    3945          368 :     if ((LEGACY_SSE_REGNO_P (regno)
    3946          336 :          || (TARGET_64BIT
    3947          336 :              && (REX_SSE_REGNO_P (regno)
    3948          304 :                  || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
    3949          432 :         && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
    3950              :       return NULL;
    3951              : 
    3952            4 :   return gen_avx_vzeroall ();
    3953              : }
    3954              : 
    3955              : /* Generate insns to zero all st registers together.
    3956              :    Return the number of st registers zeroed, or 0 when no insns are generated.
    3957              :    Assume the number of st registers that are zeroed is num_of_st,
    3958              :    we will emit the following sequence to zero them together:
    3959              :                   fldz;         \
    3960              :                   fldz;         \
    3961              :                   ...
    3962              :                   fldz;         \
    3963              :                   fstp %%st(0); \
    3964              :                   fstp %%st(0); \
    3965              :                   ...
    3966              :                   fstp %%st(0);
    3967              :    i.e., num_of_st fldz followed by num_of_st fstp to clear the stack
    3968              :    and mark the stack slots empty.
    3969              : 
    3970              :    How to compute the num_of_st:
    3971              :    There is no direct mapping from stack registers to hard register
    3972              :    numbers.  If one stack register needs to be cleared, we don't know
    3973              :    where in the stack the value remains.  So, if any stack register
    3974              :    needs to be cleared, the whole stack should be cleared.  However,
    3975              :    x87 stack registers that hold the return value should be excluded.
    3976              :    x87 returns in the top (two for complex values) register, so
    3977              :    num_of_st should be 7/6 when x87 returns, otherwise it will be 8.
    3978              :    Return the value of num_of_st.  */
    3979              : 
    3980              : 
    3981              : static int
    3982          131 : zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
    3983              : {
    3984              : 
    3985              :   /* If the FPU is disabled, no need to zero all st registers.  */
    3986          131 :   if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    3987              :     return 0;
    3988              : 
    3989        10329 :   unsigned int num_of_st = 0;
    3990        10329 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    3991        10220 :     if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
    3992        10220 :         && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
    3993              :       {
    3994              :         num_of_st++;
    3995              :         break;
    3996              :       }
    3997              : 
    3998          130 :   if (num_of_st == 0)
    3999              :     return 0;
    4000              : 
    4001           21 :   bool return_with_x87 = false;
    4002           42 :   return_with_x87 = (crtl->return_rtx
    4003           21 :                      && (STACK_REG_P (crtl->return_rtx)));
    4004              : 
    4005           21 :   bool complex_return = false;
    4006           42 :   complex_return = (crtl->return_rtx
    4007           21 :                     && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));
    4008              : 
    4009           21 :   if (return_with_x87)
    4010            2 :     if (complex_return)
    4011              :       num_of_st = 6;
    4012              :     else
    4013            1 :       num_of_st = 7;
    4014              :   else
    4015              :     num_of_st = 8;
    4016              : 
    4017           21 :   rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
    4018          186 :   for (unsigned int i = 0; i < num_of_st; i++)
    4019          165 :     emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));
    4020              : 
    4021          186 :   for (unsigned int i = 0; i < num_of_st; i++)
    4022              :     {
    4023          165 :       rtx insn;
    4024          165 :       insn = emit_insn (gen_rtx_SET (st_reg, st_reg));
    4025          165 :       add_reg_note (insn, REG_DEAD, st_reg);
    4026              :     }
    4027           21 :   return num_of_st;
    4028              : }
    4029              : 
    4030              : 
    4031              : /* When the routine exits in MMX mode, if any ST register needs
    4032              :    to be zeroed, we should clear all MMX registers except the
    4033              :    RET_MMX_REGNO that holds the return value.  */
    4034              : static bool
    4035            0 : zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs,
    4036              :                        unsigned int ret_mmx_regno)
    4037              : {
    4038            0 :   bool need_zero_all_mm = false;
    4039            0 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    4040            0 :     if (STACK_REGNO_P (regno)
    4041            0 :         && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
    4042              :       {
    4043              :         need_zero_all_mm = true;
    4044              :         break;
    4045              :       }
    4046              : 
    4047            0 :   if (!need_zero_all_mm)
    4048              :     return false;
    4049              : 
    4050              :   machine_mode mode = V2SImode;
    4051            0 :   for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
    4052            0 :     if (regno != ret_mmx_regno)
    4053              :       {
    4054            0 :         rtx reg = gen_rtx_REG (mode, regno);
    4055            0 :         emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode)));
    4056              :       }
    4057              :   return true;
    4058              : }
    4059              : 
    4060              : /* Implement TARGET_ZERO_CALL_USED_REGS.  */
    4061              : /* Generate a sequence of instructions that zero registers specified by
    4062              :    NEED_ZEROED_HARDREGS.  Return the ZEROED_HARDREGS that are actually
    4063              :    zeroed.  */
    4064              : static HARD_REG_SET
    4065          131 : ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
    4066              : {
    4067          131 :   HARD_REG_SET zeroed_hardregs;
    4068          131 :   bool all_sse_zeroed = false;
    4069          131 :   int all_st_zeroed_num = 0;
    4070          131 :   bool all_mm_zeroed = false;
    4071              : 
    4072          131 :   CLEAR_HARD_REG_SET (zeroed_hardregs);
    4073              : 
    4074              :   /* First, see whether we can zero all vector registers together.  */
    4075          131 :   rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
    4076          131 :   if (zero_all_vec_insn)
    4077              :     {
    4078            4 :       emit_insn (zero_all_vec_insn);
    4079            4 :       all_sse_zeroed = true;
    4080            4 :       if (TARGET_64BIT && TARGET_AVX512F)
    4081              :         {
    4082            2 :           rtx zero = CONST0_RTX (V4SFmode);
    4083           34 :           for (unsigned int regno = XMM16_REG;
    4084           34 :                regno <= XMM31_REG;
    4085              :                regno++)
    4086              :             {
    4087           32 :               rtx reg = gen_rtx_REG (V4SFmode, regno);
    4088           32 :               emit_move_insn (reg, zero);
    4089              :             }
    4090              :         }
    4091              :     }
    4092              : 
    4093              :   /* mm/st registers are a shared register set, so we should follow these
    4094              :      rules to clear them:
    4095              :                         MMX exit mode         x87 exit mode
    4096              :         -------------|----------------------|---------------
    4097              :         uses x87 reg | clear all MMX        | clear all x87
    4098              :         uses MMX reg | clear individual MMX | clear all x87
    4099              :         x87 + MMX    | clear all MMX        | clear all x87
    4100              : 
    4101              :      First, we should decide in which mode (MMX mode or x87 mode) the
    4102              :      function exits.  */
    4103              : 
    4104          131 :   bool exit_with_mmx_mode = (crtl->return_rtx
    4105          131 :                              && (MMX_REG_P (crtl->return_rtx)));
    4106              : 
    4107          131 :   if (!exit_with_mmx_mode)
    4108              :     /* x87 exit mode, we should zero all st registers together.  */
    4109              :     {
    4110          131 :       all_st_zeroed_num = zero_all_st_registers (need_zeroed_hardregs);
    4111              : 
    4112          131 :       if (all_st_zeroed_num > 0)
    4113          189 :         for (unsigned int regno = FIRST_STACK_REG; regno <= LAST_STACK_REG; regno++)
    4114              :           /* x87 stack registers that hold the return value should be excluded.
    4115              :              x87 returns in the top (two for complex values) register.  */
    4116          168 :           if (all_st_zeroed_num == 8
    4117          168 :               || !((all_st_zeroed_num >= 6 && regno == REGNO (crtl->return_rtx))
    4118              :                    || (all_st_zeroed_num == 6
    4119            7 :                        && (regno == (REGNO (crtl->return_rtx) + 1)))))
    4120          165 :             SET_HARD_REG_BIT (zeroed_hardregs, regno);
    4121              :     }
    4122              :   else
    4123              :     /* MMX exit mode, check whether we can zero all mm registers.  */
    4124              :     {
    4125            0 :       unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
    4126            0 :       all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
    4127              :                                              exit_mmx_regno);
    4128            0 :       if (all_mm_zeroed)
    4129            0 :         for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
    4130            0 :           if (regno != exit_mmx_regno)
    4131            0 :             SET_HARD_REG_BIT (zeroed_hardregs, regno);
    4132              :     }
    4133              : 
    4134              :   /* Now, generate instructions to zero all the other registers.  */
    4135              : 
    4136        12183 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    4137              :     {
    4138        12052 :       if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
    4139        10675 :         continue;
    4140         1800 :       if (!zero_call_used_regno_p (regno, all_sse_zeroed,
    4141         1377 :                                    exit_with_mmx_mode && !all_mm_zeroed))
    4142          423 :         continue;
    4143              : 
    4144          954 :       SET_HARD_REG_BIT (zeroed_hardregs, regno);
    4145              : 
    4146          954 :       machine_mode mode = zero_call_used_regno_mode (regno);
    4147              : 
    4148          954 :       rtx reg = gen_rtx_REG (mode, regno);
    4149          954 :       rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));
    4150              : 
    4151          954 :       switch (mode)
    4152              :         {
    4153          558 :         case E_SImode:
    4154          558 :           if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
    4155              :             {
    4156          558 :               rtx clob = gen_rtx_CLOBBER (VOIDmode,
    4157              :                                           gen_rtx_REG (CCmode,
    4158              :                                                        FLAGS_REG));
    4159          558 :               tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
    4160              :                                                            tmp,
    4161              :                                                            clob));
    4162              :             }
    4163              :           /* FALLTHRU.  */
    4164              : 
    4165          954 :         case E_V4SFmode:
    4166          954 :         case E_HImode:
    4167          954 :         case E_V2SImode:
    4168          954 :           emit_insn (tmp);
    4169          954 :           break;
    4170              : 
    4171            0 :         default:
    4172            0 :           gcc_unreachable ();
    4173              :         }
    4174              :     }
    4175          131 :   return zeroed_hardregs;
    4176              : }
    4177              : 
    4178              : /* Define how to find the value returned by a function.
    4179              :    VALTYPE is the data type of the value (as a tree).
    4180              :    If the precise function being called is known, FUNC is its FUNCTION_DECL;
    4181              :    otherwise, FUNC is 0.  */
    4182              : 
    4183              : static rtx
    4184      3933821 : function_value_32 (machine_mode orig_mode, machine_mode mode,
    4185              :                    const_tree fntype, const_tree fn)
    4186              : {
    4187      3933821 :   unsigned int regno;
    4188              : 
    4189              :   /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
    4190              :      we normally prevent this case when mmx is not available.  However
    4191              :      some ABIs may require the result to be returned like DImode.  */
    4192      4201906 :   if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    4193              :     regno = FIRST_MMX_REG;
    4194              : 
    4195              :   /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
    4196              :      we prevent this case when sse is not available.  However some ABIs
    4197              :      may require the result to be returned like integer TImode.  */
    4198      3924545 :   else if (mode == TImode
    4199      4183354 :            || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    4200              :     regno = FIRST_SSE_REG;
    4201              : 
    4202              :   /* 32-byte vector modes in %ymm0.   */
    4203      3965456 :   else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    4204              :     regno = FIRST_SSE_REG;
    4205              : 
    4206              :   /* 64-byte vector modes in %zmm0.   */
    4207      3821258 :   else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
    4208              :     regno = FIRST_SSE_REG;
    4209              : 
    4210              :   /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
    4211      3665736 :   else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    4212              :     regno = FIRST_FLOAT_REG;
    4213              :   else
    4214              :     /* Most things go in %eax.  */
    4215      3601349 :     regno = AX_REG;
    4216              : 
    4217              :   /* Return __bf16/_Float16/_Complex _Float16 in an SSE register.  */
    4218      3933821 :   if (mode == HFmode || mode == BFmode)
    4219              :     {
    4220         1907 :       if (!TARGET_SSE2)
    4221              :         {
    4222            0 :           error ("SSE register return with SSE2 disabled");
    4223            0 :           regno = AX_REG;
    4224              :         }
    4225              :       else
    4226              :         regno = FIRST_SSE_REG;
    4227              :     }
    4228              : 
    4229      3933821 :   if (mode == HCmode)
    4230              :     {
    4231          129 :       if (!TARGET_SSE2)
    4232            0 :         error ("SSE register return with SSE2 disabled");
    4233              : 
    4234          129 :       rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1));
    4235          258 :       XVECEXP (ret, 0, 0)
    4236          258 :         = gen_rtx_EXPR_LIST (VOIDmode,
    4237              :                              gen_rtx_REG (SImode,
    4238          129 :                                           TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
    4239              :                              GEN_INT (0));
    4240          129 :       return ret;
    4241              :     }
    4242              : 
    4243              :   /* Override FP return register with %xmm0 for local functions when
    4244              :      SSE math is enabled or for functions with sseregparm attribute.  */
    4245      3933692 :   if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    4246              :     {
    4247        50308 :       int sse_level = ix86_function_sseregparm (fntype, fn, false);
    4248        50308 :       if (sse_level == -1)
    4249              :         {
    4250            0 :           error ("calling %qD with SSE calling convention without "
    4251              :                  "SSE/SSE2 enabled", fn);
    4252            0 :           sorry ("this is a GCC bug that can be worked around by adding "
    4253              :                  "attribute used to function called");
    4254              :         }
    4255        50308 :       else if ((sse_level >= 1 && mode == SFmode)
    4256        50308 :                || (sse_level == 2 && mode == DFmode))
    4257              :         regno = FIRST_SSE_REG;
    4258              :     }
    4259              : 
    4260              :   /* OImode shouldn't be used directly.  */
    4261      3933692 :   gcc_assert (mode != OImode);
    4262              : 
    4263      3933692 :   return gen_rtx_REG (orig_mode, regno);
    4264              : }
    4265              : 
    4266              : static rtx
    4267     97769118 : function_value_64 (machine_mode orig_mode, machine_mode mode,
    4268              :                    const_tree valtype)
    4269              : {
    4270     97769118 :   rtx ret;
    4271              : 
    4272              :   /* Handle libcalls, which don't provide a type node.  */
    4273     97769118 :   if (valtype == NULL)
    4274              :     {
    4275       102755 :       unsigned int regno;
    4276              : 
    4277       102755 :       switch (mode)
    4278              :         {
    4279              :         case E_BFmode:
    4280              :         case E_HFmode:
    4281              :         case E_HCmode:
    4282              :         case E_SFmode:
    4283              :         case E_SCmode:
    4284              :         case E_DFmode:
    4285              :         case E_DCmode:
    4286              :         case E_TFmode:
    4287              :         case E_SDmode:
    4288              :         case E_DDmode:
    4289              :         case E_TDmode:
    4290              :           regno = FIRST_SSE_REG;
    4291              :           break;
    4292         1040 :         case E_XFmode:
    4293         1040 :         case E_XCmode:
    4294         1040 :           regno = FIRST_FLOAT_REG;
    4295         1040 :           break;
    4296              :         case E_TCmode:
    4297              :           return NULL;
    4298        56288 :         default:
    4299        56288 :           regno = AX_REG;
    4300              :         }
    4301              : 
    4302       102755 :       return gen_rtx_REG (mode, regno);
    4303              :     }
    4304     97666363 :   else if (POINTER_TYPE_P (valtype))
    4305              :     {
    4306              :       /* Pointers are always returned in word_mode.  */
    4307     16048163 :       mode = word_mode;
    4308              :     }
    4309              : 
    4310     97666363 :   ret = construct_container (mode, orig_mode, valtype, true,
    4311              :                              X86_64_MAX_RETURN_NREGS,
    4312              :                              X86_64_MAX_SSE_RETURN_NREGS,
    4313              :                              x86_64_int_return_registers, 0);
    4314              : 
    4315              :   /* For zero sized structures, construct_container returns NULL, but we
    4316              :      need to keep rest of compiler happy by returning meaningful value.  */
    4317     97666363 :   if (!ret)
    4318       200846 :     ret = gen_rtx_REG (orig_mode, AX_REG);
    4319              : 
    4320              :   return ret;
    4321              : }
    4322              : 
    4323              : static rtx
    4324            0 : function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
    4325              :                       const_tree fntype, const_tree fn, const_tree valtype)
    4326              : {
    4327            0 :   unsigned int regno;
    4328              : 
    4329              :   /* Floating point return values in %st(0)
    4330              :      (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes).  */
    4331            0 :   if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
    4332            0 :            && (GET_MODE_SIZE (mode) > 8
    4333            0 :                || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
    4334              :   {
    4335            0 :     regno = FIRST_FLOAT_REG;
    4336            0 :     return gen_rtx_REG (orig_mode, regno);
    4337              :   }
    4338              :   else
    4339            0 :     return function_value_32(orig_mode, mode, fntype,fn);
    4340              : }
    4341              : 
    4342              : static rtx
    4343       787708 : function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
    4344              :                       const_tree valtype)
    4345              : {
    4346       787708 :   unsigned int regno = AX_REG;
    4347              : 
    4348       787708 :   if (TARGET_SSE)
    4349              :     {
    4350       786981 :       unsigned int mode_size = GET_MODE_SIZE (mode);
    4351              : 
    4352       786981 :       switch (mode_size)
    4353              :         {
    4354        34397 :         case 16:
    4355        34397 :         case 32:
    4356        34397 :         case 64:
    4357        34397 :           if (mode_size == 32 && !TARGET_AVX)
    4358              :             break;
    4359        34397 :           if (mode_size == 64 && !TARGET_AVX512F)
    4360              :             break;
    4361        34397 :           if (valtype != NULL_TREE
    4362        34397 :               && !VECTOR_INTEGER_TYPE_P (valtype)
    4363        15828 :               && !INTEGRAL_TYPE_P (valtype)
    4364        50225 :               && !VECTOR_FLOAT_TYPE_P (valtype))
    4365              :             break;
    4366        34397 :           if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
    4367              :               && !COMPLEX_MODE_P (mode))
    4368       218079 :             regno = FIRST_SSE_REG;
    4369              :           break;
    4370       741282 :         case 8:
    4371       741282 :         case 4:
    4372       741282 :         case 2:
    4373       741282 :           if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
    4374              :             break;
    4375       723988 :           if (mode == HFmode || mode == SFmode || mode == DFmode)
    4376       218079 :             regno = FIRST_SSE_REG;
    4377              :           break;
    4378              :         default:
    4379              :           break;
    4380              :         }
    4381              :     }
    4382       787708 :   return gen_rtx_REG (orig_mode, regno);
    4383              : }
    4384              : 
    4385              : static rtx
    4386    102490647 : ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
    4387              :                        machine_mode orig_mode, machine_mode mode)
    4388              : {
    4389    102490647 :   const_tree fn, fntype;
    4390              : 
    4391    102490647 :   fn = NULL_TREE;
    4392    102490647 :   if (fntype_or_decl && DECL_P (fntype_or_decl))
    4393      3535719 :     fn = fntype_or_decl;
    4394      3535719 :   fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
    4395              : 
    4396    102490647 :   if (ix86_function_type_abi (fntype) == MS_ABI)
    4397              :     {
    4398       787708 :       if (TARGET_64BIT)
    4399       787708 :         return function_value_ms_64 (orig_mode, mode, valtype);
    4400              :       else
    4401            0 :         return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
    4402              :     }
    4403    101702939 :   else if (TARGET_64BIT)
    4404     97769118 :     return function_value_64 (orig_mode, mode, valtype);
    4405              :   else
    4406      3933821 :     return function_value_32 (orig_mode, mode, fntype, fn);
    4407              : }
    4408              : 
    4409              : static rtx
    4410    102384754 : ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
    4411              : {
    4412    102384754 :   machine_mode mode, orig_mode;
    4413              : 
    4414    102384754 :   orig_mode = TYPE_MODE (valtype);
    4415    102384754 :   mode = type_natural_mode (valtype, NULL, true);
    4416    102384754 :   return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
    4417              : }
    4418              : 
    4419              : /* Pointer function arguments and return values are promoted to
    4420              :    word_mode for normal functions.  */
    4421              : 
    4422              : static machine_mode
    4423     31959264 : ix86_promote_function_mode (const_tree type, machine_mode mode,
    4424              :                             int *punsignedp, const_tree fntype,
    4425              :                             int for_return)
    4426              : {
    4427     31959264 :   if (cfun->machine->func_type == TYPE_NORMAL
    4428     31958241 :       && type != NULL_TREE
    4429     31924110 :       && POINTER_TYPE_P (type))
    4430              :     {
    4431     15932190 :       *punsignedp = POINTERS_EXTEND_UNSIGNED;
    4432     15932190 :       return word_mode;
    4433              :     }
    4434     16027074 :   return default_promote_function_mode (type, mode, punsignedp, fntype,
    4435     16027074 :                                         for_return);
    4436              : }
    4437              : 
    4438              : /* Return true if a structure, union or array with MODE containing FIELD
    4439              :    should be accessed using BLKmode.  */
    4440              : 
    4441              : static bool
    4442    143127462 : ix86_member_type_forces_blk (const_tree field, machine_mode mode)
    4443              : {
    4444              :   /* Union with XFmode must be in BLKmode.  */
    4445    143127462 :   return (mode == XFmode
    4446    143264159 :           && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
    4447       129665 :               || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
    4448              : }
    4449              : 
    4450              : rtx
    4451       105893 : ix86_libcall_value (machine_mode mode)
    4452              : {
    4453       105893 :   return ix86_function_value_1 (NULL, NULL, mode, mode);
    4454              : }
    4455              : 
    4456              : /* Return true iff type is returned in memory.  */
    4457              : 
    4458              : static bool
    4459    104383167 : ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
    4460              : {
    4461    104383167 :   const machine_mode mode = type_natural_mode (type, NULL, true);
    4462    104383167 :   HOST_WIDE_INT size;
    4463              : 
    4464    104383167 :   if (TARGET_64BIT)
    4465              :     {
    4466     99840893 :       if (ix86_function_type_abi (fntype) == MS_ABI)
    4467              :         {
    4468       707133 :           size = int_size_in_bytes (type);
    4469              : 
    4470              :           /* __m128 is returned in xmm0.  256/512-bit vector values are
    4471              :              returned in ymm0/zmm0 when AVX/AVX512 is enabled.  */
    4472       707133 :           if ((!type || VECTOR_INTEGER_TYPE_P (type)
    4473       687562 :                || INTEGRAL_TYPE_P (type)
    4474       217159 :                || VECTOR_FLOAT_TYPE_P (type))
    4475       505802 :               && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
    4476              :               && !COMPLEX_MODE_P (mode)
    4477      1212935 :               && ((GET_MODE_SIZE (mode) == 16 || size == 16)
    4478       519126 :                   || (TARGET_AVX && (GET_MODE_SIZE (mode) == 32 || size == 32))
    4479       482061 :                   || (TARGET_AVX512F
    4480        16590 :                       && (GET_MODE_SIZE (mode) == 64 || size == 64))))
    4481              :             return false;
    4482              : 
    4483              :           /* Otherwise, the size must be exactly one of 1, 2, 4 or 8.  */
    4484      1329376 :           return size != 1 && size != 2 && size != 4 && size != 8;
    4485              :         }
    4486              :       else
    4487              :         {
    4488     99133760 :           int needed_intregs, needed_sseregs;
    4489              : 
    4490     99133760 :           return examine_argument (mode, type, true,
    4491              :                                    &needed_intregs, &needed_sseregs);
    4492              :         }
    4493              :     }
    4494              :   else
    4495              :     {
    4496      4542274 :       size = int_size_in_bytes (type);
    4497              : 
    4498              :       /* Intel MCU psABI returns scalars and aggregates no larger than 8
    4499              :          bytes in registers.  */
    4500      4542274 :       if (TARGET_IAMCU)
    4501            0 :         return VECTOR_MODE_P (mode) || size < 0 || size > 8;
    4502              : 
    4503      4542274 :       if (mode == BLKmode)
    4504              :         return true;
    4505              : 
    4506      4542274 :       if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
    4507              :         return false;
    4508              : 
    4509      4542274 :       if (VECTOR_MODE_P (mode) || mode == TImode)
    4510              :         {
    4511              :           /* User-created vectors small enough to fit in EAX.  */
    4512       268055 :           if (size < 8)
    4513              :             return false;
    4514              : 
    4515              :           /* Unless ABI prescribes otherwise,
    4516              :              MMX/3dNow values are returned in MM0 if available.  */
    4517              : 
    4518       268055 :           if (size == 8)
    4519         9266 :             return TARGET_VECT8_RETURNS || !TARGET_MMX;
    4520              : 
    4521              :           /* SSE values are returned in XMM0 if available.  */
    4522       258789 :           if (size == 16)
    4523       108939 :             return !TARGET_SSE;
    4524              : 
    4525              :           /* AVX values are returned in YMM0 if available.  */
    4526       149850 :           if (size == 32)
    4527        72090 :             return !TARGET_AVX;
    4528              : 
    4529              :           /* AVX512F values are returned in ZMM0 if available.  */
    4530        77760 :           if (size == 64)
    4531        77760 :             return !TARGET_AVX512F;
    4532              :         }
    4533              : 
    4534      4274219 :       if (mode == XFmode)
    4535              :         return false;
    4536              : 
    4537      4262499 :       if (size > 12)
    4538              :         return true;
    4539              : 
    4540              :       /* OImode shouldn't be used directly.  */
    4541      3280693 :       gcc_assert (mode != OImode);
    4542              : 
    4543              :       return false;
    4544              :     }
    4545              : }
    4546              : 
    4547              : /* Implement TARGET_PUSH_ARGUMENT.  */
    4548              : 
    4549              : static bool
    4550      9323946 : ix86_push_argument (unsigned int npush)
    4551              : {
    4552              :   /* If SSE2 is available, use vector move to put large argument onto
    4553              :      stack.  NB:  In 32-bit mode, use 8-byte vector move.  */
    4554     11750283 :   return ((!TARGET_SSE2 || npush < (TARGET_64BIT ? 16 : 8))
    4555      9058769 :           && TARGET_PUSH_ARGS
    4556     18382617 :           && !ACCUMULATE_OUTGOING_ARGS);
    4557              : }
    4558              : 
    4559              : 
    4560              : /* Create the va_list data type used for the 64-bit SYSV ABI.
                           Builds a RECORD_TYPE named "__va_list_tag" whose fields are, in
                           chain order, gp_offset and fp_offset (unsigned_type_node) followed
                           by overflow_arg_area and reg_save_area (ptr_type_node).  The two
                           offset fields are also stored in va_list_gpr_counter_field and
                           va_list_fpr_counter_field, and the record is tagged with the
                           "sysv_abi va_list" attribute.  Returns an array type whose element
                           type is that record.  */
    4561              : 
    4562              : static tree
    4563       291323 : ix86_build_builtin_va_list_64 (void)
    4564              : {
    4565       291323 :   tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
    4566              : 
    4567       291323 :   record = lang_hooks.types.make_type (RECORD_TYPE);
    4568       291323 :   type_decl = build_decl (BUILTINS_LOCATION,
    4569              :                           TYPE_DECL, get_identifier ("__va_list_tag"), record);
    4570              : 
                          /* One FIELD_DECL per member; they are attached to RECORD and chained
                             together further down.  */
    4571       291323 :   f_gpr = build_decl (BUILTINS_LOCATION,
    4572              :                       FIELD_DECL, get_identifier ("gp_offset"),
    4573              :                       unsigned_type_node);
    4574       291323 :   f_fpr = build_decl (BUILTINS_LOCATION,
    4575              :                       FIELD_DECL, get_identifier ("fp_offset"),
    4576              :                       unsigned_type_node);
    4577       291323 :   f_ovf = build_decl (BUILTINS_LOCATION,
    4578              :                       FIELD_DECL, get_identifier ("overflow_arg_area"),
    4579              :                       ptr_type_node);
    4580       291323 :   f_sav = build_decl (BUILTINS_LOCATION,
    4581              :                       FIELD_DECL, get_identifier ("reg_save_area"),
    4582              :                       ptr_type_node);
    4583              : 
                          /* Record the two offset fields in the va_list_*_counter_field
                             globals (presumably for the generic stdarg code -- TODO confirm
                             the consumer).  */
    4584       291323 :   va_list_gpr_counter_field = f_gpr;
    4585       291323 :   va_list_fpr_counter_field = f_fpr;
    4586              : 
    4587       291323 :   DECL_FIELD_CONTEXT (f_gpr) = record;
    4588       291323 :   DECL_FIELD_CONTEXT (f_fpr) = record;
    4589       291323 :   DECL_FIELD_CONTEXT (f_ovf) = record;
    4590       291323 :   DECL_FIELD_CONTEXT (f_sav) = record;
    4591              : 
                          /* Name the record and link the fields in the order
                             gp_offset, fp_offset, overflow_arg_area, reg_save_area.  */
    4592       291323 :   TYPE_STUB_DECL (record) = type_decl;
    4593       291323 :   TYPE_NAME (record) = type_decl;
    4594       291323 :   TYPE_FIELDS (record) = f_gpr;
    4595       291323 :   DECL_CHAIN (f_gpr) = f_fpr;
    4596       291323 :   DECL_CHAIN (f_fpr) = f_ovf;
    4597       291323 :   DECL_CHAIN (f_ovf) = f_sav;
    4598       291323 :   TREE_PUBLIC (type_decl) = 1;
    4599              : 
    4600       291323 :   layout_type (record);
    4601              : 
                          /* The attribute goes on the record itself, not on the array type
                             returned below.  */
    4602       291323 :   TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
    4603       291323 :                                         NULL_TREE, TYPE_ATTRIBUTES (record));
    4604              : 
    4605              :   /* The correct type is an array type of one element.  */
    4606       291323 :   return build_array_type (record, build_index_type (size_zero_node));
    4607              : }
    4608              : 
    4609              : /* Set up the builtin va_list data type and, for 64-bit, the additional
    4610              :    calling-convention-specific va_list data types
                           (sysv_va_list_type_node and ms_va_list_type_node).  Returns the
                           node selected by ix86_abi on 64-bit targets and a plain pointer to
                           char on 32-bit targets.  */
    4611              : 
    4612              : static tree
    4613       298479 : ix86_build_builtin_va_list (void)
    4614              : {
    4615       298479 :   if (TARGET_64BIT)
    4616              :     {
    4617              :       /* Initialize ABI specific va_list builtin types.
    4618              : 
    4619              :          In lto1, we can encounter two va_list types:
    4620              :          - one as a result of the type-merge across TUs, and
    4621              :          - the one constructed here.
    4622              :          These two types will not have the same TYPE_MAIN_VARIANT, and therefore
    4623              :          a type identity check in canonical_va_list_type based on
    4624              :          TYPE_MAIN_VARIANT (which we used to have) will not work.
    4625              :          Instead, we tag each va_list_type_node with its unique attribute, and
    4626              :          look for the attribute in the type identity check in
    4627              :          canonical_va_list_type.
    4628              : 
    4629              :          Tagging sysv_va_list_type_node directly with the attribute is
    4630              :          problematic since it's an array of one record, which will degrade into a
    4631              :          pointer to record when used as parameter (see build_va_arg comments for
    4632              :          an example), dropping the attribute in the process.  So we tag the
    4633              :          record instead.  */
    4634              : 
    4635              :       /* For SYSV_ABI we use an array of one record.  */
    4636       291323 :       sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
    4637              : 
    4638              :       /* For MS_ABI we use plain pointer to argument area.  The pointer
                                 type is given an "ms_abi va_list" attribute via a type
                                 attribute variant, leaving the plain char pointer type
                                 itself untouched.  */
    4639       291323 :       tree char_ptr_type = build_pointer_type (char_type_node);
    4640       291323 :       tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
    4641       291323 :                              TYPE_ATTRIBUTES (char_ptr_type));
    4642       291323 :       ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
    4643              : 
                              /* Both nodes are always built; the default ABI only decides
                                 which one is returned.  */
    4644       291323 :       return ((ix86_abi == MS_ABI)
    4645       291323 :               ? ms_va_list_type_node
    4646       291323 :               : sysv_va_list_type_node);
    4647              :     }
    4648              :   else
    4649              :     {
    4650              :       /* For i386 we use plain pointer to argument area.  */
    4651         7156 :       return build_pointer_type (char_type_node);
    4652              :     }
    4653              : }
    4654              : 
    4655              : /* Worker function for TARGET_SETUP_INCOMING_VARARGS, used when the
                           call ABI is not MS_ABI.  CUM gives the first integer (CUM->regno)
                           and SSE (CUM->sse_regno) parameter register still to be saved.
                           Sets ix86_varargs_gpr_size and ix86_varargs_fpr_size, then emits
                           stores of the parameter registers into a save area addressed from
                           frame_pointer_rtx: integer register I at I * UNITS_PER_WORD, and
                           SSE register I at I * 16 + ix86_varargs_gpr_size.  Emits nothing
                           when both sizes are zero.  */
    4656              : 
    4657              : static void
    4658        15778 : setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
    4659              : {
    4660        15778 :   rtx save_area, mem;
    4661        15778 :   alias_set_type set;
    4662        15778 :   int i, max;
    4663              : 
    4664              :   /* GPR size of varargs save area.  */
    4665        15778 :   if (cfun->va_list_gpr_size)
    4666        15315 :     ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
    4667              :   else
    4668          463 :     ix86_varargs_gpr_size = 0;
    4669              : 
    4670              :   /* FPR size of varargs save area.  We don't need it if we don't pass
    4671              :      anything in SSE registers.  */
    4672        15778 :   if (TARGET_SSE && cfun->va_list_fpr_size)
    4673        14718 :     ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
    4674              :   else
    4675         1060 :     ix86_varargs_fpr_size = 0;
    4676              : 
                          /* Neither area is needed, so there is nothing to emit.  */
    4677        15778 :   if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    4678              :     return;
    4679              : 
    4680        15484 :   save_area = frame_pointer_rtx;
    4681        15484 :   set = get_varargs_alias_set ();
    4682              : 
                          /* Upper bound on the integer registers to save: CUM->regno plus
                             cfun->va_list_gpr_size expressed in words, clamped to
                             X86_64_REGPARM_MAX.  */
    4683        15484 :   max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
    4684        15484 :   if (max > X86_64_REGPARM_MAX)
    4685              :     max = X86_64_REGPARM_MAX;
    4686              : 
                          /* Pick the integer parameter register table that matches the
                             function's ABI.  */
    4687        15484 :   const int *parm_regs;
    4688        15484 :   if (cum->preserve_none_abi)
    4689              :     parm_regs = x86_64_preserve_none_int_parameter_registers;
    4690              :   else
    4691        15483 :     parm_regs = x86_64_int_parameter_registers;
    4692              : 
                          /* Store each remaining integer parameter register into its
                             word-sized slot of the save area.  */
    4693        86019 :   for (i = cum->regno; i < max; i++)
    4694              :     {
    4695        70535 :       mem = gen_rtx_MEM (word_mode,
    4696        70535 :                          plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
    4697        70535 :       MEM_NOTRAP_P (mem) = 1;
    4698        70535 :       set_mem_alias_set (mem, set);
    4699        70535 :       emit_move_insn (mem,
    4700        70535 :                       gen_rtx_REG (word_mode, parm_regs[i]));
    4701              :     }
    4702              : 
    4703        15484 :   if (ix86_varargs_fpr_size)
    4704              :     {
    4705        14718 :       machine_mode smode;
    4706        14718 :       rtx_code_label *label;
    4707        14718 :       rtx test;
    4708              : 
    4709              :       /* Now emit code to save SSE registers.  The AX parameter contains number
    4710              :          of SSE parameter registers used to call this function, though all we
    4711              :          actually check here is the zero/non-zero status.  */
    4712              : 
                              /* Branch to LABEL, i.e. past all the SSE stores below, when
                                 the QImode view of AX is zero.  */
    4713        14718 :       label = gen_label_rtx ();
    4714        14718 :       test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
    4715        14718 :       emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
    4716              :                                       label));
    4717              : 
    4718              :       /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
    4719              :          we used movdqa (i.e. TImode) instead?  Perhaps even better would
    4720              :          be if we could determine the real mode of the data, via a hook
    4721              :          into pass_stdarg.  Ignore all that for now.  */
    4722        14718 :       smode = V4SFmode;
                              /* The stores below are marked as aligned for SMODE, so raise
                                 the required stack alignment to at least that.  */
    4723        14718 :       if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
    4724         4220 :         crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
    4725              : 
                              /* Same clamping as for the integer registers, in units of
                                 16-byte SSE slots.  */
    4726        14718 :       max = cum->sse_regno + cfun->va_list_fpr_size / 16;
    4727        14718 :       if (max > X86_64_SSE_REGPARM_MAX)
    4728              :         max = X86_64_SSE_REGPARM_MAX;
    4729              : 
    4730       130838 :       for (i = cum->sse_regno; i < max; ++i)
    4731              :         {
                                  /* The SSE slots follow the GPR part of the save area.  */
    4732       116120 :           mem = plus_constant (Pmode, save_area,
    4733       116120 :                                i * 16 + ix86_varargs_gpr_size);
    4734       116120 :           mem = gen_rtx_MEM (smode, mem);
    4735       116120 :           MEM_NOTRAP_P (mem) = 1;
    4736       116120 :           set_mem_alias_set (mem, set);
    4737       116120 :           set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
    4738              : 
    4739       116120 :           emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
    4740              :         }
    4741              : 
    4742        14718 :       emit_label (label);
    4743              :     }
    4744              : }
    4745              : /* Varargs setup worker used when the call ABI is MS_ABI.  Clears
                           ix86_varargs_gpr_size and ix86_varargs_fpr_size, then stores every
                           integer parameter register from CUM->regno up to
                           X86_64_MS_REGPARM_MAX into slot I * UNITS_PER_WORD relative to
                           virtual_incoming_args_rtx.  No SSE registers are saved here.  */
    4746              : static void
    4747         5652 : setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
    4748              : {
    4749         5652 :   alias_set_type set = get_varargs_alias_set ();
    4750         5652 :   int i;
    4751              : 
    4752              :   /* Reset to zero, as there might be a sysv vaarg used
    4753              :      before.  */
    4754         5652 :   ix86_varargs_gpr_size = 0;
    4755         5652 :   ix86_varargs_fpr_size = 0;
    4756              : 
    4757        14154 :   for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
    4758              :     {
    4759         8502 :       rtx reg, mem;
    4760              : 
                              /* Slot I of the incoming argument area, accessed in Pmode and
                                 placed in the varargs alias set.  */
    4761         8502 :       mem = gen_rtx_MEM (Pmode,
    4762         8502 :                          plus_constant (Pmode, virtual_incoming_args_rtx,
    4763         8502 :                                         i * UNITS_PER_WORD));
    4764         8502 :       MEM_NOTRAP_P (mem) = 1;
    4765         8502 :       set_mem_alias_set (mem, set);
    4766              : 
    4767         8502 :       reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
    4768         8502 :       emit_move_insn (mem, reg);
    4769              :     }
    4770         5652 : }
    4771              : /* Dispatch the varargs register-save setup for the current function
                           (presumably the TARGET_SETUP_INCOMING_VARARGS hook itself, since
                           the worker it calls is documented as that hook's worker).  CUM_V
                           is the argument-scanning state and ARG the argument it currently
                           describes; NO_RTL must be zero.  Does nothing on 32-bit targets.
                           Otherwise works on a copy of the state, optionally advanced past
                           ARG, and hands it to the MS or SYSV worker according to
                           CUM->call_abi.  */
    4772              : static void
    4773        21584 : ix86_setup_incoming_varargs (cumulative_args_t cum_v,
    4774              :                              const function_arg_info &arg,
    4775              :                              int *, int no_rtl)
    4776              : {
    4777        21584 :   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
    4778        21584 :   CUMULATIVE_ARGS next_cum;
    4779        21584 :   tree fntype;
    4780              : 
    4781              :   /* This argument doesn't appear to be used anymore.  Which is good,
    4782              :      because the old code here didn't suppress rtl generation.  */
    4783        21584 :   gcc_assert (!no_rtl);
    4784              : 
    4785        21584 :   if (!TARGET_64BIT)
    4786          154 :     return;
    4787              : 
    4788        21430 :   fntype = TREE_TYPE (current_function_decl);
    4789              : 
    4790              :   /* For varargs, we do not want to skip the dummy va_dcl argument.
    4791              :      For stdargs, we do want to skip the last named argument.  */
                          /* Advance a copy, so the caller's CUM is left unmodified.  The
                             advance is skipped for a function type with no named stdarg
                             arguments when ARG has no type.  */
    4792        21430 :   next_cum = *cum;
    4793        21430 :   if ((!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
    4794          151 :        || arg.type != NULL_TREE)
    4795        21467 :       && stdarg_p (fntype))
    4796        21316 :     ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
    4797              : 
    4798        21430 :   if (cum->call_abi == MS_ABI)
    4799         5652 :     setup_incoming_varargs_ms_64 (&next_cum);
    4800              :   else
    4801        15778 :     setup_incoming_varargs_64 (&next_cum);
    4802              : }
    4803              : 
    4804              : /* Checks if TYPE is of kind va_list char *.  Returns true
                           unconditionally on 32-bit targets.  On 64-bit targets returns true
                           when the canonical va_list type of TYPE is ms_va_list_type_node,
                           or is va_list_type_node while the default ABI is MS_ABI.  */
    4805              : 
    4806              : static bool
    4807        73339 : is_va_list_char_pointer (tree type)
    4808              : {
    4809        73339 :   tree canonic;
    4810              : 
    4811              :   /* For 32-bit it is always true.  */
    4812        73339 :   if (!TARGET_64BIT)
    4813              :     return true;
    4814        73177 :   canonic = ix86_canonical_va_list_type (type);
    4815        73177 :   return (canonic == ms_va_list_type_node
    4816        73177 :           || (ix86_abi == MS_ABI && canonic == va_list_type_node));
    4817              : }
    4818              : 
    4819              : /* Implement va_start.  VALIST is the va_list object to initialize and
                           NEXTARG is passed through to std_expand_builtin_va_start.

                           With -fsplit-stack a pseudo holding the pointer to the old-stack
                           arguments is first created (once per function).  A va_list that is
                           a plain char pointer is then initialized by the generic code, or
                           from that pseudo plus crtl->args.arg_offset_rtx.  Otherwise the
                           four fields of the SYSV record are filled in: gp_offset,
                           fp_offset, overflow_arg_area and reg_save_area.  */
    4820              : 
    4821              : static void
    4822        21072 : ix86_va_start (tree valist, rtx nextarg)
    4823              : {
    4824        21072 :   HOST_WIDE_INT words, n_gpr, n_fpr;
    4825        21072 :   tree f_gpr, f_fpr, f_ovf, f_sav;
    4826        21072 :   tree gpr, fpr, ovf, sav, t;
    4827        21072 :   tree type;
    4828        21072 :   rtx ovf_rtx;
    4829              : 
    4830        21072 :   if (flag_split_stack
    4831           12 :       && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    4832              :     {
    4833           12 :       unsigned int scratch_regno;
    4834              : 
    4835              :       /* When we are splitting the stack, we can't refer to the stack
    4836              :          arguments using internal_arg_pointer, because they may be on
    4837              :          the old stack.  The split stack prologue will arrange to
    4838              :          leave a pointer to the old stack arguments in a scratch
    4839              :          register, which we here copy to a pseudo-register.  The split
    4840              :          stack prologue can't set the pseudo-register directly because
    4841              :          it (the prologue) runs before any registers have been saved.  */
    4842              : 
    4843           12 :       scratch_regno = split_stack_prologue_scratch_regno ();
    4844           12 :       if (scratch_regno != INVALID_REGNUM)
    4845              :         {
    4846           12 :           rtx reg;
    4847           12 :           rtx_insn *seq;
    4848              : 
    4849           16 :           reg = gen_reg_rtx (Pmode);
    4850           12 :           cfun->machine->split_stack_varargs_pointer = reg;
    4851              : 
                                  /* Build the copy as a detached sequence ...  */
    4852           12 :           start_sequence ();
    4853           16 :           emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
    4854           12 :           seq = end_sequence ();
    4855              : 
                                  /* ... and insert it at the function entry rather than at
                                     the va_start call site.  */
    4856           12 :           push_topmost_sequence ();
    4857           12 :           emit_insn_after (seq, entry_of_function ());
    4858           12 :           pop_topmost_sequence ();
    4859              :         }
    4860              :     }
    4861              : 
    4862              :   /* Only 64bit target needs something special.  */
    4863        21072 :   if (is_va_list_char_pointer (TREE_TYPE (valist)))
    4864              :     {
    4865         5656 :       if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    4866         5652 :         std_expand_builtin_va_start (valist, nextarg);
    4867              :       else
    4868              :         {
    4869            4 :           rtx va_r, next;
    4870              : 
                                  /* VALIST = split-stack argument pointer + argument offset.  */
    4871            4 :           va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
    4872            8 :           next = expand_binop (ptr_mode, add_optab,
    4873            4 :                                cfun->machine->split_stack_varargs_pointer,
    4874              :                                crtl->args.arg_offset_rtx,
    4875              :                                NULL_RTX, 0, OPTAB_LIB_WIDEN);
    4876            4 :           convert_move (va_r, next, 0);
    4877              :         }
    4878         5656 :       return;
    4879              :     }
    4880              : 
                          /* Field order matches the chain built in
                             ix86_build_builtin_va_list_64: gp_offset, fp_offset,
                             overflow_arg_area, reg_save_area.  */
    4881        15416 :   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
    4882        15416 :   f_fpr = DECL_CHAIN (f_gpr);
    4883        15416 :   f_ovf = DECL_CHAIN (f_fpr);
    4884        15416 :   f_sav = DECL_CHAIN (f_ovf);
    4885              : 
    4886        15416 :   valist = build_simple_mem_ref (valist);
    4887        15416 :   TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
    4888              :   /* The following should be folded into the MEM_REF offset.  */
    4889        15416 :   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
    4890              :                 f_gpr, NULL_TREE);
    4891        15416 :   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
    4892              :                 f_fpr, NULL_TREE);
    4893        15416 :   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
    4894              :                 f_ovf, NULL_TREE);
    4895        15416 :   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
    4896              :                 f_sav, NULL_TREE);
    4897              : 
    4898              :   /* Count number of gp and fp argument registers used.  */
    4899        15416 :   words = crtl->args.info.words;
    4900        15416 :   n_gpr = crtl->args.info.regno;
    4901        15416 :   n_fpr = crtl->args.info.sse_regno;
    4902              : 
    4903        15416 :   if (cfun->va_list_gpr_size)
    4904              :     {
                              /* gp_offset = 8 bytes for each integer register already used.  */
    4905        15169 :       type = TREE_TYPE (gpr);
    4906        15169 :       t = build2 (MODIFY_EXPR, type,
    4907        15169 :                   gpr, build_int_cst (type, n_gpr * 8));
    4908        15169 :       TREE_SIDE_EFFECTS (t) = 1;
    4909        15169 :       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    4910              :     }
    4911              : 
    4912        15416 :   if (TARGET_SSE && cfun->va_list_fpr_size)
    4913              :     {
                              /* fp_offset = 16 bytes for each SSE register already used,
                                 plus the size of a full GPR area (8 * X86_64_REGPARM_MAX).  */
    4914        14560 :       type = TREE_TYPE (fpr);
    4915        14560 :       t = build2 (MODIFY_EXPR, type, fpr,
    4916        14560 :                   build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
    4917        14560 :       TREE_SIDE_EFFECTS (t) = 1;
    4918        14560 :       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    4919              :     }
    4920              : 
    4921              :   /* Find the overflow area.  It starts at the incoming argument pointer
                             (or the split-stack copy of it), advanced by WORDS words when
                             WORDS is nonzero.  */
    4922        15416 :   type = TREE_TYPE (ovf);
    4923        15416 :   if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    4924        15408 :     ovf_rtx = crtl->args.internal_arg_pointer;
    4925              :   else
    4926              :     ovf_rtx = cfun->machine->split_stack_varargs_pointer;
    4927        15416 :   t = make_tree (type, ovf_rtx);
    4928        15416 :   if (words != 0)
    4929          492 :     t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
    4930              : 
    4931        15416 :   t = build2 (MODIFY_EXPR, type, ovf, t);
    4932        15416 :   TREE_SIDE_EFFECTS (t) = 1;
    4933        15416 :   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    4934              : 
    4935        15416 :   if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    4936              :     {
    4937              :       /* Find the register save area.
    4938              :          The function prologue saves it right above the stack frame.  */
    4939        15338 :       type = TREE_TYPE (sav);
    4940        15338 :       t = make_tree (type, frame_pointer_rtx);
                              /* With no GPR area the SSE slots start directly at the frame
                                 pointer (see setup_incoming_varargs_64), yet fp_offset above
                                 still includes 8 * X86_64_REGPARM_MAX; bias the base down by
                                 that amount so reg_save_area + fp_offset lands on them.  */
    4941        15338 :       if (!ix86_varargs_gpr_size)
    4942          169 :         t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
    4943              : 
    4944        15338 :       t = build2 (MODIFY_EXPR, type, sav, t);
    4945        15338 :       TREE_SIDE_EFFECTS (t) = 1;
    4946        15338 :       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    4947              :     }
    4948              : }
    4949              : 
    4950              : /* Implement va_arg.  */
    4951              : 
    4952              : static tree
    4953        52267 : ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
    4954              :                       gimple_seq *post_p)
    4955              : {
    4956        52267 :   static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
    4957        52267 :   tree f_gpr, f_fpr, f_ovf, f_sav;
    4958        52267 :   tree gpr, fpr, ovf, sav, t;
    4959        52267 :   int size, rsize;
    4960        52267 :   tree lab_false, lab_over = NULL_TREE;
    4961        52267 :   tree addr, t2;
    4962        52267 :   rtx container;
    4963        52267 :   int indirect_p = 0;
    4964        52267 :   tree ptrtype;
    4965        52267 :   machine_mode nat_mode;
    4966        52267 :   unsigned int arg_boundary;
    4967        52267 :   unsigned int type_align;
    4968              : 
    4969              :   /* Only 64bit target needs something special.  */
    4970        52267 :   if (is_va_list_char_pointer (TREE_TYPE (valist)))
    4971          260 :     return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
    4972              : 
    4973        52007 :   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
    4974        52007 :   f_fpr = DECL_CHAIN (f_gpr);
    4975        52007 :   f_ovf = DECL_CHAIN (f_fpr);
    4976        52007 :   f_sav = DECL_CHAIN (f_ovf);
    4977              : 
    4978        52007 :   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
    4979              :                 valist, f_gpr, NULL_TREE);
    4980              : 
    4981        52007 :   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
    4982        52007 :   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
    4983        52007 :   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
    4984              : 
    4985        52007 :   indirect_p = pass_va_arg_by_reference (type);
    4986        52007 :   if (indirect_p)
    4987          103 :     type = build_pointer_type (type);
    4988        52007 :   size = arg_int_size_in_bytes (type);
    4989        52007 :   rsize = CEIL (size, UNITS_PER_WORD);
    4990              : 
    4991        52007 :   nat_mode = type_natural_mode (type, NULL, false);
    4992        52007 :   switch (nat_mode)
    4993              :     {
    4994           28 :     case E_V16HFmode:
    4995           28 :     case E_V16BFmode:
    4996           28 :     case E_V8SFmode:
    4997           28 :     case E_V8SImode:
    4998           28 :     case E_V32QImode:
    4999           28 :     case E_V16HImode:
    5000           28 :     case E_V4DFmode:
    5001           28 :     case E_V4DImode:
    5002           28 :     case E_V32HFmode:
    5003           28 :     case E_V32BFmode:
    5004           28 :     case E_V16SFmode:
    5005           28 :     case E_V16SImode:
    5006           28 :     case E_V64QImode:
    5007           28 :     case E_V32HImode:
    5008           28 :     case E_V8DFmode:
    5009           28 :     case E_V8DImode:
    5010              :       /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
    5011           28 :       if (!TARGET_64BIT_MS_ABI)
    5012              :         {
    5013              :           container = NULL;
    5014              :           break;
    5015              :         }
    5016              :       /* FALLTHRU */
    5017              : 
    5018        51979 :     default:
    5019        51979 :       container = construct_container (nat_mode, TYPE_MODE (type),
    5020              :                                        type, false, X86_64_REGPARM_MAX,
    5021              :                                        X86_64_SSE_REGPARM_MAX, intreg, 0);
    5022        51979 :       break;
    5023              :     }
    5024              : 
    5025              :   /* Pull the value out of the saved registers.  */
    5026              : 
    5027        52007 :   addr = create_tmp_var (ptr_type_node, "addr");
    5028        52007 :   type_align = TYPE_ALIGN (type);
    5029              : 
    5030        52007 :   if (container)
    5031              :     {
    5032        28914 :       int needed_intregs, needed_sseregs;
    5033        28914 :       bool need_temp;
    5034        28914 :       tree int_addr, sse_addr;
    5035              : 
    5036        28914 :       lab_false = create_artificial_label (UNKNOWN_LOCATION);
    5037        28914 :       lab_over = create_artificial_label (UNKNOWN_LOCATION);
    5038              : 
    5039        28914 :       examine_argument (nat_mode, type, false,
    5040              :                         &needed_intregs, &needed_sseregs);
    5041              : 
    5042        28914 :       bool container_in_reg = false;
    5043        28914 :       if (REG_P (container))
    5044              :         container_in_reg = true;
    5045         1641 :       else if (GET_CODE (container) == PARALLEL
    5046         1641 :                && GET_MODE (container) == BLKmode
    5047          580 :                && XVECLEN (container, 0) == 1)
    5048              :         {
    5049              :           /* Check if it is a PARALLEL BLKmode container of an EXPR_LIST
    5050              :              expression in a TImode register.  In this case, temp isn't
    5051              :              needed.  Otherwise, the TImode variable will be put in the
    5052              :              GPR save area which guarantees only 8-byte alignment.   */
    5053          509 :           rtx x = XVECEXP (container, 0, 0);
    5054          509 :           if (GET_CODE (x) == EXPR_LIST
    5055          509 :               && REG_P (XEXP (x, 0))
    5056          509 :               && XEXP (x, 1) == const0_rtx)
    5057              :             container_in_reg = true;
    5058              :         }
    5059              : 
    5060          680 :       need_temp = (!container_in_reg
    5061         1150 :                    && ((needed_intregs && TYPE_ALIGN (type) > 64)
    5062          680 :                        || TYPE_ALIGN (type) > 128));
    5063              : 
    5064              :       /* In case we are passing structure, verify that it is consecutive block
    5065              :          on the register save area.  If not we need to do moves.  */
    5066          680 :       if (!need_temp && !container_in_reg)
    5067              :         {
    5068              :           /* Verify that all registers are strictly consecutive  */
    5069          966 :           if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
    5070              :             {
    5071              :               int i;
    5072              : 
    5073          815 :               for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
    5074              :                 {
    5075          529 :                   rtx slot = XVECEXP (container, 0, i);
    5076          529 :                   if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
    5077          529 :                       || INTVAL (XEXP (slot, 1)) != i * 16)
    5078              :                     need_temp = true;
    5079              :                 }
    5080              :             }
    5081              :           else
    5082              :             {
    5083              :               int i;
    5084              : 
    5085         1120 :               for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
    5086              :                 {
    5087          726 :                   rtx slot = XVECEXP (container, 0, i);
    5088          726 :                   if (REGNO (XEXP (slot, 0)) != (unsigned int) i
    5089          726 :                       || INTVAL (XEXP (slot, 1)) != i * 8)
    5090              :                     need_temp = true;
    5091              :                 }
    5092              :             }
    5093              :         }
    5094        28914 :       if (!need_temp)
    5095              :         {
    5096              :           int_addr = addr;
    5097              :           sse_addr = addr;
    5098              :         }
    5099              :       else
    5100              :         {
    5101          877 :           int_addr = create_tmp_var (ptr_type_node, "int_addr");
    5102          877 :           sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
    5103              :         }
    5104              : 
    5105              :       /* First ensure that we fit completely in registers.  */
    5106        28914 :       if (needed_intregs)
    5107              :         {
    5108        18148 :           t = build_int_cst (TREE_TYPE (gpr),
    5109        18148 :                              (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
    5110        18148 :           t = build2 (GE_EXPR, boolean_type_node, gpr, t);
    5111        18148 :           t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
    5112        18148 :           t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
    5113        18148 :           gimplify_and_add (t, pre_p);
    5114              :         }
    5115        28914 :       if (needed_sseregs)
    5116              :         {
    5117        11158 :           t = build_int_cst (TREE_TYPE (fpr),
    5118              :                              (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
    5119        11158 :                              + X86_64_REGPARM_MAX * 8);
    5120        11158 :           t = build2 (GE_EXPR, boolean_type_node, fpr, t);
    5121        11158 :           t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
    5122        11158 :           t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
    5123        11158 :           gimplify_and_add (t, pre_p);
    5124              :         }
    5125              : 
    5126              :       /* Compute index to start of area used for integer regs.  */
    5127        28914 :       if (needed_intregs)
    5128              :         {
    5129              :           /* int_addr = gpr + sav; */
    5130        18148 :           t = fold_build_pointer_plus (sav, gpr);
    5131        18148 :           gimplify_assign (int_addr, t, pre_p);
    5132              :         }
    5133        28914 :       if (needed_sseregs)
    5134              :         {
    5135              :           /* sse_addr = fpr + sav; */
    5136        11158 :           t = fold_build_pointer_plus (sav, fpr);
    5137        11158 :           gimplify_assign (sse_addr, t, pre_p);
    5138              :         }
    5139        28914 :       if (need_temp)
    5140              :         {
    5141          877 :           int i, prev_size = 0;
    5142          877 :           tree temp = create_tmp_var (type, "va_arg_tmp");
    5143          877 :           TREE_ADDRESSABLE (temp) = 1;
    5144              : 
    5145              :           /* addr = &temp; */
    5146          877 :           t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
    5147          877 :           gimplify_assign (addr, t, pre_p);
    5148              : 
    5149         2241 :           for (i = 0; i < XVECLEN (container, 0); i++)
    5150              :             {
    5151         1364 :               rtx slot = XVECEXP (container, 0, i);
    5152         1364 :               rtx reg = XEXP (slot, 0);
    5153         1364 :               machine_mode mode = GET_MODE (reg);
    5154         1364 :               tree piece_type;
    5155         1364 :               tree addr_type;
    5156         1364 :               tree daddr_type;
    5157         1364 :               tree src_addr, src;
    5158         1364 :               int src_offset;
    5159         1364 :               tree dest_addr, dest;
    5160         1364 :               int cur_size = GET_MODE_SIZE (mode);
    5161              : 
    5162         1364 :               gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
    5163         1364 :               prev_size = INTVAL (XEXP (slot, 1));
    5164         1364 :               if (prev_size + cur_size > size)
    5165              :                 {
    5166           30 :                   cur_size = size - prev_size;
    5167           30 :                   unsigned int nbits = cur_size * BITS_PER_UNIT;
    5168           30 :                   if (!int_mode_for_size (nbits, 1).exists (&mode))
    5169           10 :                     mode = QImode;
    5170              :                 }
    5171         1364 :               piece_type = lang_hooks.types.type_for_mode (mode, 1);
    5172         1364 :               if (mode == GET_MODE (reg))
    5173         1334 :                 addr_type = build_pointer_type (piece_type);
    5174              :               else
    5175           30 :                 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
    5176              :                                                          true);
    5177         1364 :               daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
    5178              :                                                         true);
    5179              : 
    5180         1364 :               if (SSE_REGNO_P (REGNO (reg)))
    5181              :                 {
    5182          534 :                   src_addr = sse_addr;
    5183          534 :                   src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
    5184              :                 }
    5185              :               else
    5186              :                 {
    5187          830 :                   src_addr = int_addr;
    5188          830 :                   src_offset = REGNO (reg) * 8;
    5189              :                 }
    5190         1364 :               src_addr = fold_convert (addr_type, src_addr);
    5191         1364 :               src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
    5192              : 
    5193         1364 :               dest_addr = fold_convert (daddr_type, addr);
    5194         1364 :               dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
    5195         2728 :               if (cur_size == GET_MODE_SIZE (mode))
    5196              :                 {
    5197         1354 :                   src = build_va_arg_indirect_ref (src_addr);
    5198         1354 :                   dest = build_va_arg_indirect_ref (dest_addr);
    5199              : 
    5200         1354 :                   gimplify_assign (dest, src, pre_p);
    5201              :                 }
    5202              :               else
    5203              :                 {
    5204           10 :                   tree copy
    5205           20 :                     = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
    5206              :                                        3, dest_addr, src_addr,
    5207           10 :                                        size_int (cur_size));
    5208           10 :                   gimplify_and_add (copy, pre_p);
    5209              :                 }
    5210         1364 :               prev_size += cur_size;
    5211              :             }
    5212              :         }
    5213              : 
    5214        28914 :       if (needed_intregs)
    5215              :         {
    5216        18148 :           t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
    5217        18148 :                       build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
    5218        18148 :           gimplify_assign (gpr, t, pre_p);
    5219              :           /* The GPR save area guarantees only 8-byte alignment.  */
    5220        18148 :           if (!need_temp)
    5221        17344 :             type_align = MIN (type_align, 64);
    5222              :         }
    5223              : 
    5224        28914 :       if (needed_sseregs)
    5225              :         {
    5226        11158 :           t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
    5227        11158 :                       build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
    5228        11158 :           gimplify_assign (unshare_expr (fpr), t, pre_p);
    5229              :         }
    5230              : 
    5231        28914 :       gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
    5232              : 
    5233        28914 :       gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    5234              :     }
    5235              : 
    5236              :   /* ... otherwise out of the overflow area.  */
    5237              : 
    5238              :   /* When we align parameter on stack for caller, if the parameter
    5239              :      alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
    5240              :      aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
    5241              :      here with caller.  */
    5242        52007 :   arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
    5243        52007 :   if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    5244              :     arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
    5245              : 
    5246              :   /* Care for on-stack alignment if needed.  */
    5247        52007 :   if (arg_boundary <= 64 || size == 0)
    5248        34967 :     t = ovf;
    5249              :  else
    5250              :     {
    5251        17040 :       HOST_WIDE_INT align = arg_boundary / 8;
    5252        17040 :       t = fold_build_pointer_plus_hwi (ovf, align - 1);
    5253        17040 :       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
    5254        17040 :                   build_int_cst (TREE_TYPE (t), -align));
    5255              :     }
    5256              : 
    5257        52007 :   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
    5258        52007 :   gimplify_assign (addr, t, pre_p);
    5259              : 
    5260        52007 :   t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
    5261        52007 :   gimplify_assign (unshare_expr (ovf), t, pre_p);
    5262              : 
    5263        52007 :   if (container)
    5264        28914 :     gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
    5265              : 
    5266        52007 :   type = build_aligned_type (type, type_align);
    5267        52007 :   ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
    5268        52007 :   addr = fold_convert (ptrtype, addr);
    5269              : 
    5270        52007 :   if (indirect_p)
    5271          103 :     addr = build_va_arg_indirect_ref (addr);
    5272        52007 :   return build_va_arg_indirect_ref (addr);
    5273              : }
    5274              : 
    5275              : /* Return true if OPNUM's MEM should be matched in movabs* patterns.
    5276              :    OPNUM indexes the SET of INSN; a volatile MEM needs volatile_ok.  */
    5277              : 
    5278              : bool
    5279          480 : ix86_check_movabs (rtx insn, int opnum)
    5280              : {
    5281          480 :   rtx set, mem;
    5282              : 
    5283          480 :   set = PATTERN (insn);
    5284          480 :   if (GET_CODE (set) == PARALLEL)  /* Use the first element of a PARALLEL.  */
    5285            0 :     set = XVECEXP (set, 0, 0);
    5286          480 :   gcc_assert (GET_CODE (set) == SET);
    5287          480 :   mem = XEXP (set, opnum);
    5288          480 :   while (SUBREG_P (mem))  /* Look through any SUBREGs.  */
    5289            0 :     mem = SUBREG_REG (mem);
    5290          480 :   gcc_assert (MEM_P (mem));
    5291          480 :   return volatile_ok || !MEM_VOLATILE_P (mem);
    5292              : }
    5293              : 
    5294              : /* Return true if XVECEXP idx of INSN satisfies MOVS arguments.  */
    5295              : bool
    5296       218135 : ix86_check_movs (rtx insn, int idx)
    5297              : {
    5298       218135 :   rtx pat = PATTERN (insn);
    5299       218135 :   gcc_assert (GET_CODE (pat) == PARALLEL);  /* INSN's pattern must be a PARALLEL.  */
    5300              : 
    5301       218135 :   rtx set = XVECEXP (pat, 0, idx);
    5302       218135 :   gcc_assert (GET_CODE (set) == SET);
    5303              : 
    5304       218135 :   rtx dst = SET_DEST (set);
    5305       218135 :   gcc_assert (MEM_P (dst));  /* Both sides of the SET must be MEMs.  */
    5306              : 
    5307       218135 :   rtx src = SET_SRC (set);
    5308       218135 :   gcc_assert (MEM_P (src));
    5309              : 
    5310       218135 :   return (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst))  /* DST must be generic...  */
    5311       436270 :           && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src))  /* ...and SRC too, unless  */
    5312            0 :               || Pmode == word_mode));  /* Pmode == word_mode.  */
    5313              : }
    5314              : 
    5315              : /* Return false if INSN contains a MEM with a non-default address space.  */
    5316              : bool
    5317        65436 : ix86_check_no_addr_space (rtx insn)
    5318              : {
    5319        65436 :   subrtx_var_iterator::array_type array;
    5320      1440044 :   FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)  /* Walk the sub-rtxes of the pattern.  */
    5321              :     {
    5322      1374608 :       rtx x = *iter;
    5323      1505480 :       if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
    5324            0 :         return false;  /* Found a MEM outside the generic address space.  */
    5325              :     }
    5326        65436 :   return true;  /* No such MEM was found.  */
    5327        65436 : }
    5328              : 
    5329              : /* Initialize the table of extra 80387 mathematical constants.  */
    5330              : 
    5331              : static void
    5332         2353 : init_ext_80387_constants (void)
    5333              : {
    5334         2353 :   static const char * cst[5] =  /* Decimal text of each constant, in table order.  */
    5335              :   {
    5336              :     "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    5337              :     "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    5338              :     "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    5339              :     "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    5340              :     "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
    5341              :   };
    5342         2353 :   int i;
    5343              : 
    5344        14118 :   for (i = 0; i < 5; i++)
    5345              :     {
    5346        11765 :       real_from_string (&ext_80387_constants_table[i], cst[i]);
    5347              :       /* Ensure each constant is rounded to XFmode precision.  */
    5348        11765 :       real_convert (&ext_80387_constants_table[i],
    5349        23530 :                     XFmode, &ext_80387_constants_table[i]);
    5350              :     }
    5351              : 
    5352         2353 :   ext_80387_constants_init = 1;  /* Callers test this flag before initializing.  */
    5353         2353 : }
    5354              : 
    5355              : /* Return a positive code if the constant X can be loaded with a special
    5356              :    instruction, 0 if not, or -1 if X is not an x87-mode CONST_DOUBLE.  */
    5357              : 
    5358              : int
    5359      5043671 : standard_80387_constant_p (rtx x)
    5360              : {
    5361      5043671 :   machine_mode mode = GET_MODE (x);
    5362              : 
    5363      5043671 :   const REAL_VALUE_TYPE *r;
    5364              : 
    5365      5043671 :   if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
    5366              :     return -1;  /* Not a CONST_DOUBLE in an x87 float mode.  */
    5367              : 
    5368      4582530 :   if (x == CONST0_RTX (mode))
    5369              :     return 1;  /* Zero: fldz.  */
    5370      2113838 :   if (x == CONST1_RTX (mode))
    5371              :     return 2;  /* One: fld1.  */
    5372              : 
    5373      1230986 :   r = CONST_DOUBLE_REAL_VALUE (x);
    5374              : 
    5375              :   /* For XFmode constants, try to find a special 80387 instruction when
    5376              :      optimizing for size or on those CPUs that benefit from them.  */
    5377      1230986 :   if (mode == XFmode
    5378       796383 :       && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
    5379      2027369 :       && !flag_rounding_math)
    5380              :     {
    5381       788333 :       int i;
    5382              : 
    5383       788333 :       if (! ext_80387_constants_init)
    5384         2346 :         init_ext_80387_constants ();
    5385              : 
    5386      4719506 :       for (i = 0; i < 5; i++)
    5387      3940004 :         if (real_identical (r, &ext_80387_constants_table[i]))
    5388         8831 :           return i + 3;  /* Codes 3..7: table index plus 3.  */
    5389              :     }
    5390              : 
    5391              :   /* Load of the constant -0.0 or -1.0 will be split as
    5392              :      fldz;fchs or fld1;fchs sequence.  */
    5393      1222155 :   if (real_isnegzero (r))
    5394              :     return 8;  /* -0.0.  */
    5395      1205665 :   if (real_identical (r, &dconstm1))
    5396       301849 :     return 9;  /* -1.0.  */
    5397              : 
    5398              :   return 0;  /* No special instruction applies.  */
    5399              : }
    5400              : 
    5401              : /* Return the opcode of the special instruction to be used to load
    5402              :    the constant X, keyed on the code from standard_80387_constant_p.  */
    5403              : 
    5404              : const char *
    5405        54462 : standard_80387_constant_opcode (rtx x)
    5406              : {
    5407        54462 :   switch (standard_80387_constant_p (x))
    5408              :     {
    5409              :     case 1:
    5410              :       return "fldz";
    5411        33984 :     case 2:
    5412        33984 :       return "fld1";
    5413            1 :     case 3:
    5414            1 :       return "fldlg2";
    5415           10 :     case 4:
    5416           10 :       return "fldln2";
    5417           12 :     case 5:
    5418           12 :       return "fldl2e";
    5419            2 :     case 6:
    5420            2 :       return "fldl2t";
    5421          192 :     case 7:
    5422          192 :       return "fldpi";
    5423            0 :     case 8:  /* -0.0  */
    5424            0 :     case 9:  /* -1.0  */
    5425            0 :       return "#";  /* No single opcode: these loads are split (fldz/fld1 then fchs).  */
    5426            0 :     default:  /* Codes -1 and 0 have no special instruction.  */
    5427            0 :       gcc_unreachable ();
    5428              :     }
    5429              : }
    5430              : 
    5431              : /* Return the CONST_DOUBLE representing the 80387 constant that is
    5432              :    loaded by the specified special instruction.  The argument IDX
    5433              :    matches the return value from standard_80387_constant_p.  */
    5434              : 
    5435              : rtx
    5436           24 : standard_80387_constant_rtx (int idx)
    5437              : {
    5438           24 :   int i;
    5439              : 
    5440           24 :   if (! ext_80387_constants_init)
    5441            7 :     init_ext_80387_constants ();
    5442              : 
    5443           24 :   switch (idx)
    5444              :     {
    5445           24 :     case 3:
    5446           24 :     case 4:
    5447           24 :     case 5:
    5448           24 :     case 6:
    5449           24 :     case 7:
    5450           24 :       i = idx - 3;  /* Codes 3..7 map to table entries 0..4.  */
    5451           24 :       break;
    5452              : 
    5453            0 :     default:  /* Only the table-backed codes 3..7 are accepted.  */
    5454            0 :       gcc_unreachable ();
    5455              :     }
    5456              : 
    5457           24 :   return const_double_from_real_value (ext_80387_constants_table[i],
    5458           24 :                                        XFmode);
    5459              : }
    5460              : 
    5461              : /* Return 1 if X is all bits 0, 2 if X is all bits 1, 3 if X is all bits 1
    5462              :    with zero extend in supported SSE/AVX vector mode, and 0 otherwise.
    5463              :    PRED_MODE supplies the mode when X is a VOIDmode integer constant.  */
    5464              : 
    5465              : int
    5466     55141975 : standard_sse_constant_p (rtx x, machine_mode pred_mode)
    5467              : {
    5468     55141975 :   machine_mode mode;
    5469              : 
    5470     55141975 :   if (!TARGET_SSE)
    5471              :     return 0;  /* Without SSE nothing qualifies.  */
    5472              : 
    5473     54972944 :   mode = GET_MODE (x);
    5474              : 
    5475     54972944 :   if (x == const0_rtx || const0_operand (x, mode))
    5476     13117044 :     return 1;
    5477              : 
    5478     41855900 :   if (x == constm1_rtx
    5479     41717496 :       || vector_all_ones_operand (x, mode)
    5480     83003772 :       || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
    5481     34434176 :            || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT)
    5482      6714643 :           && float_vector_all_ones_operand (x, mode)))
    5483              :     {
    5484              :       /* VOIDmode integer constant, get mode from the predicate.  */
    5485       710119 :       if (mode == VOIDmode)
    5486       138404 :         mode = pred_mode;
    5487              : 
    5488      1420238 :       switch (GET_MODE_SIZE (mode))  /* All-ones needs an ISA level matching the size.  */
    5489              :         {
    5490        30660 :         case 64:
    5491        30660 :           if (TARGET_AVX512F)
    5492              :             return 2;
    5493              :           break;
    5494        39941 :         case 32:
    5495        39941 :           if (TARGET_AVX2)
    5496              :             return 2;
    5497              :           break;
    5498       627139 :         case 16:
    5499       627139 :           if (TARGET_SSE2)
    5500              :             return 2;
    5501              :           break;
    5502            0 :         case 0:
    5503              :           /* VOIDmode */
    5504            0 :           gcc_unreachable ();
    5505              :         default:
    5506              :           break;  /* Other sizes fall through to the checks below.  */
    5507              :         }
    5508              :     }
    5509              : 
    5510     41159091 :   if (vector_all_ones_zero_extend_half_operand (x, mode)
    5511     41159091 :       || vector_all_ones_zero_extend_quarter_operand (x, mode))
    5512          706 :     return 3;
    5513              : 
    5514              :   return 0;  /* Not a standard SSE constant.  */
    5515              : }
    5516              : 
    5517              : /* Return the opcode of the special instruction to be used to load
    5518              :    the constant operands[1] into operands[0]; INSN supplies the mode attribute.  */
    5519              : 
    5520              : const char *
    5521       464886 : standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
    5522              : {
    5523       464886 :   machine_mode mode;
    5524       464886 :   rtx x = operands[1];
    5525              : 
    5526       464886 :   gcc_assert (TARGET_SSE);
    5527              : 
    5528       464886 :   mode = GET_MODE (x);
    5529              : 
    5530       464886 :   if (x == const0_rtx || const0_operand (x, mode))  /* All-zeros constant.  */
    5531              :     {
    5532       453196 :       switch (get_attr_mode (insn))
    5533              :         {
    5534       435435 :         case MODE_TI:
    5535       435435 :           if (!EXT_REX_SSE_REG_P (operands[0]))
    5536              :             return "%vpxor\t%0, %d0";
    5537              :           /* FALLTHRU */
    5538         6184 :         case MODE_XI:
    5539         6184 :         case MODE_OI:
    5540         6184 :           if (EXT_REX_SSE_REG_P (operands[0]))
    5541              :             {
    5542           67 :               if (TARGET_AVX512VL)
    5543              :                 return "vpxord\t%x0, %x0, %x0";
    5544              :               else
    5545           28 :                 return "vpxord\t%g0, %g0, %g0";
    5546              :             }
    5547              :           return "vpxor\t%x0, %x0, %x0";
    5548              : 
    5549         2107 :         case MODE_V2DF:
    5550         2107 :           if (!EXT_REX_SSE_REG_P (operands[0]))
    5551              :             return "%vxorpd\t%0, %d0";
    5552              :           /* FALLTHRU */
    5553          847 :         case MODE_V8DF:
    5554          847 :         case MODE_V4DF:
    5555          847 :           if (EXT_REX_SSE_REG_P (operands[0]))
    5556              :             {
    5557            4 :               if (TARGET_AVX512DQ)
    5558              :                 {
    5559            0 :                   if (TARGET_AVX512VL)
    5560              :                     return "vxorpd\t%x0, %x0, %x0";
    5561              :                   else
    5562            0 :                     return "vxorpd\t%g0, %g0, %g0";
    5563              :                 }
    5564              :               else
    5565              :                 {
    5566            4 :                   if (TARGET_AVX512VL)
    5567              :                     return "vpxorq\t%x0, %x0, %x0";
    5568              :                   else
    5569            4 :                     return "vpxorq\t%g0, %g0, %g0";
    5570              :                 }
    5571              :             }
    5572              :           return "vxorpd\t%x0, %x0, %x0";
    5573              : 
    5574         6672 :         case MODE_V4SF:
    5575         6672 :           if (!EXT_REX_SSE_REG_P (operands[0]))
    5576              :             return "%vxorps\t%0, %d0";
    5577              :           /* FALLTHRU */
    5578         2015 :         case MODE_V16SF:
    5579         2015 :         case MODE_V8SF:
    5580         2015 :           if (EXT_REX_SSE_REG_P (operands[0]))
    5581              :             {
    5582           65 :               if (TARGET_AVX512DQ)
    5583              :                 {
    5584           26 :                   if (TARGET_AVX512VL)
    5585              :                     return "vxorps\t%x0, %x0, %x0";
    5586              :                   else
    5587            0 :                     return "vxorps\t%g0, %g0, %g0";
    5588              :                 }
    5589              :               else
    5590              :                 {
    5591           39 :                   if (TARGET_AVX512VL)
    5592              :                     return "vpxord\t%x0, %x0, %x0";
    5593              :                   else
    5594           37 :                     return "vpxord\t%g0, %g0, %g0";
    5595              :                 }
    5596              :             }
    5597              :           return "vxorps\t%x0, %x0, %x0";
    5598              : 
    5599            0 :         default:
    5600            0 :           gcc_unreachable ();
    5601              :         }
    5602              :     }
    5603        11690 :   else if (x == constm1_rtx  /* All-ones constant.  */
    5604        11679 :            || vector_all_ones_operand (x, mode)
    5605        11757 :            || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
    5606           45 :                && float_vector_all_ones_operand (x, mode)))
    5607              :     {
    5608        11668 :       enum attr_mode insn_mode = get_attr_mode (insn);
    5609              : 
    5610        11668 :       switch (insn_mode)
    5611              :         {
    5612            3 :         case MODE_XI:
    5613            3 :         case MODE_V8DF:
    5614            3 :         case MODE_V16SF:
    5615            3 :           gcc_assert (TARGET_AVX512F);
    5616              :           return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
    5617              : 
    5618          959 :         case MODE_OI:
    5619          959 :         case MODE_V4DF:
    5620          959 :         case MODE_V8SF:
    5621          959 :           gcc_assert (TARGET_AVX2);
    5622              :           /* FALLTHRU */
    5623        11665 :         case MODE_TI:
    5624        11665 :         case MODE_V2DF:
    5625        11665 :         case MODE_V4SF:
    5626        11665 :           gcc_assert (TARGET_SSE2);
    5627        11665 :           if (EXT_REX_SSE_REG_P (operands[0]))
    5628              :             {
    5629            2 :               if (TARGET_AVX512VL)
    5630              :                 return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
    5631              :               else
    5632            0 :                 return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
    5633              :             }
    5634        11663 :           return (TARGET_AVX
    5635        11663 :                   ? "vpcmpeqd\t%0, %0, %0"
    5636        11663 :                   : "pcmpeqd\t%0, %0");
    5637              : 
    5638            0 :         default:
    5639            0 :           gcc_unreachable ();
    5640              :         }
    5641              :    }
    5642           22 :   else if (vector_all_ones_zero_extend_half_operand (x, mode))  /* Code 3 of standard_sse_constant_p, half variant.  */
    5643              :     {
    5644           40 :       if (GET_MODE_SIZE (mode) == 64)
    5645              :         {
    5646            5 :           gcc_assert (TARGET_AVX512F);
    5647              :           return "vpcmpeqd\t%t0, %t0, %t0";
    5648              :         }
    5649           30 :       else if (GET_MODE_SIZE (mode) == 32)
    5650              :         {
    5651           15 :           gcc_assert (TARGET_AVX);
    5652              :           return "vpcmpeqd\t%x0, %x0, %x0";
    5653              :         }
    5654            0 :       gcc_unreachable ();
    5655              :     }
    5656            2 :   else if (vector_all_ones_zero_extend_quarter_operand (x, mode))  /* Code 3, quarter variant.  */
    5657              :     {
    5658            2 :       gcc_assert (TARGET_AVX512F);
    5659              :       return "vpcmpeqd\t%x0, %x0, %x0";
    5660              :     }
    5661              : 
    5662            0 :   gcc_unreachable ();  /* X matched none of the constant forms above.  */
    5663              : }
    5664              : 
    5665              : /* Returns true if INSN can be transformed from a memory load
    5666              :    to a supported FP constant load into the register DST.  */
    5667              : 
    5668              : bool
    5669      2143789 : ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
    5670              : {
    5671      2143789 :   rtx src = find_constant_src (insn);
    5672              : 
    5673      2143789 :   gcc_assert (REG_P (dst));
    5674              : 
    5675      2143789 :   if (src == NULL  /* No constant source was found for INSN.  */
    5676       597391 :       || (SSE_REGNO_P (REGNO (dst))  /* SSE DST: only code 1 (all zeros) is accepted.  */
    5677       465329 :           && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
    5678       165988 :       || (!TARGET_AVX512VL  /* Without AVX512VL, reject zeroing an EXT_REX SSE DST.  */
    5679       165927 :           && EXT_REX_SSE_REGNO_P (REGNO (dst))
    5680            0 :           && standard_sse_constant_p (src, GET_MODE (dst)) == 1)
    5681      2309777 :       || (STACK_REGNO_P (REGNO (dst))  /* x87 DST: needs a positive 80387 constant code.  */
    5682       132062 :            && standard_80387_constant_p (src) < 1))
    5683      2067258 :     return false;
    5684              : 
    5685              :   return true;
    5686              : }
    5687              : 
    5688              : /* Predicate for pre-reload splitters with associated instructions,
    5689              :    which can match any time before the split1 pass (usually combine),
    5690              :    then are unconditionally split in that pass and should not be
    5691              :    matched again afterwards.  */
    5692              : 
    5693              : bool
    5694     17717758 : ix86_pre_reload_split (void)
    5695              : {
    5696     17717758 :   return (can_create_pseudo_p ()  /* Pseudos can still be created...  */
    5697     27033743 :           && !(cfun->curr_properties & PROP_rtl_split_insns));  /* ...and PROP_rtl_split_insns is not yet set.  */
    5698              : }
    5699              : 
    5700              : /* Return the opcode of the TYPE_SSEMOV instruction.  To move from
    5701              :    or to xmm16-xmm31/ymm16-ymm31 registers, we either require
    5702              :    TARGET_AVX512VL or it is a register to register move which can
    5703              :    be done with zmm register move. */
    5704              : 
    5705              : static const char *
    5706      4164947 : ix86_get_ssemov (rtx *operands, unsigned size,
    5707              :                  enum attr_mode insn_mode, machine_mode mode)
    5708              : {
    5709      4164947 :   char buf[128];  /* Opcode followed by the operand template.  */
    5710      4164947 :   bool misaligned_p = (misaligned_operand (operands[0], mode)
    5711      4164947 :                        || misaligned_operand (operands[1], mode));
    5712      4164947 :   bool evex_reg_p = (size == 64
    5713      4078437 :                      || EXT_REX_SSE_REG_P (operands[0])
    5714      8242643 :                      || EXT_REX_SSE_REG_P (operands[1]));
    5715              : 
    5716      4164947 :   bool egpr_p = (TARGET_APX_EGPR
    5717      4164947 :                  && (x86_extended_rex2reg_mentioned_p (operands[0])
    5718          182 :                      || x86_extended_rex2reg_mentioned_p (operands[1])));
    5719          196 :   bool egpr_vl = egpr_p && TARGET_AVX512VL;  /* Treated like evex_reg_p below.  */
    5720              : 
    5721      4164947 :   machine_mode scalar_mode;  /* Element mode that drives the opcode choice.  */
    5722              : 
    5723      4164947 :   const char *opcode = NULL;
    5724      4164947 :   enum
    5725              :     {
    5726              :       opcode_int,
    5727              :       opcode_float,
    5728              :       opcode_double
    5729      4164947 :     } type = opcode_int;  /* Stays opcode_int for MODE_XI/MODE_OI/MODE_TI.  */
    5730              : 
    5731      4164947 :   switch (insn_mode)  /* Derive SCALAR_MODE and TYPE from the insn mode.  */
    5732              :     {
    5733              :     case MODE_V16SF:
    5734              :     case MODE_V8SF:
    5735              :     case MODE_V4SF:
    5736              :       scalar_mode = E_SFmode;
    5737              :       type = opcode_float;
    5738              :       break;
    5739       209355 :     case MODE_V8DF:
    5740       209355 :     case MODE_V4DF:
    5741       209355 :     case MODE_V2DF:
    5742       209355 :       scalar_mode = E_DFmode;
    5743       209355 :       type = opcode_double;
    5744       209355 :       break;
    5745      1508589 :     case MODE_XI:
    5746      1508589 :     case MODE_OI:
    5747      1508589 :     case MODE_TI:
    5748      1508589 :       scalar_mode = GET_MODE_INNER (mode);  /* Element mode comes from MODE.  */
    5749              :       break;
    5750            0 :     default:
    5751            0 :       gcc_unreachable ();
    5752              :     }
    5753              : 
    5754              :   /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
    5755              :      we can only use zmm register move without memory operand.  */
    5756      4164947 :   if (evex_reg_p
    5757        88559 :       && !TARGET_AVX512VL
    5758      4215199 :       && GET_MODE_SIZE (mode) < 64)
    5759              :     {
    5760              :       /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
    5761              :          xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
    5762              :          AVX512VL is disabled, LRA can still generate reg to
    5763              :          reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
    5764              :          modes.  */
    5765            0 :       if (memory_operand (operands[0], mode)
    5766            0 :           || memory_operand (operands[1], mode))
    5767            0 :         gcc_unreachable ();
    5768            0 :       size = 64;  /* Use the 64-byte operand template in the switch on SIZE.  */
    5769            0 :       switch (type)
    5770              :         {
    5771            0 :         case opcode_int:
    5772            0 :           if (scalar_mode == E_HFmode || scalar_mode == E_BFmode)
    5773            0 :             opcode = (misaligned_p
    5774            0 :                       ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
    5775              :                       : "vmovdqa64");
    5776              :           else
    5777            0 :             opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
    5778              :           break;
    5779            0 :         case opcode_float:
    5780            0 :           opcode = misaligned_p ? "vmovups" : "vmovaps";
    5781              :           break;
    5782            0 :         case opcode_double:
    5783            0 :           opcode = misaligned_p ? "vmovupd" : "vmovapd";
    5784              :           break;
    5785              :         }
    5786              :     }
    5787      4164947 :   else if (SCALAR_FLOAT_MODE_P (scalar_mode))
    5788              :     {
    5789      2832649 :       switch (scalar_mode)
    5790              :         {
    5791        36799 :         case E_HFmode:
    5792        36799 :         case E_BFmode:
    5793        36799 :           if (evex_reg_p || egpr_vl)
    5794        11597 :             opcode = (misaligned_p
    5795          173 :                       ? (TARGET_AVX512BW
    5796              :                          ? "vmovdqu16"
    5797              :                          : "vmovdqu64")
    5798              :                       : "vmovdqa64");
    5799        25202 :           else if (egpr_p)  /* Here %vmovups/%vmovaps replace %vmovdqu/%vmovdqa.  */
    5800       790287 :             opcode = (misaligned_p
    5801            0 :                       ? (TARGET_AVX512BW
    5802            0 :                          ? "vmovdqu16"
    5803              :                          : "%vmovups")
    5804              :                       : "%vmovaps");
    5805              :           else
    5806       412967 :             opcode = (misaligned_p
    5807        25202 :                       ? (TARGET_AVX512BW && evex_reg_p  /* evex_reg_p is false on this path.  */
    5808              :                          ? "vmovdqu16"
    5809              :                          : "%vmovdqu")
    5810              :                       : "%vmovdqa");
    5811              :           break;
    5812      2447003 :         case E_SFmode:
    5813      2447003 :           opcode = misaligned_p ? "%vmovups" : "%vmovaps";
    5814              :           break;
    5815       209355 :         case E_DFmode:
    5816       209355 :           opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
    5817              :           break;
    5818       139492 :         case E_TFmode:
    5819       139492 :           if (evex_reg_p || egpr_vl)
    5820           14 :             opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
    5821       139478 :           else if (egpr_p)
    5822            0 :             opcode = misaligned_p ? "%vmovups" : "%vmovaps";
    5823              :           else
    5824       139478 :             opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
    5825              :           break;
    5826            0 :         default:
    5827            0 :           gcc_unreachable ();
    5828              :         }
    5829              :     }
    5830      1332298 :   else if (SCALAR_INT_MODE_P (scalar_mode))
    5831              :     {
    5832      1332298 :       switch (scalar_mode)
    5833              :         {
    5834       101329 :         case E_QImode:
    5835       101329 :           if (evex_reg_p || egpr_vl)
    5836      4175158 :             opcode = (misaligned_p
    5837        10211 :                       ? (TARGET_AVX512BW
    5838         5071 :                          ? "vmovdqu8"
    5839              :                          : "vmovdqu64")
    5840              :                       : "vmovdqa64");
    5841        91118 :           else if (egpr_p)
    5842           30 :             opcode = (misaligned_p
    5843            0 :                       ? (TARGET_AVX512BW
    5844              :                          ? "vmovdqu8"
    5845              :                          : "%vmovups")
    5846              :                       : "%vmovaps");
    5847              :           else
    5848        91088 :             opcode = (misaligned_p
    5849              :                       ? (TARGET_AVX512BW && evex_reg_p  /* evex_reg_p is false on this path.  */
    5850              :                          ? "vmovdqu8"
    5851              :                          : "%vmovdqu")
    5852              :                       : "%vmovdqa");
    5853              :           break;
    5854        42943 :         case E_HImode:
    5855        42943 :           if (evex_reg_p || egpr_vl)
    5856         3836 :             opcode = (misaligned_p
    5857          300 :                       ? (TARGET_AVX512BW
    5858              :                          ? "vmovdqu16"
    5859              :                          : "vmovdqu64")
    5860              :                       : "vmovdqa64");
    5861        39107 :           else if (egpr_p)
    5862       790287 :             opcode = (misaligned_p
    5863           27 :                       ? (TARGET_AVX512BW
    5864            0 :                          ? "vmovdqu16"
    5865              :                          : "%vmovups")
    5866              :                       : "%vmovaps");
    5867              :           else
    5868       387765 :             opcode = (misaligned_p
    5869        39080 :                       ? (TARGET_AVX512BW && evex_reg_p  /* evex_reg_p is false on this path.  */
    5870              :                          ? "vmovdqu16"
    5871              :                          : "%vmovdqu")
    5872              :                       : "%vmovdqa");
    5873              :           break;
    5874       182384 :         case E_SImode:
    5875       182384 :           if (evex_reg_p || egpr_vl)
    5876         8211 :             opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
    5877       174173 :           else if (egpr_p)
    5878           14 :             opcode = misaligned_p ? "%vmovups" : "%vmovaps";
    5879              :           else
    5880       174159 :             opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
    5881              :           break;
    5882       993834 :         case E_DImode:
    5883       993834 :         case E_TImode:
    5884       993834 :         case E_OImode:
    5885       993834 :           if (evex_reg_p || egpr_vl)
    5886        18531 :             opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
    5887       975303 :           else if (egpr_p)
    5888           26 :             opcode = misaligned_p ? "%vmovups" : "%vmovaps";
    5889              :           else
    5890       975277 :             opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
    5891              :           break;
    5892        11808 :         case E_XImode:
    5893        49646 :           opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
    5894              :           break;
    5895            0 :         default:
    5896            0 :           gcc_unreachable ();
    5897              :         }
    5898              :     }
    5899              :   else
    5900            0 :     gcc_unreachable ();
    5901              : 
    5902      4164947 :   switch (size)  /* SIZE selects the operand modifiers: %g, %t or %x.  */
    5903              :     {
    5904        86510 :     case 64:
    5905        86510 :       snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
    5906              :                 opcode);
    5907        86510 :       break;
    5908        91984 :     case 32:
    5909        91984 :       snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
    5910              :                 opcode);
    5911        91984 :       break;
    5912      3986453 :     case 16:
    5913      3986453 :       snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
    5914              :                 opcode);
    5915      3986453 :       break;
    5916            0 :     default:
    5917            0 :       gcc_unreachable ();
    5918              :     }
    5919      4164947 :   output_asm_insn (buf, operands);
    5920      4164947 :   return "";  /* BUF was handed to output_asm_insn above, so the template is empty.  */
    5921              : }
    5922              : 
    5923              : /* Return the template of the TYPE_SSEMOV instruction INSN to move
    5924              :    operands[1] into operands[0].  Both operands must have the same
                      :    mode.  The 16-, 32- and 64-byte vector insn modes are delegated to
                      :    ix86_get_ssemov; every other handled insn mode returns a fixed
                      :    template.  An unhandled insn type or insn mode is unreachable.  */
    5925              : 
    5926              : const char *
    5927      6543368 : ix86_output_ssemov (rtx_insn *insn, rtx *operands)
    5928              : {
    5929      6543368 :   machine_mode mode = GET_MODE (operands[0]);
    5930      6543368 :   if (get_attr_type (insn) != TYPE_SSEMOV
    5931      6543368 :       || mode != GET_MODE (operands[1]))
    5932            0 :     gcc_unreachable ();
    5933              : 
    5934      6543368 :   enum attr_mode insn_mode = get_attr_mode (insn);
    5935              : 
    5936      6543368 :   switch (insn_mode)
    5937              :     {
    5938        86510 :     case MODE_XI:
    5939        86510 :     case MODE_V8DF:
    5940        86510 :     case MODE_V16SF:
    5941        86510 :       return ix86_get_ssemov (operands, 64, insn_mode, mode);
    5942              : 
    5943        91984 :     case MODE_OI:
    5944        91984 :     case MODE_V4DF:
    5945        91984 :     case MODE_V8SF:
    5946        91984 :       return ix86_get_ssemov (operands, 32, insn_mode, mode);
    5947              : 
    5948      3986453 :     case MODE_TI:
    5949      3986453 :     case MODE_V2DF:
    5950      3986453 :     case MODE_V4SF:
    5951      3986453 :       return ix86_get_ssemov (operands, 16, insn_mode, mode);
    5952              : 
    5953       664193 :     case MODE_DI:
    5954              :       /* Handle broken assemblers that require movd instead of movq.  */
    5955       664193 :       if (GENERAL_REG_P (operands[0]))
    5956              :         {
    5957              :           if (HAVE_AS_IX86_INTERUNIT_MOVQ)
    5958              :             return "%vmovq\t{%1, %q0|%q0, %1}";
    5959              :           else
    5960              :             return "%vmovd\t{%1, %q0|%q0, %1}";
    5961              :         }
    5962       587569 :       else if (GENERAL_REG_P (operands[1]))
    5963              :         {
    5964              :           if (HAVE_AS_IX86_INTERUNIT_MOVQ)
    5965              :             return "%vmovq\t{%q1, %0|%0, %q1}";
    5966              :           else
    5967              :             return "%vmovd\t{%q1, %0|%0, %q1}";
    5968              :         }
    5969              :       else
    5970       420552 :         return "%vmovq\t{%1, %0|%0, %1}";
    5971              : 
    5972       202628 :     case MODE_SI:
    5973       202628 :       if (GENERAL_REG_P (operands[0]))
    5974              :         return "%vmovd\t{%1, %k0|%k0, %1}";
    5975       146136 :       else if (GENERAL_REG_P (operands[1]))
    5976              :         return "%vmovd\t{%k1, %0|%0, %k1}";
    5977              :       else
    5978        60646 :         return "%vmovd\t{%1, %0|%0, %1}";
    5979              : 
    5980        54085 :     case MODE_HI:
    5981        54085 :       if (GENERAL_REG_P (operands[0]))
    5982              :         return "vmovw\t{%1, %k0|%k0, %1}";
    5983        53922 :       else if (GENERAL_REG_P (operands[1]))
    5984              :         return "vmovw\t{%k1, %0|%0, %k1}";
    5985              :       else
    5986        53688 :         return "vmovw\t{%1, %0|%0, %1}";
    5987              : 
    5988       780855 :     case MODE_DF:
    5989       780855 :       if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
    5990              :         return "vmovsd\t{%d1, %0|%0, %d1}";
    5991              :       else
    5992       780013 :         return "%vmovsd\t{%1, %0|%0, %1}";
    5993              : 
    5994       672548 :     case MODE_SF:
    5995       672548 :       if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
    5996              :         return "vmovss\t{%d1, %0|%0, %d1}";
    5997              :       else
    5998       672008 :         return "%vmovss\t{%1, %0|%0, %1}";
    5999              : 
    6000           96 :     case MODE_HF:
    6001           96 :     case MODE_BF:
    6002           96 :       if (REG_P (operands[0]) && REG_P (operands[1]))
    6003              :         return "vmovsh\t{%d1, %0|%0, %d1}";
    6004              :       else
    6005            0 :         return "vmovsh\t{%1, %0|%0, %1}";
    6006              : 
    6007           36 :     case MODE_V1DF:
    6008           36 :       gcc_assert (!TARGET_AVX);  /* The movlpd template below has no %v prefix.  */
    6009              :       return "movlpd\t{%1, %0|%0, %1}";
    6010              : 
    6011         3980 :     case MODE_V2SF:
    6012         3980 :       if (TARGET_AVX && REG_P (operands[0]))
    6013              :         return "vmovlps\t{%1, %d0|%d0, %1}";
    6014              :       else
    6015         3907 :         return "%vmovlps\t{%1, %0|%0, %1}";
    6016              : 
    6017            0 :     default:
    6018            0 :       gcc_unreachable ();
    6019              :     }
    6020              : }
    6021              : 
    6022              : /* Return true if OP is or contains a SYMBOL_REF or a LABEL_REF.  */
    6023              : 
    6024              : bool
    6025    573061797 : symbolic_reference_mentioned_p (const_rtx op)
    6026              : {
    6027    573061797 :   const char *fmt;
    6028    573061797 :   int i;
    6029              : 
    6030    573061797 :   if (SYMBOL_REF_P (op) || LABEL_REF_P (op))
    6031              :     return true;
    6032              : 
    6033    433219426 :   fmt = GET_RTX_FORMAT (GET_CODE (op));
    6034    735207357 :   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    6035              :     {
    6036    586551683 :       if (fmt[i] == 'E')  /* Operand I is a vector: check every element.  */
    6037              :         {
    6038      2021445 :           int j;
    6039              : 
    6040      4044058 :           for (j = XVECLEN (op, i) - 1; j >= 0; j--)
    6041      3329423 :             if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
    6042              :               return true;
    6043              :         }
    6044              : 
    6045    584530238 :       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))  /* Single sub-expression.  */
    6046              :         return true;
    6047              :     }
    6048              : 
    6049              :   return false;
    6050              : }
    6051              : 
    6052              : /* Return true if it is appropriate to emit `ret' instructions in the
    6053              :    body of a function.  Do this only if the epilogue is simple, needing a
    6054              :    couple of insns.  Prior to reloading, we can't tell how many registers
    6055              :    must be saved, so return false then.  Return false if there is no frame
    6056              :    marker to de-allocate.  Also return false when
                      :    ix86_function_ms_hook_prologue or ix86_function_naked holds for the
                      :    current function, when a frame pointer is needed, when the function
                      :    type is not TYPE_NORMAL, and when 32768 or more bytes of arguments
                      :    are popped.  */
    6057              : 
    6058              : bool
    6059            0 : ix86_can_use_return_insn_p (void)
    6060              : {
    6061            0 :   if (ix86_function_ms_hook_prologue (current_function_decl))
    6062              :     return false;
    6063              : 
    6064            0 :   if (ix86_function_naked (current_function_decl))
    6065              :     return false;
    6066              : 
    6067              :   /* Don't use `ret' instruction before reload has completed, when a
                      :      frame pointer is needed, or in anything but a normal function,
                      :      e.g. an interrupt handler.  */
    6068            0 :   if (! reload_completed
    6069            0 :       || frame_pointer_needed
    6070            0 :       || cfun->machine->func_type != TYPE_NORMAL)
    6071              :     return 0;
    6072              : 
    6073              :   /* Don't allow more than 32k pop, since that's all we can do
    6074              :      with one instruction.  */
    6075            0 :   if (crtl->args.pops_args && crtl->args.size >= 32768)
    6076              :     return 0;
    6077              :   /* Require a stack pointer offset of exactly one word and nregs + nsseregs == 0.  */
    6078            0 :   struct ix86_frame &frame = cfun->machine->frame;
    6079            0 :   return (frame.stack_pointer_offset == UNITS_PER_WORD
    6080            0 :           && (frame.nregs + frame.nsseregs) == 0);
    6081              : }
    6082              : 
    6083              : /* Return the stack frame size, or 0 if no stack frame is required.
    6084              :    get_frame_size () returns used stack slots during compilation, which
    6085              :    may be optimized out later, so it is consulted only when
                      :    cfun->machine->stack_frame_required is true.  */
    6086              : 
    6087              : static HOST_WIDE_INT
    6088      8275840 : ix86_get_frame_size (void)
    6089              : {
    6090      8275840 :   if (cfun->machine->stack_frame_required)
    6091      8206235 :     return get_frame_size ();
    6092              :   else
    6093              :     return 0;
    6094              : }
    6095              : 
    6096              : /* Return true if the current function must have a frame pointer.
    6097              :    False means the frame pointer need not be set up (and parms may
    6098              :    be accessed via the stack pointer) in functions that seem suitable.  */
    6099              : 
    6100              : static bool
    6101      1222360 : ix86_frame_pointer_required (void)
    6102              : {
    6103              :   /* If we accessed previous frames, then the generated code expects
    6104              :      to be able to access the saved ebp value in our frame.  */
    6105      1222360 :   if (cfun->machine->accesses_prev_frame)
    6106              :     return true;
    6107              : 
    6108              :   /* Several x86 OSes need a frame pointer for other reasons,
    6109              :      usually pertaining to setjmp.  */
    6110      1222327 :   if (SUBTARGET_FRAME_POINTER_REQUIRED)
    6111              :     return true;
    6112              : 
    6113              :   /* For older 32-bit runtimes setjmp requires a valid frame pointer.  */
    6114      1222327 :   if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    6115              :     return true;
    6116              : 
    6117              :   /* With Win64 SEH, very large frames need a frame pointer because the
    6118              :      maximum stack allocation is 4GB.  */
    6119      1222327 :   if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
    6120              :     return true;
    6121              : 
    6122              :   /* SSE saves require a frame pointer when the incoming stack boundary is below 128.  */
    6123      1222327 :   if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
    6124              :     return true;
    6125              : 
    6126              :   /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
    6127              :      turns off the frame pointer by default.  Turn it back on now if
    6128              :      we've not got a leaf function.  */
    6129      1222326 :   if (TARGET_OMIT_LEAF_FRAME_POINTER
    6130      1222326 :       && (!crtl->is_leaf
    6131            0 :           || ix86_current_function_calls_tls_descriptor))
    6132            0 :     return true;
    6133              : 
    6134              :   /* Several versions of mcount for the x86 assume that there is a
    6135              :      frame, so we cannot allow profiling without a frame pointer.  */
    6136      1222326 :   if (crtl->profile && !flag_fentry)
    6137              :     return true;
    6138              : 
    6139              :   return false;
    6140              : }
    6141              : 
    6142              : /* Record that the current function accesses previous call frames.
                      :    ix86_frame_pointer_required returns true once this flag is set.  */
    6143              : 
    6144              : void
    6145          966 : ix86_setup_frame_addresses (void)
    6146              : {
    6147          966 :   cfun->machine->accesses_prev_frame = 1;
    6148          966 : }
    6149              : 
    6150              : #if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
    6151              : # define USE_HIDDEN_LINKONCE 1  /* indirect_thunk_name uses the "__x86_..._thunk" names.  */
    6152              : #else
    6153              : # define USE_HIDDEN_LINKONCE 0  /* indirect_thunk_name generates internal labels.  */
    6154              : #endif
    6155              : 
    6156              : /* Label count for call and return thunks.  It is used to make unique
    6157              :    labels in call and return thunks.  */
    6158              : static int indirectlabelno;
    6159              : 
    6160              : /* True if call thunk function is needed.  */
    6161              : static bool indirect_thunk_needed = false;
    6162              : 
    6163              : /* Set of integer registers, which contain branch target, used
    6164              :    by call thunk functions.  */
    6165              : static HARD_REG_SET indirect_thunks_used;
    6166              : 
    6167              : /* True if return thunk function is needed.  */
    6168              : static bool indirect_return_needed = false;
    6169              : 
    6170              : /* True if return thunk function via CX is needed.  */
    6171              : static bool indirect_return_via_cx;
    6172              : 
    6173              : #ifndef INDIRECT_LABEL
    6174              : # define INDIRECT_LABEL "LIND"  /* Prefix of the internal labels made in output_indirect_thunk.  */
    6175              : #endif
    6176              : 
    6177              : /* Indicate what prefix is needed for an indirect branch.  */
    6178              : enum indirect_thunk_prefix
    6179              : {
    6180              :   indirect_thunk_prefix_none,  /* No prefix.  */
    6181              :   indirect_thunk_prefix_nt  /* NOTRACK prefix; selects the "_nt" thunk names.  */
    6182              : };
    6183              : 
    6184              : /* Return the prefix needed for an indirect branch INSN:
                      :    indirect_thunk_prefix_nt when the current function's indirect
                      :    branch type is indirect_branch_thunk_extern and
                      :    ix86_notrack_prefixed_insn_p (INSN) holds, and
                      :    indirect_thunk_prefix_none otherwise.  */
    6185              : 
    6186              : enum indirect_thunk_prefix
    6187           67 : indirect_thunk_need_prefix (rtx_insn *insn)
    6188              : {
    6189           67 :   enum indirect_thunk_prefix need_prefix;
    6190           67 :   if ((cfun->machine->indirect_branch_type
    6191           67 :             == indirect_branch_thunk_extern)
    6192           67 :            && ix86_notrack_prefixed_insn_p (insn))
    6193              :     {
    6194              :       /* NOTRACK prefix is only used with external thunk so that it
    6195              :          can be properly updated to support CET at run-time.  */
    6196              :       need_prefix = indirect_thunk_prefix_nt;
    6197              :     }
    6198              :   else
    6199              :     need_prefix = indirect_thunk_prefix_none;
    6200           67 :   return need_prefix;
    6201              : }
    6202              : 
    6203              : /* Fill in NAME with the label name that should be used for the
                      :    indirect thunk.  REGNO is the register used by the thunk, or
                      :    INVALID_REGNUM.  NEED_PREFIX selects the "_nt" name variant, which
                      :    is only applied together with a register.  RET_P selects a return
                      :    thunk, for which REGNO must be INVALID_REGNUM or CX_REG.  With
                      :    USE_HIDDEN_LINKONCE the name is "__x86_return_thunk" or
                      :    "__x86_indirect_thunk" plus the optional "_nt" and register
                      :    suffixes; otherwise an internal label is generated.  */
    6204              : 
    6205              : static void
    6206           73 : indirect_thunk_name (char name[32], unsigned int regno,
    6207              :                      enum indirect_thunk_prefix need_prefix,
    6208              :                      bool ret_p)
    6209              : {
    6210           73 :   if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
    6211            0 :     gcc_unreachable ();
    6212              : 
    6213           73 :   if (USE_HIDDEN_LINKONCE)
    6214              :     {
    6215           73 :       const char *prefix;
    6216              : 
    6217           73 :       if (need_prefix == indirect_thunk_prefix_nt
    6218           73 :           && regno != INVALID_REGNUM)
    6219              :         {
    6220              :           /* NOTRACK prefix is only used with external thunk via
    6221              :              register so that NOTRACK prefix can be added to indirect
    6222              :              branch via register to support CET at run-time.  */
    6223              :           prefix = "_nt";
    6224              :         }
    6225              :       else
    6226           71 :         prefix = "";
    6227              : 
    6228           73 :       const char *ret = ret_p ? "return" : "indirect";
    6229              : 
    6230           73 :       if (regno != INVALID_REGNUM)
    6231              :         {
    6232           55 :           const char *reg_prefix;
    6233           55 :           if (LEGACY_INT_REGNO_P (regno))
    6234           53 :             reg_prefix = TARGET_64BIT ? "r" : "e";
    6235              :           else
    6236              :             reg_prefix = "";
    6237           55 :           sprintf (name, "__x86_%s_thunk%s_%s%s",
    6238              :                    ret, prefix, reg_prefix, reg_names[regno]);  /* NOTE(review): unbounded sprintf; assumes the result fits NAME[32] -- confirm.  */
    6239              :         }
    6240              :       else
    6241           18 :         sprintf (name, "__x86_%s_thunk%s", ret, prefix);
    6242              :     }
    6243              :   else
    6244              :     {
    6245              :       if (regno != INVALID_REGNUM)
    6246              :         ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
    6247              :       else
    6248              :         {
    6249              :           if (ret_p)
    6250              :             ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
    6251              :           else
    6252           73 :             ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
    6253              :         }
    6254              :     }
    6255           73 : }
    6256              : 
    6257              : /* Output a call and return thunk for indirect branch.  If REGNO != INVALID_REGNUM,
    6258              :    the function address is in REGNO and the call and return thunk looks like:
    6259              : 
    6260              :         call    L2
    6261              :    L1:
    6262              :         pause
    6263              :         lfence
    6264              :         jmp     L1
    6265              :    L2:
    6266              :         mov     %REG, (%sp)
    6267              :         ret
    6268              : 
    6269              :    Otherwise, the function address is on the top of stack and the
    6270              :    call and return thunk looks like:
    6271              : 
    6272              :         call L2
    6273              :   L1:
    6274              :         pause
    6275              :         lfence
    6276              :         jmp L1
    6277              :   L2:
    6278              :         lea WORD_SIZE(%sp), %sp
    6279              :         ret
    6280              :  */
    6281              : 
    6282              : static void
    6283           38 : output_indirect_thunk (unsigned int regno)
    6284              : {
    6285           38 :   char indirectlabel1[32];  /* L1 in the sketch above.  */
    6286           38 :   char indirectlabel2[32];  /* L2 in the sketch above.  */
    6287              : 
    6288           38 :   ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
    6289              :                                indirectlabelno++);
    6290           38 :   ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
    6291              :                                indirectlabelno++);
    6292              : 
    6293              :   /* Call.  */
    6294           38 :   fputs ("\tcall\t", asm_out_file);
    6295           38 :   assemble_name_raw (asm_out_file, indirectlabel2);
    6296           38 :   fputc ('\n', asm_out_file);
    6297              : 
    6298           38 :   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
    6299              : 
    6300              :   /* AMD and Intel CPUs prefer each a different instruction as loop filler.
    6301              :      Usage of both pause + lfence is compromise solution.  */
    6302           38 :   fprintf (asm_out_file, "\tpause\n\tlfence\n");
    6303              : 
    6304              :   /* Jump.  */
    6305           38 :   fputs ("\tjmp\t", asm_out_file);
    6306           38 :   assemble_name_raw (asm_out_file, indirectlabel1);
    6307           38 :   fputc ('\n', asm_out_file);
    6308              : 
    6309           38 :   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
    6310              : 
    6311              :   /* The above call insn pushed a word to stack.  Adjust CFI info.  */
    6312           38 :   if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
    6313              :     {
    6314           38 :       if (! dwarf2out_do_cfi_asm ())
    6315              :         {
    6316            0 :           dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
    6317            0 :           xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
    6318            0 :           xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
    6319            0 :           vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
    6320              :         }
    6321           38 :       dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
    6322           38 :       xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
    6323           38 :       xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
    6324           38 :       vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
    6325           38 :       dwarf2out_emit_cfi (xcfi);
    6326              :     }
    6327              : 
    6328           38 :   if (regno != INVALID_REGNUM)
    6329              :     {
    6330              :       /* MOV.  */
    6331           27 :       rtx xops[2];
    6332           27 :       xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
    6333           27 :       xops[1] = gen_rtx_REG (word_mode, regno);
    6334           27 :       output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
    6335              :     }
    6336              :   else
    6337              :     {
    6338              :       /* LEA.  */
    6339           11 :       rtx xops[2];
    6340           11 :       xops[0] = stack_pointer_rtx;
    6341           11 :       xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
    6342           11 :       output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
    6343              :     }
    6344              : 
    6345           38 :   fputs ("\tret\n", asm_out_file);
    6346           38 :   if ((ix86_harden_sls & harden_sls_return))  /* Follow the ret with int3 when harden_sls_return is set.  */
    6347            1 :     fputs ("\tint3\n", asm_out_file);
    6348           38 : }
    6349              : 
    6350              : /* Output a function with a call and return thunk for indirect branch.
    6351              :    If REGNO != INVALID_REGNUM, the function address is in REGNO.
    6352              :    Otherwise, the function address is on the top of stack.  Thunk is
    6353              :    used for function return if RET_P is true.  NEED_PREFIX, REGNO and
    6354              :    RET_P are passed to indirect_thunk_name to form the symbol name.  */
    6355              : static void
    6356           22 : output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
    6357              :                                 unsigned int regno, bool ret_p)
    6358              : {
    6359           22 :   char name[32];
    6360           22 :   tree decl;
    6361              : 
    6362              :   /* Create __x86_indirect_thunk: a void (void) decl that is public, static and DECL_IGNORED_P.  */
    6363           22 :   indirect_thunk_name (name, regno, need_prefix, ret_p);
    6364           22 :   decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
    6365              :                      get_identifier (name),
    6366              :                      build_function_type_list (void_type_node, NULL_TREE));
    6367           22 :   DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
    6368              :                                    NULL_TREE, void_type_node);
    6369           22 :   TREE_PUBLIC (decl) = 1;
    6370           22 :   TREE_STATIC (decl) = 1;
    6371           22 :   DECL_IGNORED_P (decl) = 1;
    6372              : 
    6373              : #if TARGET_MACHO
    6374              :   if (TARGET_MACHO)
    6375              :     {
    6376              :       switch_to_section (darwin_sections[picbase_thunk_section]);
    6377              :       fputs ("\t.weak_definition\t", asm_out_file);
    6378              :       assemble_name (asm_out_file, name);
    6379              :       fputs ("\n\t.private_extern\t", asm_out_file);
    6380              :       assemble_name (asm_out_file, name);
    6381              :       putc ('\n', asm_out_file);
    6382              :       ASM_OUTPUT_LABEL (asm_out_file, name);
    6383              :       DECL_WEAK (decl) = 1;
    6384              :     }
    6385              :   else
    6386              : #endif
    6387           22 :     if (USE_HIDDEN_LINKONCE)
    6388              :       {
    6389           22 :         cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
    6390              : 
    6391           22 :         targetm.asm_out.unique_section (decl, 0);
    6392           22 :         switch_to_section (get_named_section (decl, NULL, 0));
    6393              : 
    6394           22 :         targetm.asm_out.globalize_label (asm_out_file, name);
    6395           22 :         fputs ("\t.hidden\t", asm_out_file);
    6396           22 :         assemble_name (asm_out_file, name);
    6397           22 :         putc ('\n', asm_out_file);
    6398           22 :         ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
    6399              :       }
    6400              :     else
    6401              :       {
    6402              :         switch_to_section (text_section);
    6403           22 :         ASM_OUTPUT_LABEL (asm_out_file, name);
    6404              :       }
    6405              : 
    6406           22 :   DECL_INITIAL (decl) = make_node (BLOCK);
    6407           22 :   current_function_decl = decl;
    6408           22 :   allocate_struct_function (decl, false);
    6409           22 :   init_function_start (decl);
    6410              :   /* We're about to hide the function body from callees of final_* by
    6411              :      emitting it directly; tell them we're a thunk, if they care.  */
    6412           22 :   cfun->is_thunk = true;
    6413           22 :   first_function_block_is_cold = false;
    6414              :   /* Make sure unwind info is emitted for the thunk if needed.  */
    6415           22 :   final_start_function (emit_barrier (), asm_out_file, 1);
    6416              : 
    6417           22 :   output_indirect_thunk (regno);  /* Emit the thunk body.  */
    6418              : 
    6419           22 :   final_end_function ();
    6420           22 :   init_insn_lengths ();
    6421           22 :   free_after_compilation (cfun);
    6422           22 :   set_cfun (NULL);  /* Leave no current function behind.  */
    6423           22 :   current_function_decl = NULL;
    6424           22 : }
    6425              : 
    6426              : static int pic_labels_used;  /* Bit N set: a pc thunk for hard register N was requested.  */
    6427              : 
    6428              : /* Fill NAME with the label to use for the pc thunk of register REGNO:
    6429              :    "__x86.get_pc_thunk.<reg_names[REGNO]>" if USE_HIDDEN_LINKONCE, else an
    6430              :    internal "LPR" label numbered REGNO.  Asserts !TARGET_64BIT.  */
    6431              : static void
    6432        37467 : get_pc_thunk_name (char name[32], unsigned int regno)
    6433              : {
    6434        37467 :   gcc_assert (!TARGET_64BIT);
    6435              : 
    6436        37467 :   if (USE_HIDDEN_LINKONCE)
    6437        37467 :     sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
    6438              :   else
    6439        37467 :     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
    6440        37467 : }
    6441              : 
    6442              : 
    6443              : /* Emit the requested indirect branch/return thunks and, for -fpic, the pc
    6444              :    thunks that load a register with the return address of the caller and
    6445              :    then return; finally call file_end_indicate_split_stack if flag_split_stack.  */
    6446              : static void
    6447       238327 : ix86_code_end (void)
    6448              : {
    6449       238327 :   rtx xops[2];
    6450       238327 :   unsigned int regno;
    6451              : 
    6452       238327 :   if (indirect_return_needed)
    6453            6 :     output_indirect_thunk_function (indirect_thunk_prefix_none,
    6454              :                                     INVALID_REGNUM, true);
    6455       238327 :   if (indirect_return_via_cx)
    6456            0 :     output_indirect_thunk_function (indirect_thunk_prefix_none,
    6457              :                                     CX_REG, true);
    6458       238327 :   if (indirect_thunk_needed)
    6459            0 :     output_indirect_thunk_function (indirect_thunk_prefix_none,
    6460              :                                     INVALID_REGNUM, false);
    6461              : 
    6462      2144943 :   for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
    6463              :     {
    6464      1906616 :       if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
    6465            0 :         output_indirect_thunk_function (indirect_thunk_prefix_none,
    6466              :                                         regno, false);
    6467              :     }
    6468              : 
    6469      4051559 :   for (regno = FIRST_REX2_INT_REG; regno <= LAST_REX2_INT_REG; regno++)
    6470              :     {
    6471      3813232 :       if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
    6472            0 :         output_indirect_thunk_function (indirect_thunk_prefix_none,
    6473              :                                         regno, false);
    6474              :     }
    6475              : 
    6476      2144943 :   for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
    6477              :     {
    6478      1906616 :       char name[32];
    6479      1906616 :       tree decl;
    6480              : 
    6481      1906616 :       if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
    6482           16 :         output_indirect_thunk_function (indirect_thunk_prefix_none,
    6483              :                                         regno, false);
    6484              : 
    6485      1906616 :       if (!(pic_labels_used & (1 << regno)))
    6486      1903032 :         continue;  /* No pc thunk was requested for REGNO.  */
    6487              : 
    6488         3584 :       get_pc_thunk_name (name, regno);
    6489              : 
    6490         3584 :       decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
    6491              :                          get_identifier (name),
    6492              :                          build_function_type_list (void_type_node, NULL_TREE));
    6493         3584 :       DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
    6494              :                                        NULL_TREE, void_type_node);
    6495         3584 :       TREE_PUBLIC (decl) = 1;
    6496         3584 :       TREE_STATIC (decl) = 1;
    6497         3584 :       DECL_IGNORED_P (decl) = 1;
    6498              : 
    6499              : #if TARGET_MACHO
    6500              :       if (TARGET_MACHO)
    6501              :         {
    6502              :           switch_to_section (darwin_sections[picbase_thunk_section]);
    6503              :           fputs ("\t.weak_definition\t", asm_out_file);
    6504              :           assemble_name (asm_out_file, name);
    6505              :           fputs ("\n\t.private_extern\t", asm_out_file);
    6506              :           assemble_name (asm_out_file, name);
    6507              :           putc ('\n', asm_out_file);
    6508              :           ASM_OUTPUT_LABEL (asm_out_file, name);
    6509              :           DECL_WEAK (decl) = 1;
    6510              :         }
    6511              :       else
    6512              : #endif
    6513         3584 :       if (USE_HIDDEN_LINKONCE)
    6514              :         {
    6515         3584 :           cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
    6516              : 
    6517         3584 :           targetm.asm_out.unique_section (decl, 0);
    6518         3584 :           switch_to_section (get_named_section (decl, NULL, 0));
    6519              : 
    6520         3584 :           targetm.asm_out.globalize_label (asm_out_file, name);
    6521         3584 :           fputs ("\t.hidden\t", asm_out_file);
    6522         3584 :           assemble_name (asm_out_file, name);
    6523         3584 :           putc ('\n', asm_out_file);
    6524         3584 :           ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
    6525              :         }
    6526              :       else
    6527              :         {
    6528              :           switch_to_section (text_section);
    6529         3584 :           ASM_OUTPUT_LABEL (asm_out_file, name);
    6530              :         }
    6531              : 
    6532         3584 :       DECL_INITIAL (decl) = make_node (BLOCK);
    6533         3584 :       current_function_decl = decl;
    6534         3584 :       allocate_struct_function (decl, false);
    6535         3584 :       init_function_start (decl);
    6536              :       /* We're about to hide the function body from callees of final_* by
    6537              :          emitting it directly; tell them we're a thunk, if they care.  */
    6538         3584 :       cfun->is_thunk = true;
    6539         3584 :       first_function_block_is_cold = false;
    6540              :       /* Make sure unwind info is emitted for the thunk if needed.  */
    6541         3584 :       final_start_function (emit_barrier (), asm_out_file, 1);
    6542              : 
    6543              :       /* Pad stack IP move with 4 instructions (two NOPs count
    6544              :          as one instruction), i.e. emit eight NOPs.  */
    6545         3584 :       if (TARGET_PAD_SHORT_FUNCTION)
    6546              :         {
    6547              :           int i = 8;
    6548              : 
    6549            0 :           while (i--)
    6550            0 :             fputs ("\tnop\n", asm_out_file);
    6551              :         }
    6552              : 
    6553         7168 :       xops[0] = gen_rtx_REG (Pmode, regno);
    6554         7168 :       xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
    6555         3584 :       output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);  /* REGNO = word at the stack pointer.  */
    6556         3584 :       fputs ("\tret\n", asm_out_file);
    6557         3584 :       final_end_function ();
    6558         3584 :       init_insn_lengths ();
    6559         3584 :       free_after_compilation (cfun);
    6560         3584 :       set_cfun (NULL);
    6561         3584 :       current_function_decl = NULL;
    6562              :     }
    6563              : 
    6564       238327 :   if (flag_split_stack)
    6565         4710 :     file_end_indicate_split_stack ();
    6566       238327 : }
    6567              : 
    6568              : /* Emit code for the SET_GOT patterns, with DEST as the destination operand.
    6569              :    LABEL may be null.  Always returns "" since the insns are output here.  */
    6570              : const char *
    6571        33883 : output_set_got (rtx dest, rtx label)
    6572              : {
    6573        33883 :   rtx xops[3];
    6574              : 
    6575        33883 :   xops[0] = dest;
    6576              : 
    6577        33883 :   if (TARGET_VXWORKS_GOTTPIC && TARGET_VXWORKS_RTP && flag_pic)
    6578              :     {
    6579              :       /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
    6580              :       xops[2] = gen_rtx_MEM (Pmode,
    6581              :                              gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
    6582              :       output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
    6583              : 
    6584              :       /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
    6585              :          Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
    6586              :          an unadorned address.  */
    6587              :       xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
    6588              :       SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
    6589              :       output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
    6590              :       return "";
    6591              :     }
    6592              : 
    6593        67766 :   xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
    6594              : 
    6595        33883 :   if (flag_pic)
    6596              :     {
    6597        33883 :       char name[32];
    6598        33883 :       get_pc_thunk_name (name, REGNO (dest));
    6599        33883 :       pic_labels_used |= 1 << REGNO (dest);  /* Make ix86_code_end emit this thunk.  */
    6600              : 
    6601        67766 :       xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
    6602        33883 :       xops[2] = gen_rtx_MEM (QImode, xops[2]);
    6603        33883 :       output_asm_insn ("%!call\t%X2", xops);
    6604              : 
    6605              : #if TARGET_MACHO
    6606              :       /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
    6607              :          This is what will be referenced by the Mach-O PIC subsystem.  */
    6608              :       if (machopic_should_output_picbase_label () || !label)
    6609              :         ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
    6610              : 
    6611              :       /* When we are restoring the pic base at the site of a nonlocal label,
    6612              :          and we decided to emit the pic base above, we will still output a
    6613              :          local label used for calculating the correction offset (even though
    6614              :          the offset will be 0 in that case).  */
    6615              :       if (label)
    6616              :         targetm.asm_out.internal_label (asm_out_file, "L",
    6617              :                                            CODE_LABEL_NUMBER (label));
    6618              : #endif
    6619              :     }
    6620              :   else
    6621              :     {
    6622            0 :       if (TARGET_MACHO)
    6623              :         /* We don't need a pic base, we're not producing pic.  */
    6624              :         gcc_unreachable ();
    6625              : 
    6626            0 :       xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
    6627            0 :       output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
    6628            0 :       targetm.asm_out.internal_label (asm_out_file, "L",
    6629            0 :                                       CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    6630              :     }
    6631              : 
    6632        33883 :   if (!TARGET_MACHO)
    6633        33883 :     output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
    6634              : 
    6635        33883 :   return "";
    6636              : }
    6637              : 
    6638              : /* Generate a "push" pattern for input ARG (via gen_pushp_di if PPX_P),
    6639              :    and account for one more word in the tracked frame state.  */
    6640              : rtx
    6641      1891399 : gen_push (rtx arg, bool ppx_p)
    6642              : {
    6643      1891399 :   struct machine_function *m = cfun->machine;
    6644              : 
    6645      1891399 :   if (m->fs.cfa_reg == stack_pointer_rtx)
    6646      1613981 :     m->fs.cfa_offset += UNITS_PER_WORD;
    6647      1891399 :   m->fs.sp_offset += UNITS_PER_WORD;
    6648              : 
    6649      1891399 :   if (REG_P (arg) && GET_MODE (arg) != word_mode)
    6650           36 :     arg = gen_rtx_REG (word_mode, REGNO (arg));
    6651              : 
    6652      1891399 :   rtx stack = gen_rtx_MEM (word_mode,
    6653      1891399 :                            gen_rtx_PRE_DEC (Pmode,
    6654              :                                             stack_pointer_rtx));
    6655      3782710 :   return ppx_p ? gen_pushp_di (stack, arg) : gen_rtx_SET (stack, arg);
    6656              : }
    6657              : /* Generate a push of the flags register (via gen_pushfl2), tracking one more word of stack.  */
    6658              : rtx
    6659           21 : gen_pushfl (void)
    6660              : {
    6661           21 :   struct machine_function *m = cfun->machine;
    6662           21 :   rtx flags, mem;
    6663              : 
    6664           21 :   if (m->fs.cfa_reg == stack_pointer_rtx)
    6665            0 :     m->fs.cfa_offset += UNITS_PER_WORD;
    6666           21 :   m->fs.sp_offset += UNITS_PER_WORD;
    6667              : 
    6668           21 :   flags = gen_rtx_REG (CCmode, FLAGS_REG);
    6669              : 
    6670           21 :   mem = gen_rtx_MEM (word_mode,
    6671           21 :                      gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
    6672              : 
    6673           21 :   return gen_pushfl2 (word_mode, mem, flags);
    6674              : }
    6675              : 
    6676              : /* Generate a "pop" pattern for input ARG (via gen_popp_di if PPX_P).
    6677              :    Unlike gen_push, the tracked frame state is not updated here.  */
    6678              : rtx
    6679      1469017 : gen_pop (rtx arg, bool ppx_p)
    6680              : {
    6681      1469017 :   if (REG_P (arg) && GET_MODE (arg) != word_mode)
    6682           32 :     arg = gen_rtx_REG (word_mode, REGNO (arg));
    6683              : 
    6684      1469017 :   rtx stack = gen_rtx_MEM (word_mode,
    6685      1469017 :                            gen_rtx_POST_INC (Pmode,
    6686              :                                              stack_pointer_rtx));
    6687              : 
    6688      2937946 :   return ppx_p ? gen_popp_di (arg, stack) : gen_rtx_SET (arg, stack);
    6689              : }
    6690              : /* Generate a pop of the flags register from the stack (via gen_popfl1).  */
    6691              : rtx
    6692           21 : gen_popfl (void)
    6693              : {
    6694           21 :   rtx flags, mem;
    6695              : 
    6696           21 :   flags = gen_rtx_REG (CCmode, FLAGS_REG);
    6697              : 
    6698           21 :   mem = gen_rtx_MEM (word_mode,
    6699           21 :                      gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
    6700              : 
    6701           21 :   return gen_popfl1 (word_mode, flags, mem);
    6702              : }
    6703              : 
    6704              : /* Generate a "push2" pattern for MEM, REG1 and REG2 (via gen_push2p_di if PPX_P); tracks two words.  */
    6705              : rtx
    6706           19 : gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false)
    6707              : {
    6708           19 :   struct machine_function *m = cfun->machine;
    6709           19 :   const int offset = UNITS_PER_WORD * 2;
    6710              : 
    6711           19 :   if (m->fs.cfa_reg == stack_pointer_rtx)
    6712           14 :     m->fs.cfa_offset += offset;
    6713           19 :   m->fs.sp_offset += offset;
    6714              : 
    6715           19 :   if (REG_P (reg1) && GET_MODE (reg1) != word_mode)
    6716            0 :     reg1 = gen_rtx_REG (word_mode, REGNO (reg1));
    6717              : 
    6718           19 :   if (REG_P (reg2) && GET_MODE (reg2) != word_mode)
    6719            0 :     reg2 = gen_rtx_REG (word_mode, REGNO (reg2));
    6720              : 
    6721           19 :   return ppx_p ? gen_push2p_di (mem, reg1, reg2)
    6722            4 :                : gen_push2_di (mem, reg1, reg2);
    6723              : }
    6724              : 
    6725              : /* Return the number of an unused call-clobbered register (hard reg 2, 1 or
    6726              :    0, tried in that order, skipping the DRAP register) available for the
    6727              :    entire function, or INVALID_REGNUM if there is none.  */
    6728              : static unsigned int
    6729            0 : ix86_select_alt_pic_regnum (void)
    6730              : {
    6731            0 :   if (ix86_use_pseudo_pic_reg ())
    6732              :     return INVALID_REGNUM;
    6733              : 
    6734            0 :   if (crtl->is_leaf
    6735            0 :       && !crtl->profile
    6736            0 :       && !ix86_current_function_calls_tls_descriptor)
    6737              :     {
    6738            0 :       int i, drap;
    6739              :       /* Can't use the same register for both PIC and DRAP.  */
    6740            0 :       if (crtl->drap_reg)
    6741            0 :         drap = REGNO (crtl->drap_reg);
    6742              :       else
    6743              :         drap = -1;
    6744            0 :       for (i = 2; i >= 0; --i)
    6745            0 :         if (i != drap && !df_regs_ever_live_p (i))
    6746              :           return i;
    6747              :     }
    6748              : 
    6749              :   return INVALID_REGNUM;
    6750              : }
    6751              : 
    6752              : /* Return true if REGNO is used by the epilogue.  This is only ever true
    6753              :    after epilogue_completed in a TYPE_NO_CALLER_SAVED_REGISTERS function.  */
    6754              : bool
    6755   1668288998 : ix86_epilogue_uses (int regno)
    6756              : {
    6757              :   /* If there are no caller-saved registers, we preserve all registers,
    6758              :      except for MMX and x87 registers which aren't supported when saving
    6759              :      and restoring registers.  Don't explicitly save SP register since
    6760              :      it is always preserved.  */
    6761   1668288998 :   return (epilogue_completed
    6762    263688130 :           && (cfun->machine->call_saved_registers
    6763    263688130 :               == TYPE_NO_CALLER_SAVED_REGISTERS)
    6764        27140 :           && !fixed_regs[regno]
    6765         4857 :           && !STACK_REGNO_P (regno)
    6766   1668293855 :           && !MMX_REGNO_P (regno));
    6767              : }
    6768              : 
    6769              : /* Return nonzero if register REGNO can be used as a scratch register
    6770              :    in peephole2.  Always true unless the function is
    6771              :    TYPE_NO_CALLER_SAVED_REGISTERS.  */
    6772              : static bool
    6773      1220092 : ix86_hard_regno_scratch_ok (unsigned int regno)
    6774              : {
    6775              :   /* If there are no caller-saved registers, we can't use any register
    6776              :      as a scratch register after epilogue and use REGNO as scratch
    6777              :      register only if it has been used before to avoid saving and
    6778              :      restoring it.  */
    6779      1220092 :   return ((cfun->machine->call_saved_registers
    6780      1220092 :            != TYPE_NO_CALLER_SAVED_REGISTERS)
    6781      1220092 :           || (!epilogue_completed
    6782            0 :               && df_regs_ever_live_p (regno)));
    6783              : }
    6784              : 
    6785              : /* Return TRUE if we need to save REGNO.  MAYBE_EH_RETURN also counts the EH
    6786              :    return data registers; IGNORE_OUTLINED excludes ms2sysv stub-managed regs.  */
    6787              : bool
    6788    354412398 : ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
    6789              : {
    6790    354412398 :   rtx reg;
    6791              : 
    6792              :   /* Save and restore DRAP register between prologue and epilogue so
    6793              :      that stack pointer can be restored.  */
    6794    354412398 :   if (crtl->drap_reg
    6795      2285938 :       && regno == REGNO (crtl->drap_reg)
    6796    354468027 :       && !cfun->machine->no_drap_save_restore)
    6797              :     return true;
    6798              : 
    6799    354356769 :   switch (cfun->machine->call_saved_registers)
    6800              :     {
    6801              :     case TYPE_DEFAULT_CALL_SAVED_REGISTERS:
    6802              :       break;
    6803              : 
    6804        57152 :     case TYPE_NO_CALLER_SAVED_REGISTERS:
    6805              :       /* If there are no caller-saved registers, we preserve all
    6806              :          registers, except for MMX and x87 registers which aren't
    6807              :          supported when saving and restoring registers.  Don't
    6808              :          explicitly save SP register since it is always preserved.
    6809              : 
    6810              :          Don't preserve registers used for function return value.  */
    6811        57152 :       reg = crtl->return_rtx;
    6812        57152 :       if (reg)
    6813              :         {
    6814          768 :           unsigned int i = REGNO (reg);
    6815          768 :           unsigned int nregs = REG_NREGS (reg);
    6816         1522 :           while (nregs-- > 0)
    6817          768 :             if ((i + nregs) == regno)
    6818              :               return false;
    6819              :         }
    6820              : 
    6821        57138 :       return (df_regs_ever_live_p (regno)
    6822         6930 :               && !fixed_regs[regno]
    6823         5962 :               && !STACK_REGNO_P (regno)
    6824         5962 :               && !MMX_REGNO_P (regno)
    6825        63100 :               && (regno != HARD_FRAME_POINTER_REGNUM
    6826          249 :                   || !frame_pointer_needed));
    6827              : 
    6828        18192 :     case TYPE_NO_CALLEE_SAVED_REGISTERS:
    6829        18192 :     case TYPE_PRESERVE_NONE:
    6830        18192 :       if (regno != HARD_FRAME_POINTER_REGNUM)
    6831              :         return false;
    6832              :       break;
    6833              :     }
    6834              : 
    6835    388250365 :   if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
    6836     10780223 :       && pic_offset_table_rtx)
    6837              :     {
    6838       385014 :       if (ix86_use_pseudo_pic_reg ())
    6839              :         {
    6840              :           /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
    6841              :           _mcount in prologue.  */
    6842       385014 :           if (!TARGET_64BIT && flag_pic && crtl->profile)
    6843              :             return true;
    6844              :         }
    6845            0 :       else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
    6846            0 :                || crtl->profile
    6847            0 :                || crtl->calls_eh_return
    6848            0 :                || crtl->uses_const_pool
    6849            0 :                || cfun->has_nonlocal_label)
    6850            0 :         return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
    6851              :     }
    6852              : 
    6853    354281994 :   if (crtl->calls_eh_return && maybe_eh_return)
    6854              :     {
    6855              :       unsigned i;
    6856        13237 :       for (i = 0; ; i++)
    6857              :         {
    6858        20181 :           unsigned test = EH_RETURN_DATA_REGNO (i);
    6859        13671 :           if (test == INVALID_REGNUM)
    6860              :             break;
    6861        13671 :           if (test == regno)
    6862              :             return true;
    6863        13237 :         }
    6864              :     }
    6865              : 
    6866    354281560 :   if (ignore_outlined && cfun->machine->call_ms2sysv)
    6867              :     {
    6868      2641728 :       unsigned count = cfun->machine->call_ms2sysv_extra_regs
    6869              :                        + xlogue_layout::MIN_REGS;
    6870      2641728 :       if (xlogue_layout::is_stub_managed_reg (regno, count))
    6871              :         return false;
    6872              :     }
    6873              : 
    6874    353781691 :   return (df_regs_ever_live_p (regno)
    6875    373148687 :           && !call_used_or_fixed_reg_p (regno)
    6876    372517980 :           && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
    6877              : }
    6878              : 
    6879              : /* Return number of saved general purpose registers, as decided by ix86_save_reg.  */
    6880              : 
    6881              : static int
    6882      8199850 : ix86_nsaved_regs (void)
    6883              : {
    6884      8199850 :   int nregs = 0;
    6885      8199850 :   int regno;
    6886              : 
    6887    762586050 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    6888    754386200 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
    6889      8185098 :       nregs ++;
    6890      8199850 :   return nregs;
    6891              : }
    6892              : 
    6893              : /* Return number of saved SSE registers.  This is 0 unless TARGET_64BIT_MS_ABI
    6894              :    or the function is TYPE_NO_CALLER_SAVED_REGISTERS.  */
    6895              : static int
    6896      8234936 : ix86_nsaved_sseregs (void)
    6897              : {
    6898      8234936 :   int nregs = 0;
    6899      8234936 :   int regno;
    6900              : 
    6901      7432672 :   if (!TARGET_64BIT_MS_ABI
    6902      8234936 :       && (cfun->machine->call_saved_registers
    6903      8009335 :           != TYPE_NO_CALLER_SAVED_REGISTERS))
    6904              :     return 0;
    6905     21049434 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    6906     20823096 :     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
    6907      1897045 :       nregs ++;
    6908              :   return nregs;
    6909              : }
    6910              : 
    6911              : /* Given FROM and TO register numbers, say whether this elimination is
    6912              :    allowed.  If stack alignment is needed, we can only replace argument
    6913              :    pointer with hard frame pointer, or replace frame pointer with stack
    6914              :    pointer.  Otherwise, eliminating to the stack pointer is allowed only
    6915              :    when no frame pointer is needed, and all other eliminations are valid.  */
    6916              : 
    6917              : static bool
    6918     48536735 : ix86_can_eliminate (const int from, const int to)
    6919              : {
    6920     48536735 :   if (stack_realign_fp)
    6921      1713236 :     return ((from == ARG_POINTER_REGNUM
    6922      1713236 :              && to == HARD_FRAME_POINTER_REGNUM)
    6923      1713236 :             || (from == FRAME_POINTER_REGNUM
    6924      1713236 :                 && to == STACK_POINTER_REGNUM));
    6925              :   else
    6926     86984450 :     return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
    6927              : }
    6928              : 
    6929              : /* Return the offset between two registers, one to be eliminated (FROM) and
    6930              :    the other its replacement (TO), at the start of a routine.  The value is
                      :    derived from cfun->machine->frame:
                      :      ARG_POINTER   -> HARD_FRAME_POINTER: hard_frame_pointer_offset
                      :      FRAME_POINTER -> HARD_FRAME_POINTER: hard_frame_pointer_offset
                      :                                           - frame_pointer_offset
                      :      ARG_POINTER   -> STACK_POINTER:      stack_pointer_offset
                      :      FRAME_POINTER -> STACK_POINTER:      stack_pointer_offset
                      :                                           - frame_pointer_offset
                      :    Any other FROM/TO pair trips a gcc_assert.  */
    6931              : 
    6932              : HOST_WIDE_INT
    6933    141386275 : ix86_initial_elimination_offset (int from, int to)
    6934              : {
    6935    141386275 :   struct ix86_frame &frame = cfun->machine->frame;
    6936              : 
    6937    141386275 :   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    6938     10450296 :     return frame.hard_frame_pointer_offset;
    6939    130935979 :   else if (from == FRAME_POINTER_REGNUM
    6940    130935979 :            && to == HARD_FRAME_POINTER_REGNUM)
    6941      8164438 :     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
    6942              :   else
    6943              :     {
    6944    122771541 :       gcc_assert (to == STACK_POINTER_REGNUM);
    6945              : 
    6946    122771541 :       if (from == ARG_POINTER_REGNUM)
    6947    114607103 :         return frame.stack_pointer_offset;
    6948              : 
    6949      8164438 :       gcc_assert (from == FRAME_POINTER_REGNUM);
    6950      8164438 :       return frame.stack_pointer_offset - frame.frame_pointer_offset;
    6951              :     }
    6952              : }
    6953              : 
    6954              : /* Emit the warning that -mcall-ms2sysv-xlogues is not compatible with
                      :    FEATURE.  A function-local static flag ensures the warning is issued
                      :    at most once; later calls return without doing anything, even when
                      :    FEATURE differs.  */
    6955              : void
    6956            0 : warn_once_call_ms2sysv_xlogues (const char *feature)
    6957              : {
    6958            0 :   static bool warned_once = false;
    6959            0 :   if (!warned_once)
    6960              :     {
    6961            0 :       warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
    6962              :                feature);
    6963            0 :       warned_once = true;
    6964              :     }
    6965            0 : }
    6966              : 
    6967              : /* Return the probing interval for -fstack-clash-protection: 1 shifted
                      :    left by param_stack_clash_protection_probe_interval when
                      :    flag_stack_clash_protection is set, and 1 shifted left by
                      :    STACK_CHECK_PROBE_INTERVAL_EXP otherwise.  */
    6968              : 
    6969              : static HOST_WIDE_INT
    6970          495 : get_probe_interval (void)
    6971              : {
    6972          341 :   if (flag_stack_clash_protection)
    6973          412 :     return (HOST_WIDE_INT_1U
    6974          412 :             << param_stack_clash_protection_probe_interval);
    6975              :   else
    6976              :     return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
    6977              : }
    6978              : 
    6979              : /* When using -fsplit-stack, the allocation routines set a field in
    6980              :    the TCB to the bottom of the stack plus this much space.  The value
    6981              :    is measured in bytes.  */
    6982              : 
    6983              : #define SPLIT_STACK_AVAILABLE 256
    6984              : 
    6985              : /* Return true if push2/pop2 can be generated, i.e. if the larger of
                      :    crtl->parm_stack_boundary and ix86_incoming_stack_boundary is a
                      :    multiple of 128.  */
    6986              : 
    6987              : static bool
    6988      8200506 : ix86_can_use_push2pop2 (void)
    6989              : {
    6990              :   /* Use push2/pop2 only if the incoming stack is 16-byte aligned.  */
    6991      8200506 :   unsigned int incoming_stack_boundary
    6992      8200506 :     = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
    6993      8200506 :        ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
    6994      8200506 :   return incoming_stack_boundary % 128 == 0;
    6995              : }
    6996              : 
    6997              : /* Helper function to determine whether push2/pop2 can be used in prologue or
    6998              :    epilogue for register save/restore.  NREGS is the number of saved
                      :    registers.  Besides ix86_can_use_push2pop2, this requires
                      :    TARGET_APX_PUSH2POP2, that registers are not saved using moves, and
                      :    that the function is TYPE_NORMAL.  At least 3 registers are needed,
                      :    or 2 when cfun->machine->fs.sp_offset is a multiple of 16.  */
    6999              : static bool
    7000      8199850 : ix86_pro_and_epilogue_can_use_push2pop2 (int nregs)
    7001              : {
    7002      8199850 :   if (!ix86_can_use_push2pop2 ())
    7003              :     return false;
    7004      8163939 :   int aligned = cfun->machine->fs.sp_offset % 16 == 0;
    7005      8163939 :   return TARGET_APX_PUSH2POP2
    7006         2852 :          && !cfun->machine->frame.save_regs_using_mov
    7007         2840 :          && cfun->machine->func_type == TYPE_NORMAL
    7008      8166771 :          && (nregs + aligned) >= 3;
    7009              : }
    7010              : 
    7011              : /* Check if push/pop should be used to save/restore registers.
                      :    TO_ALLOCATE is the size the prologue needs to allocate.  Return true
                      :    when nothing is allocated and at most one register is saved, when a
                      :    64-bit target allocates at least 0x80000000, or in either of the
                      :    stack checking cases commented below.  */
    7012              : static bool
    7013      8921123 : save_regs_using_push_pop (HOST_WIDE_INT to_allocate)
    7014              : {
    7015      3207936 :   return ((!to_allocate && cfun->machine->frame.nregs <= 1)
    7016      5958988 :           || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
    7017              :           /* If static stack checking is enabled and done with probes,
    7018              :              the registers need to be saved before allocating the frame.  */
    7019      5958327 :           || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
    7020              :           /* If stack clash probing needs a loop, then it needs a
    7021              :              scratch register.  But the returned register is only guaranteed
    7022              :              to be safe to use after register saves are complete.  So if
    7023              :              stack clash protections are enabled and the allocated frame is
    7024              :              larger than the probe interval, then use pushes to save
    7025              :              callee saved registers.  */
    7026     14879376 :           || (flag_stack_clash_protection
    7027          341 :               && !ix86_target_stack_probe ()
    7028          341 :               && to_allocate > get_probe_interval ()));
    7029              : }
    7030              : 
    7031              : /* Fill structure ix86_frame about frame of currently computed function.
                      :    The offsets stored in cfun->machine->frame are accumulated in order,
                      :    starting from INCOMING_FRAME_SP_OFFSET: hfp_save_offset and
                      :    hard_frame_pointer_offset, reg_save_offset, stack_realign_offset,
                      :    sse_reg_save_offset, frame_pointer_offset and stack_pointer_offset.
                      :    For TARGET_SEH, hard_frame_pointer_offset is recomputed at the end.
                      :    Besides the frame itself, this may update the call_ms2sysv fields and
                      :    use_fast_prologue_epilogue of cfun->machine, and raise
                      :    crtl->preferred_stack_boundary and crtl->stack_alignment_needed
                      :    to 128.  */
    7032              : 
    7033              : static void
    7034      8199850 : ix86_compute_frame_layout (void)
    7035              : {
    7036      8199850 :   struct ix86_frame *frame = &cfun->machine->frame;
    7037      8199850 :   struct machine_function *m = cfun->machine;
    7038      8199850 :   unsigned HOST_WIDE_INT stack_alignment_needed;
    7039      8199850 :   HOST_WIDE_INT offset;
    7040      8199850 :   unsigned HOST_WIDE_INT preferred_alignment;
    7041      8199850 :   HOST_WIDE_INT size = ix86_get_frame_size ();
    7042      8199850 :   HOST_WIDE_INT to_allocate;
    7043              : 
    7044              :   /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
    7045              :      ms_abi functions that call a sysv function.  We now need to prune away
    7046              :      cases where it should be disabled.  */
    7047      8199850 :   if (TARGET_64BIT && m->call_ms2sysv)
    7048              :     {
    7049        35225 :       gcc_assert (TARGET_64BIT_MS_ABI);
    7050        35225 :       gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
    7051        35225 :       gcc_assert (!TARGET_SEH);
    7052        35225 :       gcc_assert (TARGET_SSE);
    7053        35225 :       gcc_assert (!ix86_using_red_zone ());
    7054              : 
    7055        35225 :       if (crtl->calls_eh_return)
    7056              :         {
    7057            0 :           gcc_assert (!reload_completed);
    7058            0 :           m->call_ms2sysv = false;
    7059            0 :           warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
    7060              :         }
    7061              : 
    7062        35225 :       else if (ix86_static_chain_on_stack)
    7063              :         {
    7064            0 :           gcc_assert (!reload_completed);
    7065            0 :           m->call_ms2sysv = false;
    7066            0 :           warn_once_call_ms2sysv_xlogues ("static call chains");
    7067              :         }
    7068              : 
    7069              :       /* Finally, compute which registers the stub will manage.  */
    7070              :       else
    7071              :         {
    7072        35225 :           unsigned count = xlogue_layout::count_stub_managed_regs ();
    7073        35225 :           m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
    7074        35225 :           m->call_ms2sysv_pad_in = 0;
    7075              :         }
    7076              :     }
    7077              : 
    7078      8199850 :   frame->nregs = ix86_nsaved_regs ();
    7079      8199850 :   frame->nsseregs = ix86_nsaved_sseregs ();
    7080              : 
    7081              :   /* 64-bit MS ABI seems to require stack alignment to be always 16,
    7082              :      except for function prologues, leaf functions and when the default
    7083              :      incoming stack boundary is overridden at command line or via
    7084              :      force_align_arg_pointer attribute.
    7085              : 
    7086              :      Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
    7087              :      at call sites, including profile function calls.
    7088              : 
    7089              :      For APX push2/pop2, the stack also requires 128b alignment.  */
    7090      8199850 :   if ((ix86_pro_and_epilogue_can_use_push2pop2 (frame->nregs)
    7091           65 :        && crtl->preferred_stack_boundary < 128)
    7092      8199913 :       || (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
    7093       225599 :            && crtl->preferred_stack_boundary < 128)
    7094            0 :           && (!crtl->is_leaf || cfun->calls_alloca != 0
    7095            0 :               || ix86_current_function_calls_tls_descriptor
    7096            0 :               || (TARGET_MACHO && crtl->profile)
    7097            0 :               || ix86_incoming_stack_boundary < 128)))
    7098              :     {
    7099            2 :       crtl->preferred_stack_boundary = 128;
    7100            2 :       if (crtl->stack_alignment_needed < 128)
    7101            1 :         crtl->stack_alignment_needed = 128;
    7102              :     }
    7103              : 
    7104      8199850 :   stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
    7105      8199850 :   preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
    7106              : 
    7107      8199850 :   gcc_assert (!size || stack_alignment_needed);
    7108      9002085 :   gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
    7109      8199850 :   gcc_assert (preferred_alignment <= stack_alignment_needed);
    7110              : 
    7111              :   /* The only ABI saving SSE regs should be 64-bit ms_abi or with
    7112              :      no_caller_saved_registers attribute.  */
    7113      8199850 :   gcc_assert (TARGET_64BIT
    7114              :               || (cfun->machine->call_saved_registers
    7115              :                   == TYPE_NO_CALLER_SAVED_REGISTERS)
    7116              :               || !frame->nsseregs);
    7117      8199850 :   if (TARGET_64BIT && m->call_ms2sysv)
    7118              :     {
    7119        35225 :       gcc_assert (stack_alignment_needed >= 16);
    7120        35225 :       gcc_assert ((cfun->machine->call_saved_registers
    7121              :                    == TYPE_NO_CALLER_SAVED_REGISTERS)
    7122              :                   || !frame->nsseregs);
    7123              :     }
    7124              : 
    7125              :   /* For SEH we have to limit the amount of code movement into the prologue.
    7126              :      At present we do this via a BLOCKAGE, at which point there's very little
    7127              :      scheduling that can be done, which means that there's very little point
    7128              :      in doing anything except PUSHs.  */
    7129      8199850 :   if (TARGET_SEH)
    7130              :     m->use_fast_prologue_epilogue = false;
    7131      8199850 :   else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
    7132              :     {
    7133      7863617 :       int count = frame->nregs;
    7134      7863617 :       struct cgraph_node *node = cgraph_node::get (current_function_decl);
    7135              : 
    7136              :       /* The fast prologue uses move instead of push to save registers.  This
    7137              :          is significantly longer, but also executes faster as modern hardware
    7138              :          can execute the moves in parallel, but can't do that for push/pop.
    7139              : 
    7140              :          Be careful about choosing what prologue to emit:  When function takes
    7141              :          many instructions to execute we may use slow version as well as in
    7142              :          case function is known to be outside hot spot (this is known with
    7143              :          feedback only).  Weight the size of function by number of registers
    7144              :          to save as it is cheap to use one or two push instructions but very
    7145              :          slow to use many of them.
    7146              : 
    7147              :          Calling this hook multiple times with the same frame requirements
    7148              :          must produce the same layout, since the RA might otherwise be
    7149              :          unable to reach a fixed point or might fail its final sanity checks.
    7150              :          This means that once we've assumed that a function does or doesn't
    7151              :          have a particular size, we have to stick to that assumption
    7152              :          regardless of how the function has changed since.  */
    7153      7863617 :       if (count)
    7154      2600622 :         count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
    7155      7863617 :       if (node->frequency < NODE_FREQUENCY_NORMAL
    7156      7168057 :           || (flag_branch_probabilities
    7157         1051 :               && node->frequency < NODE_FREQUENCY_HOT))
    7158       695890 :         m->use_fast_prologue_epilogue = false;
    7159              :       else
    7160              :         {
    7161      7167727 :           if (count != frame->expensive_count)
    7162              :             {
    7163       284424 :               frame->expensive_count = count;
    7164       284424 :               frame->expensive_p = expensive_function_p (count);
    7165              :             }
    7166      7167727 :           m->use_fast_prologue_epilogue = !frame->expensive_p;
    7167              :         }
    7168              :     }
    7169              : 
    7170      8199850 :   frame->save_regs_using_mov
    7171      8199850 :     = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;
    7172              : 
    7173              :   /* Skip return address and error code in exception handler.  */
    7174      8199850 :   offset = INCOMING_FRAME_SP_OFFSET;
    7175              : 
    7176              :   /* Skip pushed static chain.  */
    7177      8199850 :   if (ix86_static_chain_on_stack)
    7178            0 :     offset += UNITS_PER_WORD;
    7179              : 
    7180              :   /* Skip saved base pointer.  */
    7181      8199850 :   if (frame_pointer_needed)
    7182      2828267 :     offset += UNITS_PER_WORD;
    7183      8199850 :   frame->hfp_save_offset = offset;
    7184              : 
    7185              :   /* The traditional frame pointer location is at the top of the frame.  */
    7186      8199850 :   frame->hard_frame_pointer_offset = offset;
    7187              : 
    7188              :   /* Register save area.  */
    7189      8199850 :   offset += frame->nregs * UNITS_PER_WORD;
    7190      8199850 :   frame->reg_save_offset = offset;
    7191              : 
    7192              :   /* Calculate the size of the va-arg area (not including padding, if any).  */
    7193      8199850 :   frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
    7194              : 
    7195              :   /* Also adjust stack_realign_offset for the largest alignment of
    7196              :      stack slot actually used.  */
    7197      8199850 :   if (stack_realign_fp
    7198      7891611 :       || (cfun->machine->max_used_stack_alignment != 0
    7199          138 :           && (offset % cfun->machine->max_used_stack_alignment) != 0))
    7200              :     {
    7201              :       /* We may need a 16-byte aligned stack for the remainder of the
    7202              :          register save area, but the stack frame for the local function
    7203              :          may require a greater alignment if using AVX/2/512.  In order
    7204              :          to avoid wasting space, we first calculate the space needed for
    7205              :          the rest of the register saves, add that to the stack pointer,
    7206              :          and then realign the stack to the boundary of the start of the
    7207              :          frame for the local function.  */
    7208       308308 :       HOST_WIDE_INT space_needed = 0;
    7209       308308 :       HOST_WIDE_INT sse_reg_space_needed = 0;
    7210              : 
    7211       308308 :       if (TARGET_64BIT)
    7212              :         {
    7213       306523 :           if (m->call_ms2sysv)
    7214              :             {
    7215         6415 :               m->call_ms2sysv_pad_in = 0;
    7216         6415 :               space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
    7217              :             }
    7218              : 
    7219       300108 :           else if (frame->nsseregs)
    7220              :             /* The only ABI that has saved SSE registers (Win64) also has a
    7221              :                16-byte aligned default stack.  However, many programs violate
    7222              :                the ABI, and Wine64 forces stack realignment to compensate.  */
    7223         6447 :             space_needed = frame->nsseregs * 16;
    7224              : 
    7225       306523 :           sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);
    7226              : 
    7227              :           /* 64-bit frame->va_arg_size should always be a multiple of 16, but
    7228              :              rounding to be pedantic.  */
    7229       306523 :           space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
    7230              :         }
    7231              :       else
    7232         1785 :         space_needed = frame->va_arg_size;
    7233              : 
    7234              :       /* Record the allocation size required prior to the realignment AND.  */
    7235       308308 :       frame->stack_realign_allocate = space_needed;
    7236              : 
    7237              :       /* The re-aligned stack starts at frame->stack_realign_offset.  Values
    7238              :          before this point are not directly comparable with values below
    7239              :          this point.  Use sp_valid_at to determine if the stack pointer is
    7240              :          valid for a given offset, fp_valid_at for the frame pointer, or
    7241              :          choose_baseaddr to have a base register chosen for you.
    7242              : 
    7243              :          Note that the result of (frame->stack_realign_offset
    7244              :          & (stack_alignment_needed - 1)) may not equal zero.  */
    7245       308308 :       offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
    7246       308308 :       frame->stack_realign_offset = offset - space_needed;
    7247       308308 :       frame->sse_reg_save_offset = frame->stack_realign_offset
    7248       308308 :                                                         + sse_reg_space_needed;
    7249       308308 :     }
    7250              :   else
    7251              :     {
    7252      7891542 :       frame->stack_realign_offset = offset;
    7253              : 
    7254      7891542 :       if (TARGET_64BIT && m->call_ms2sysv)
    7255              :         {
    7256        28810 :           m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
    7257        28810 :           offset += xlogue_layout::get_instance ().get_stack_space_used ();
    7258              :         }
    7259              : 
    7260              :       /* Align and set SSE register save area.  */
    7261      7862732 :       else if (frame->nsseregs)
    7262              :         {
    7263              :           /* If the incoming stack boundary is at least 16 bytes, or DRAP is
    7264              :              required and the DRAP re-alignment boundary is at least 16 bytes,
    7265              :              then we want the SSE register save area properly aligned.  */
    7266       183247 :           if (ix86_incoming_stack_boundary >= 128
    7267         6400 :                   || (stack_realign_drap && stack_alignment_needed >= 16))
    7268       183247 :             offset = ROUND_UP (offset, 16);
    7269       183247 :           offset += frame->nsseregs * 16;
    7270              :         }
    7271      7891542 :       frame->sse_reg_save_offset = offset;
    7272      7891542 :       offset += frame->va_arg_size;
    7273              :     }
    7274              : 
    7275              :   /* Align start of frame for local function.  When a function call
    7276              :      is removed, it may become a leaf function.  But if argument may
    7277              :      be passed on stack, we need to align the stack when there is no
    7278              :      tail call.  */
    7279      8199850 :   if (m->call_ms2sysv
    7280      8164625 :       || frame->va_arg_size != 0
    7281      8085185 :       || size != 0
    7282      4398766 :       || !crtl->is_leaf
    7283      2058478 :       || (!crtl->tail_call_emit
    7284      1745647 :           && cfun->machine->outgoing_args_on_stack)
    7285      2058428 :       || cfun->calls_alloca
    7286     10256569 :       || ix86_current_function_calls_tls_descriptor)
    7287      6143543 :     offset = ROUND_UP (offset, stack_alignment_needed);
    7288              : 
    7289              :   /* Frame pointer points here.  */
    7290      8199850 :   frame->frame_pointer_offset = offset;
    7291              : 
    7292      8199850 :   offset += size;
    7293              : 
    7294              :   /* Add outgoing arguments area.  Can be skipped if we eliminated
    7295              :      all the function calls as dead code.
    7296              :      Skipping is however impossible when function calls alloca.  Alloca
    7297              :      expander assumes that last crtl->outgoing_args_size
    7298              :      of stack frame are unused.  */
    7299      8199850 :   if (ACCUMULATE_OUTGOING_ARGS
    7300      8817966 :       && (!crtl->is_leaf || cfun->calls_alloca
    7301       391912 :           || ix86_current_function_calls_tls_descriptor))
    7302              :     {
    7303       226204 :       offset += crtl->outgoing_args_size;
    7304       226204 :       frame->outgoing_arguments_size = crtl->outgoing_args_size;
    7305              :     }
    7306              :   else
    7307      7973646 :     frame->outgoing_arguments_size = 0;
    7308              : 
    7309              :   /* Align stack boundary.  Only needed if we're calling another function
    7310              :      or using alloca.  */
    7311      2786886 :   if (!crtl->is_leaf || cfun->calls_alloca
    7312     10983328 :       || ix86_current_function_calls_tls_descriptor)
    7313      5418184 :     offset = ROUND_UP (offset, preferred_alignment);
    7314              : 
    7315              :   /* We've reached end of stack frame.  */
    7316      8199850 :   frame->stack_pointer_offset = offset;
    7317              : 
    7318              :   /* Size prologue needs to allocate.  */
    7319      8199850 :   to_allocate = offset - frame->sse_reg_save_offset;
    7320              : 
    7321      8199850 :   if (save_regs_using_push_pop (to_allocate))
    7322      2591043 :     frame->save_regs_using_mov = false;
    7323              : 
    7324      8199850 :   if (ix86_using_red_zone ()
    7325      7171825 :       && crtl->sp_is_unchanging
    7326      6527706 :       && crtl->is_leaf
    7327      2687437 :       && !cfun->machine->asm_redzone_clobber_seen
    7328      2687424 :       && !ix86_pc_thunk_call_expanded
    7329     10887274 :       && !ix86_current_function_calls_tls_descriptor)
    7330              :     {
    7331      2687409 :       frame->red_zone_size = to_allocate;
    7332      2687409 :       if (frame->save_regs_using_mov)
    7333       139945 :         frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
    7334      2687409 :       if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
    7335       102734 :         frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    7336              :     }
    7337              :   else
    7338      5512441 :     frame->red_zone_size = 0;
    7339      8199850 :   frame->stack_pointer_offset -= frame->red_zone_size;
    7340              : 
    7341              :   /* The SEH frame pointer location is near the bottom of the frame.
    7342              :      This is enforced by the fact that the difference between the
    7343              :      stack pointer and the frame pointer is limited to 240 bytes in
    7344              :      the unwind data structure.  */
    7345      8199850 :   if (TARGET_SEH)
    7346              :     {
    7347              :       /* Force the frame pointer to point at or below the lowest register save
    7348              :          area, see the SEH code in config/i386/winnt.cc for the rationale.  */
    7349              :       frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;
    7350              : 
    7351              :       /* If we can leave the frame pointer where it is, do so; however return
    7352              :          the establisher frame for __builtin_frame_address (0) or else if the
    7353              :          frame overflows the SEH maximum frame size.
    7354              : 
    7355              :          Note that the value returned by __builtin_frame_address (0) is quite
    7356              :          constrained, because setjmp is piggybacked on the SEH machinery with
    7357              :          recent versions of MinGW:
    7358              : 
    7359              :           #    elif defined(__SEH__)
    7360              :           #     if defined(__aarch64__) || defined(_ARM64_)
    7361              :           #      define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
    7362              :           #     elif (__MINGW_GCC_VERSION < 40702)
    7363              :           #      define setjmp(BUF) _setjmp((BUF), mingw_getsp())
    7364              :           #     else
    7365              :           #      define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
    7366              :           #     endif
    7367              : 
    7368              :          and the second argument passed to _setjmp, if not null, is forwarded
    7369              :          to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
    7370              :          built an ExceptionRecord on the fly describing the setjmp buffer).  */
    7371              :       const HOST_WIDE_INT diff
    7372              :         = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
    7373              :       if (diff <= 255 && !crtl->accesses_prior_frames)
    7374              :         {
    7375              :           /* The resulting diff will be a multiple of 16 lower than 255,
    7376              :              i.e. at most 240 as required by the unwind data structure.  */
    7377              :           frame->hard_frame_pointer_offset += (diff & 15);
    7378              :         }
    7379              :       else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
    7380              :         {
    7381              :           /* Ideally we'd determine what portion of the local stack frame
    7382              :              (within the constraint of the lowest 240) is most heavily used.
    7383              :              But without that complication, simply bias the frame pointer
    7384              :              by 128 bytes so as to maximize the amount of the local stack
    7385              :              frame that is addressable with 8-bit offsets.  */
    7386              :           frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
    7387              :         }
    7388              :       else
    7389              :         frame->hard_frame_pointer_offset = frame->hfp_save_offset;
    7390              :     }
    7391      8199850 : }
    7392              : 
    7393              : /* This is semi-inlined memory_address_length, but simplified
    7394              :    since we know that we're always dealing with reg+offset, and
    7395              :    to avoid having to create and discard all that rtl.  REGNO is the
                      :    base register and OFFSET the displacement.  The result is 4 by
                      :    default, 1 when OFFSET is within [-128, 127], and for a zero OFFSET
                      :    either 0 or, for BP_REG and R13_REG, 1; SP_REG and R12_REG add 1.  */
    7396              : 
    7397              : static inline int
    7398      1003599 : choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
    7399              : {
    7400      1003599 :   int len = 4;
    7401              : 
    7402            0 :   if (offset == 0)
    7403              :     {
    7404              :       /* EBP and R13 cannot be encoded without an offset.  */
    7405            0 :       len = (regno == BP_REG || regno == R13_REG);
    7406              :     }
    7407       995531 :   else if (IN_RANGE (offset, -128, 127))
    7408       630674 :     len = 1;
    7409              : 
    7410              :   /* ESP and R12 must be encoded with a SIB byte.  */
    7411            0 :   if (regno == SP_REG || regno == R12_REG)
    7412            0 :     len++;
    7413              : 
    7414      1003599 :   return len;
    7415              : }
    7416              : 
    7417              : /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
    7418              :    the frame save area.  The register is saved at CFA - CFA_OFFSET.
                      :    When the stack pointer has been realigned (fs.sp_realigned), it is
                      :    never valid for a CFA_OFFSET at or below fs.sp_realigned_offset;
                      :    otherwise the answer is fs.sp_valid.  */
    7419              : 
    7420              : static bool
    7421      3501894 : sp_valid_at (HOST_WIDE_INT cfa_offset)
    7422              : {
    7423      3501894 :   const struct machine_frame_state &fs = cfun->machine->fs;
    7424      3501894 :   if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
    7425              :     {
    7426              :       /* Validate that the cfa_offset isn't in a "no-man's land".  */
    7427        46600 :       gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
    7428              :       return false;
    7429              :     }
    7430      3455294 :   return fs.sp_valid;
    7431              : }
    7432              : 
    7433              : /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
    7434              :    the frame save area.  The register is saved at CFA - CFA_OFFSET.
                      :    When the stack pointer has been realigned (fs.sp_realigned), the
                      :    frame pointer is never valid for a CFA_OFFSET above
                      :    fs.sp_realigned_fp_last; otherwise the answer is fs.fp_valid.  */
    7435              : 
    7436              : static inline bool
    7437      1355438 : fp_valid_at (HOST_WIDE_INT cfa_offset)
    7438              : {
    7439      1355438 :   const struct machine_frame_state &fs = cfun->machine->fs;
    7440      1355438 :   if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
    7441              :     {
    7442              :       /* Validate that the cfa_offset isn't in a "no-man's land".  */
    7443        28328 :       gcc_assert (cfa_offset >= fs.sp_realigned_offset);
    7444              :       return false;
    7445              :     }
    7446      1327110 :   return fs.fp_valid;
    7447              : }
    7448              : 
    7449              : /* Choose a base register based upon alignment requested, speed and/or
    7450              :    size.  BASE_REG and BASE_OFFSET are assigned only if a candidate qualifies.  */
    7451              : 
    7452              : static void
    7453      1355438 : choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
    7454              :                 HOST_WIDE_INT &base_offset,
    7455              :                 unsigned int align_reqested, unsigned int *align)
    7456              : {
    7457      1355438 :   const struct machine_function *m = cfun->machine;
    7458      1355438 :   unsigned int hfp_align;
    7459      1355438 :   unsigned int drap_align;
    7460      1355438 :   unsigned int sp_align;
    7461      1355438 :   bool hfp_ok  = fp_valid_at (cfa_offset);
    7462      1355438 :   bool drap_ok = m->fs.drap_valid;
    7463      1355438 :   bool sp_ok   = sp_valid_at (cfa_offset);
    7464              : 
    7465      1355438 :   hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;  /* Default for all three.  */
    7466              : 
    7467              :   /* Filter out any registers that don't meet the requested alignment
    7468              :      criteria.  */
    7469      1355438 :   if (align_reqested)
    7470              :     {
    7471       961194 :       if (m->fs.realigned)
    7472        28160 :         hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
    7473              :       /* SEH unwind code does not currently support REG_CFA_EXPRESSION
    7474              :          notes (which we would need to use a realigned stack pointer),
    7475              :          so disable on SEH targets.  */
    7476       933034 :       else if (m->fs.sp_realigned)
    7477        28328 :         sp_align = crtl->stack_alignment_needed;
    7478              : 
    7479       961194 :       hfp_ok = hfp_ok && hfp_align >= align_reqested;
    7480       961194 :       drap_ok = drap_ok && drap_align >= align_reqested;
    7481       961194 :       sp_ok = sp_ok && sp_align >= align_reqested;
    7482              :     }
    7483              : 
    7484      1355438 :   if (m->use_fast_prologue_epilogue)
    7485              :     {
    7486              :       /* Choose the base register most likely to allow the most scheduling
    7487              :          opportunities.  Generally FP is valid throughout the function,
    7488              :          while DRAP must be reloaded within the epilogue.  But choose either
    7489              :          over the SP due to increased encoding size.  */
    7490              : 
    7491       647174 :       if (hfp_ok)
    7492              :         {
    7493       118719 :           base_reg = hard_frame_pointer_rtx;
    7494       118719 :           base_offset = m->fs.fp_offset - cfa_offset;
    7495              :         }
    7496       528455 :       else if (drap_ok)
    7497              :         {
    7498            0 :           base_reg = crtl->drap_reg;
    7499            0 :           base_offset = 0 - cfa_offset;
    7500              :         }
    7501       528455 :       else if (sp_ok)
    7502              :         {
    7503       528455 :           base_reg = stack_pointer_rtx;
    7504       528455 :           base_offset = m->fs.sp_offset - cfa_offset;
    7505              :         }
    7506              :     }
    7507              :   else
    7508              :     {
    7509       708264 :       HOST_WIDE_INT toffset;
    7510       708264 :       int len = 16, tlen;  /* NOTE(review): 16 presumably exceeds any choose_baseaddr_len result -- confirm.  */
    7511              : 
    7512              :       /* Choose the base register with the smallest address encoding.
    7513              :          With a tie, choose FP > DRAP > SP.  */
    7514       708264 :       if (sp_ok)
    7515              :         {
    7516       690948 :           base_reg = stack_pointer_rtx;
    7517       690948 :           base_offset = m->fs.sp_offset - cfa_offset;
    7518      1373828 :           len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
    7519              :         }
    7520       708264 :       if (drap_ok)
    7521              :         {
    7522            0 :           toffset = 0 - cfa_offset;
    7523            0 :           tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
    7524            0 :           if (tlen <= len)  /* "<=" lets DRAP win a tie against SP.  */
    7525              :             {
    7526            0 :               base_reg = crtl->drap_reg;
    7527            0 :               base_offset = toffset;
    7528            0 :               len = tlen;
    7529              :             }
    7530              :         }
    7531       708264 :       if (hfp_ok)
    7532              :         {
    7533       312651 :           toffset = m->fs.fp_offset - cfa_offset;
    7534       312651 :           tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
    7535       312651 :           if (tlen <= len)  /* "<=" lets FP win a tie against DRAP and SP.  */
    7536              :             {
    7537       221981 :               base_reg = hard_frame_pointer_rtx;
    7538       221981 :               base_offset = toffset;
    7539              :             }
    7540              :         }
    7541              :     }
    7542              : 
    7543              :     /* Set the align return value.  */
    7544      1355438 :     if (align)
    7545              :       {
    7546       961194 :         if (base_reg == stack_pointer_rtx)
    7547       679497 :           *align = sp_align;
    7548       281697 :         else if (base_reg == crtl->drap_reg)
    7549            0 :           *align = drap_align;
    7550       281697 :         else if (base_reg == hard_frame_pointer_rtx)
    7551       281697 :           *align = hfp_align;
    7552              :       }
    7553      1355438 : }
    7554              : 
    7555              : /* Return an RTX that points to CFA_OFFSET within the stack frame and
    7556              :    the alignment of address.  If ALIGN is non-null, it should point to
    7557              :    an alignment value (in bits) that is preferred or zero and will
    7558              :    receive the alignment of the base register that was selected,
    7559              :    irrespective of whether or not CFA_OFFSET is a multiple of that
    7560              :    alignment value.  If it is possible for the base register offset to be
    7561              :    non-immediate then SCRATCH_REGNO should specify a scratch register to
    7562              :    use.
    7563              : 
    7564              :    The valid base registers are taken from CFUN->MACHINE->FS.  */
    7565              : 
    7566              : static rtx
    7567      1355438 : choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
    7568              :                  unsigned int scratch_regno = INVALID_REGNUM)
    7569              : {
    7570      1355438 :   rtx base_reg = NULL;
    7571      1355438 :   HOST_WIDE_INT base_offset = 0;
    7572              : 
    7573              :   /* If a specific alignment is requested, try to get a base register
    7574              :      with that alignment first.  */
    7575      1355438 :   if (align && *align)
    7576       961194 :     choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
    7577              : 
    7578      1355438 :   if (!base_reg)  /* None chosen yet: retry with no alignment constraint.  */
    7579       394244 :     choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
    7580              : 
    7581      1355438 :   gcc_assert (base_reg != NULL);
    7582              : 
    7583      1355438 :   rtx base_offset_rtx = GEN_INT (base_offset);
    7584              : 
    7585      1405991 :   if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
    7586              :     {
    7587            1 :       gcc_assert (scratch_regno != INVALID_REGNUM);
    7588              : 
    7589            1 :       rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
    7590            1 :       emit_move_insn (scratch_reg, base_offset_rtx);  /* Load the offset into the scratch register.  */
    7591              : 
    7592            1 :       return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
    7593              :     }
    7594              : 
    7595      1405990 :   return plus_constant (Pmode, base_reg, base_offset);
    7596              : }
    7597              : 
    7598              : /* Emit code to save registers in the prologue, using PUSH or PUSH2 insns.  */
    7599              : 
    7600              : static void
    7601       427777 : ix86_emit_save_regs (void)
    7602              : {
    7603       427777 :   int regno;
    7604       427777 :   rtx_insn *insn;
    7605       427777 :   bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;
    7606              : 
    7607       427777 :   if (!TARGET_APX_PUSH2POP2
    7608           90 :       || !ix86_can_use_push2pop2 ()
    7609       427865 :       || cfun->machine->func_type != TYPE_NORMAL)
    7610              :     {
    7611     39775170 :       for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    7612     39347480 :         if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
    7613              :           {
    7614      1196180 :             insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
    7615              :                                         use_ppx));
    7616      1196180 :             RTX_FRAME_RELATED_P (insn) = 1;
    7617              :           }
    7618              :     }
    7619              :   else
    7620              :     {
    7621           87 :       int regno_list[2];  /* The pair of registers collected for one PUSH2.  */
    7622           87 :       regno_list[0] = regno_list[1] = -1;
    7623           87 :       int loaded_regnum = 0;  /* Number of valid entries in regno_list.  */
    7624           87 :       bool aligned = cfun->machine->fs.sp_offset % 16 == 0;  /* Pairing starts only once this is true.  */
    7625              : 
    7626         8091 :       for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
    7627         8004 :         if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
    7628              :           {
    7629          127 :             if (aligned)
    7630              :               {
    7631           45 :                 regno_list[loaded_regnum++] = regno;
    7632           45 :                 if (loaded_regnum == 2)
    7633              :                   {
    7634           19 :                     gcc_assert (regno_list[0] != -1
    7635              :                                 && regno_list[1] != -1
    7636              :                                 && regno_list[0] != regno_list[1]);
    7637           19 :                     const int offset = UNITS_PER_WORD * 2;
    7638           19 :                     rtx mem = gen_rtx_MEM (TImode,
    7639           19 :                                            gen_rtx_PRE_DEC (Pmode,
    7640              :                                                             stack_pointer_rtx));
    7641           19 :                     insn = emit_insn (gen_push2 (mem,
    7642              :                                                  gen_rtx_REG (word_mode,
    7643              :                                                               regno_list[0]),
    7644              :                                                  gen_rtx_REG (word_mode,
    7645              :                                                               regno_list[1]),
    7646              :                                                  use_ppx));
    7647           19 :                     RTX_FRAME_RELATED_P (insn) = 1;
    7648           19 :                     rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));  /* Elt 0: SP adjust; elts 1-2: the two saves.  */
    7649              : 
    7650           57 :                     for (int i = 0; i < 2; i++)
    7651              :                       {
    7652           76 :                         rtx dwarf_reg = gen_rtx_REG (word_mode,
    7653           38 :                                                      regno_list[i]);
    7654           38 :                         rtx sp_offset = plus_constant (Pmode,
    7655              :                                                        stack_pointer_rtx,
    7656           38 :                                                        + UNITS_PER_WORD
    7657           38 :                                                          * (1 - i));
    7658           38 :                         rtx tmp = gen_rtx_SET (gen_frame_mem (DImode,
    7659              :                                                               sp_offset),
    7660              :                                                dwarf_reg);
    7661           38 :                         RTX_FRAME_RELATED_P (tmp) = 1;
    7662           38 :                         XVECEXP (dwarf, 0, i + 1) = tmp;
    7663              :                       }
    7664           19 :                     rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx,
    7665              :                                               plus_constant (Pmode,
    7666              :                                                              stack_pointer_rtx,
    7667              :                                                              -offset));
    7668           19 :                     RTX_FRAME_RELATED_P (sp_tmp) = 1;
    7669           19 :                     XVECEXP (dwarf, 0, 0) = sp_tmp;
    7670           19 :                     add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
    7671              : 
    7672           19 :                     loaded_regnum = 0;  /* Reset for the next pair.  */
    7673           19 :                     regno_list[0] = regno_list[1] = -1;
    7674              :                   }
    7675              :               }
    7676              :             else
    7677              :               {
    7678           82 :                 insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
    7679              :                                             use_ppx));
    7680           82 :                 RTX_FRAME_RELATED_P (insn) = 1;
    7681           82 :                 aligned = true;  /* After this single push, later saves are paired.  */
    7682              :               }
    7683              :           }
    7684           87 :       if (loaded_regnum == 1)  /* One unpaired register left: use a plain push.  */
    7685              :         {
    7686            7 :           insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
    7687            7 :                                                    regno_list[0]),
    7688              :                                       use_ppx));
    7689            7 :           RTX_FRAME_RELATED_P (insn) = 1;
    7690              :         }
    7691              :     }
    7692       427777 : }
    7693              : 
    7694              : /* Emit a single save of register REGNO in MODE at CFA - CFA_OFFSET.  */
    7695              : 
    7696              : static void
    7697       607111 : ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
    7698              :                               HOST_WIDE_INT cfa_offset)
    7699              : {
    7700       607111 :   struct machine_function *m = cfun->machine;
    7701       607111 :   rtx reg = gen_rtx_REG (mode, regno);
    7702       607111 :   rtx mem, addr, base, insn;
    7703       607111 :   unsigned int align = GET_MODE_ALIGNMENT (mode);  /* In: preferred alignment; out: base register's.  */
    7704              : 
    7705       607111 :   addr = choose_baseaddr (cfa_offset, &align);
    7706       607111 :   mem = gen_frame_mem (mode, addr);
    7707              : 
    7708              :   /* The location alignment depends upon the base register.  */
    7709       607111 :   align = MIN (GET_MODE_ALIGNMENT (mode), align);
    7710       607111 :   gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));  /* CFA_OFFSET must be a multiple of ALIGN bytes.  */
    7711       607111 :   set_mem_align (mem, align);
    7712              : 
    7713       607111 :   insn = emit_insn (gen_rtx_SET (mem, reg));
    7714       607111 :   RTX_FRAME_RELATED_P (insn) = 1;
    7715              : 
    7716       607111 :   base = addr;
    7717       607111 :   if (GET_CODE (base) == PLUS)  /* Strip the offset to get the base register.  */
    7718       595363 :     base = XEXP (base, 0);
    7719       607111 :   gcc_checking_assert (REG_P (base));
    7720              : 
    7721              :   /* When saving registers into a re-aligned local stack frame, avoid
    7722              :      any tricky guessing by dwarf2out.  */
    7723       607111 :   if (m->fs.realigned)
    7724              :     {
    7725        12800 :       gcc_checking_assert (stack_realign_drap);
    7726              : 
    7727        12800 :       if (regno == REGNO (crtl->drap_reg))
    7728              :         {
    7729              :           /* A bit of a hack.  We force the DRAP register to be saved in
    7730              :              the re-aligned stack frame, which provides us with a copy
    7731              :              of the CFA that will last past the prologue.  Install it.  */
    7732            0 :           gcc_checking_assert (cfun->machine->fs.fp_valid);
    7733            0 :           addr = plus_constant (Pmode, hard_frame_pointer_rtx,
    7734            0 :                                 cfun->machine->fs.fp_offset - cfa_offset);
    7735            0 :           mem = gen_rtx_MEM (mode, addr);
    7736            0 :           add_reg_note (insn, REG_CFA_DEF_CFA, mem);
    7737              :         }
    7738              :       else
    7739              :         {
    7740              :           /* The frame pointer is a stable reference within the
    7741              :              aligned frame.  Use it.  */
    7742        12800 :           gcc_checking_assert (cfun->machine->fs.fp_valid);
    7743        12800 :           addr = plus_constant (Pmode, hard_frame_pointer_rtx,
    7744        12800 :                                 cfun->machine->fs.fp_offset - cfa_offset);
    7745        12800 :           mem = gen_rtx_MEM (mode, addr);
    7746        12800 :           add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
    7747              :         }
    7748              :     }
    7749              : 
    7750       594311 :   else if (base == stack_pointer_rtx && m->fs.sp_realigned
    7751        12881 :            && cfa_offset >= m->fs.sp_realigned_offset)
    7752              :     {
    7753        12881 :       gcc_checking_assert (stack_realign_fp);
    7754        12881 :       add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
    7755              :     }
    7756              : 
    7757              :   /* The memory may not be relative to the current CFA register,
    7758              :      which means that we may need to generate a new pattern for
    7759              :      use by the unwind info.  */
    7760       581430 :   else if (base != m->fs.cfa_reg)
    7761              :     {
    7762        45097 :       addr = plus_constant (Pmode, m->fs.cfa_reg,
    7763        45097 :                             m->fs.cfa_offset - cfa_offset);
    7764        45097 :       mem = gen_rtx_MEM (mode, addr);
    7765        45097 :       add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
    7766              :     }
    7767       607111 : }
    7768              : 
    7769              : /* Emit code to save general registers using MOV insns.
    7770              :    First register is stored at CFA - CFA_OFFSET.  */
    7771              : static void
    7772        44146 : ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
    7773              : {
    7774        44146 :   unsigned int regno;
    7775              : 
    7776      4105578 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    7777      4061432 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
    7778              :       {
    7779              :         /* Skip registers, already processed by shrink wrap separate.  */
    7780       188586 :         if (!cfun->machine->reg_is_wrapped_separately[regno])
    7781        84047 :           ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
    7782       203288 :         cfa_offset -= UNITS_PER_WORD;  /* The slot is consumed even when the save is skipped.  */
    7783              :       }
    7784        44146 : }
    7785              : 
    7786              : /* Emit code to save SSE registers using MOV insns.
    7787              :    First register is stored at CFA - CFA_OFFSET.  */
    7788              : static void
    7789        33363 : ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
    7790              : {
    7791        33363 :   unsigned int regno;
    7792              : 
    7793      3102759 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    7794      3069396 :     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
    7795              :       {
    7796       333657 :         ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
    7797       333657 :         cfa_offset -= GET_MODE_SIZE (V4SFmode);  /* Advance by one V4SFmode-sized slot.  */
    7798              :       }
    7799        33363 : }
    7800              : 
    7801              : static GTY(()) rtx queued_cfa_restores;  /* REG_CFA_RESTORE notes waiting for an insn.  */
    7802              : 
    7803              : /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
    7804              :    manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
    7805              :    Don't add the note if the previously saved value will be left untouched
    7806              :    within stack red-zone till return, as unwinders can find the same value
    7807              :    in the register and on the stack.  */
    7808              : 
    7809              : static void
    7810      2292920 : ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
    7811              : {
    7812      2292920 :   if (!crtl->shrink_wrapped
    7813      2274323 :       && cfa_offset <= cfun->machine->fs.red_zone_offset)
    7814              :     return;  /* Saved value stays intact in the red zone: no note.  */
    7815              : 
    7816       771362 :   if (insn)
    7817              :     {
    7818       360575 :       add_reg_note (insn, REG_CFA_RESTORE, reg);
    7819       360575 :       RTX_FRAME_RELATED_P (insn) = 1;
    7820              :     }
    7821              :   else  /* No insn supplied: push the note onto the queue.  */
    7822       410787 :     queued_cfa_restores
    7823       410787 :       = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
    7824              : }
    7825              : 
    7826              : /* Add queued REG_CFA_RESTORE notes if any to INSN, and empty the queue.  */
    7827              : 
    7828              : static void
    7829      2555044 : ix86_add_queued_cfa_restore_notes (rtx insn)
    7830              : {
    7831      2555044 :   rtx last;
    7832      2555044 :   if (!queued_cfa_restores)
    7833              :     return;
    7834       410787 :   for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))  /* Find the tail of the queue.  */
    7835              :     ;
    7836        52954 :   XEXP (last, 1) = REG_NOTES (insn);  /* Chain INSN's existing notes after the queue.  */
    7837        52954 :   REG_NOTES (insn) = queued_cfa_restores;
    7838        52954 :   queued_cfa_restores = NULL_RTX;
    7839        52954 :   RTX_FRAME_RELATED_P (insn) = 1;
    7840              : }
    7841              : 
    7842              : /* Expand prologue or epilogue stack adjustment.
    7843              :    The pattern exists to put a dependency on all ebp-based memory accesses.
    7844              :    STYLE should be negative if instructions should be marked as frame related,
    7845              :    zero if %r11 register is live and cannot be freely used and positive
    7846              :    otherwise.  If SET_CFA, DEST becomes the CFA register.  Returns the emitted insn.  */
    7847              : 
    7848              : static rtx
    7849      1580186 : pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
    7850              :                            int style, bool set_cfa)
    7851              : {
    7852      1580186 :   struct machine_function *m = cfun->machine;
    7853      1580186 :   rtx addend = offset;
    7854      1580186 :   rtx insn;
    7855      1580186 :   bool add_frame_related_expr = false;
    7856              : 
    7857      1798710 :   if (!x86_64_immediate_operand (offset, Pmode))
    7858              :     {
    7859              :       /* r11 is used by indirect sibcall return as well, set before the
    7860              :          epilogue and used after the epilogue.  */
    7861          199 :       if (style)
    7862          174 :         addend = gen_rtx_REG (Pmode, R11_REG);
    7863              :       else
    7864              :         {
    7865           25 :           gcc_assert (src != hard_frame_pointer_rtx
    7866              :                       && dest != hard_frame_pointer_rtx);
    7867              :           addend = hard_frame_pointer_rtx;
    7868              :         }
    7869          199 :       emit_insn (gen_rtx_SET (addend, offset));  /* Load OFFSET into the temporary register.  */
    7870          199 :       if (style < 0)
    7871           88 :         add_frame_related_expr = true;
    7872              :     }
    7873              : 
    7874              :   /*  Shrink wrap separate may insert prologue between TEST and JMP.  In order
    7875              :       not to affect EFlags, emit add without reg clobbering.  */
    7876      1580186 :   if (crtl->shrink_wrapped_separate)
    7877        93230 :     insn = emit_insn (gen_pro_epilogue_adjust_stack_add_nocc
    7878        93230 :                       (Pmode, dest, src, addend));
    7879              :   else
    7880      1486956 :     insn = emit_insn (gen_pro_epilogue_adjust_stack_add
    7881      1486956 :                       (Pmode, dest, src, addend));
    7882              : 
    7883      1580186 :   if (style >= 0)
    7884       694955 :     ix86_add_queued_cfa_restore_notes (insn);
    7885              : 
    7886      1580186 :   if (set_cfa)
    7887              :     {
    7888      1215141 :       rtx r;
    7889              : 
    7890      1215141 :       gcc_assert (m->fs.cfa_reg == src);
    7891      1215141 :       m->fs.cfa_offset += INTVAL (offset);
    7892      1215141 :       m->fs.cfa_reg = dest;
    7893              : 
    7894      1411288 :       r = gen_rtx_PLUS (Pmode, src, offset);
    7895      1215141 :       r = gen_rtx_SET (dest, r);
    7896      1215141 :       add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
    7897      1215141 :       RTX_FRAME_RELATED_P (insn) = 1;
    7898              :     }
    7899       365045 :   else if (style < 0)
    7900              :     {
    7901       298163 :       RTX_FRAME_RELATED_P (insn) = 1;
    7902       298163 :       if (add_frame_related_expr)  /* The note uses the constant OFFSET, not the temporary register.  */
    7903              :         {
    7904           20 :           rtx r = gen_rtx_PLUS (Pmode, src, offset);
    7905           20 :           r = gen_rtx_SET (dest, r);
    7906           20 :           add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
    7907              :         }
    7908              :     }
    7909              : 
    7910      1580186 :   if (dest == stack_pointer_rtx)
    7911              :     {
    7912      1580186 :       HOST_WIDE_INT ooffset = m->fs.sp_offset;
    7913      1580186 :       bool valid = m->fs.sp_valid;
    7914      1580186 :       bool realigned = m->fs.sp_realigned;
    7915              : 
    7916      1580186 :       if (src == hard_frame_pointer_rtx)
    7917              :         {
    7918        29817 :           valid = m->fs.fp_valid;
    7919        29817 :           realigned = false;
    7920        29817 :           ooffset = m->fs.fp_offset;
    7921              :         }
    7922      1550369 :       else if (src == crtl->drap_reg)
    7923              :         {
    7924            0 :           valid = m->fs.drap_valid;
    7925            0 :           realigned = false;
    7926            0 :           ooffset = 0;
    7927              :         }
    7928              :       else
    7929              :         {
    7930              :           /* Else there are two possibilities: SP itself, which we set
    7931              :              up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
    7932              :              taken care of this by hand along the eh_return path.  */
    7933      1550369 :           gcc_checking_assert (src == stack_pointer_rtx
    7934              :                                || offset == const0_rtx);
    7935              :         }
    7936              : 
    7937      1580186 :       m->fs.sp_offset = ooffset - INTVAL (offset);  /* New SP offset is SRC's offset minus OFFSET.  */
    7938      1580186 :       m->fs.sp_valid = valid;
    7939      1580186 :       m->fs.sp_realigned = realigned;
    7940              :     }
    7941      1580186 :   return insn;
    7942              : }
    7943              : 
    7944              : /* Find an available register to be used as the dynamic realign argument
    7945              :    pointer (DRAP) register.  Such a register is written in the prologue
    7946              :    and used at the beginning of the body, so it must not be
    7947              :         1. a parameter passing register.
    7948              :         2. the GOT pointer.
    7949              :    We reuse the static-chain register if it is available.  Otherwise, we
    7950              :    use DI for i386 and R13 for x86-64.  We chose R13 since it has
    7951              :    a shorter encoding.
    7952              : 
    7953              :    Return: the regno of the chosen register.  */
    7954              : 
    7955              : static unsigned int
    7956         7295 : find_drap_reg (void)
    7957              : {
    7958         7295 :   tree decl = cfun->decl;
    7959              : 
    7960              :   /* Always use callee-saved register if there are no caller-saved
    7961              :      registers.  */
    7962         7295 :   if (TARGET_64BIT)
    7963              :     {
    7964              :       /* In preserve_none functions, any register can be used for DRAP,
    7965              :          except AX, R12–R15, DI, SI (argument registers), SP, and BP.  */
    7966         7010 :       if (cfun->machine->call_saved_registers == TYPE_PRESERVE_NONE)
    7967              :         return R11_REG;
    7968              : 
    7969              :       /* Use R13 for a nested function or a function that needs a static
    7970              :          chain.  Since a function with a tail call may use any caller-saved
    7971              :          register in its epilogue, DRAP must not use a caller-saved
    7972              :          register in such a case.  */
    7973         7009 :       if (DECL_STATIC_CHAIN (decl)
    7974         6967 :           || (cfun->machine->call_saved_registers
    7975              :               == TYPE_NO_CALLER_SAVED_REGISTERS)
    7976        13976 :           || crtl->tail_call_emit)
    7977          191 :         return R13_REG;
    7978              : 
    7979              :       return R10_REG;
    7980              :     }
    7981              :   else
    7982              :     {
    7983              :       /* Use DI for a nested function or a function that needs a static
    7984              :          chain.  Since a function with a tail call may use any caller-saved
    7985              :          register in its epilogue, DRAP must not use a caller-saved
    7986              :          register in such a case.  */
    7987          285 :       if (DECL_STATIC_CHAIN (decl)
    7988          285 :           || (cfun->machine->call_saved_registers
    7989          285 :               == TYPE_NO_CALLER_SAVED_REGISTERS)
    7990          285 :           || crtl->tail_call_emit
    7991          550 :           || crtl->calls_eh_return)
    7992              :         return DI_REG;
    7993              : 
    7994              :       /* Reuse static chain register if it isn't used for parameter
    7995              :          passing.  */
    7996          265 :       if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
    7997              :         {
    7998          265 :           unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
    7999          265 :           if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
    8000              :             return CX_REG;
    8001              :         }
    8002            0 :       return DI_REG;
    8003              :     }
    8004              : }
    8005              : 
    8006              : /* Return the minimum incoming stack boundary, in bits.  SIBCALL is true when the result is used for a sibcall check.  */
    8007              : 
    8008              : static unsigned int
    8009      1619987 : ix86_minimum_incoming_stack_boundary (bool sibcall)
    8010              : {
    8011      1619987 :   unsigned int incoming_stack_boundary;
    8012              : 
    8013              :   /* Stack of interrupt handler is aligned to 128 bits in 64-bit mode, MIN_STACK_BOUNDARY otherwise.  */
    8014      1619987 :   if (cfun->machine->func_type != TYPE_NORMAL)
    8015          120 :     incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
    8016              :   /* Otherwise prefer the one specified at command line.  */
    8017      1619867 :   else if (ix86_user_incoming_stack_boundary)
    8018              :     incoming_stack_boundary = ix86_user_incoming_stack_boundary;
    8019              :   /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
    8020              :      if all of the following hold: -mstackrealign is used, this is not
    8021              :      a sibcall check, and the estimated stack alignment is 128 bits.  */
    8022      1619845 :   else if (!sibcall
    8023      1489011 :            && ix86_force_align_arg_pointer
    8024         4572 :            && crtl->stack_alignment_estimated == 128)
    8025          596 :     incoming_stack_boundary = MIN_STACK_BOUNDARY;
    8026              :   else
    8027      1619249 :     incoming_stack_boundary = ix86_default_incoming_stack_boundary;
    8028              : 
    8029              :   /* Incoming stack alignment can be changed on individual functions
    8030              :      via force_align_arg_pointer attribute.  We use the smallest
    8031              :      incoming stack boundary.  */
    8032      1619987 :   if (incoming_stack_boundary > MIN_STACK_BOUNDARY
    8033      3239368 :       && lookup_attribute ("force_align_arg_pointer",
    8034      1619381 :                            TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    8035         5708 :     incoming_stack_boundary = MIN_STACK_BOUNDARY;
    8036              : 
    8037              :   /* The incoming stack frame has to be aligned at least at
    8038              :      parm_stack_boundary.  */
    8039      1619987 :   if (incoming_stack_boundary < crtl->parm_stack_boundary)
    8040              :     incoming_stack_boundary = crtl->parm_stack_boundary;
    8041              : 
    8042              :   /* Stack at entrance of main is aligned by runtime.  We use the
    8043              :      smallest incoming stack boundary.  */
    8044      1619987 :   if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
    8045       140808 :       && DECL_NAME (current_function_decl)
    8046       140808 :       && MAIN_NAME_P (DECL_NAME (current_function_decl))
    8047      1622461 :       && DECL_FILE_SCOPE_P (current_function_decl))
    8048         2474 :     incoming_stack_boundary = MAIN_STACK_BOUNDARY;
    8049              : 
    8050      1619987 :   return incoming_stack_boundary;
    8051              : }
    8052              : 
    8053              : /* Update incoming stack boundary, estimated stack alignment and preferred stack boundary.  */
    8054              : 
    8055              : static void
    8056      1489148 : ix86_update_stack_boundary (void)
    8057              : {
    8058      1489148 :   ix86_incoming_stack_boundary
    8059      1489148 :     = ix86_minimum_incoming_stack_boundary (false);
    8060              : 
    8061              :   /* x86_64 vararg needs 16-byte stack alignment for register save area.  */
    8062      1489148 :   if (TARGET_64BIT
    8063      1362581 :       && cfun->stdarg
    8064        21431 :       && crtl->stack_alignment_estimated < 128)
    8065        10187 :     crtl->stack_alignment_estimated = 128;
    8066              : 
    8067              :   /* __tls_get_addr needs to be called with 16-byte aligned stack.  */
    8068      1489148 :   if (ix86_tls_descriptor_calls_expanded_in_cfun
    8069         1078 :       && crtl->preferred_stack_boundary < 128)
    8070          750 :     crtl->preferred_stack_boundary = 128;
    8071              : 
    8072              :   /* For 32-bit MS ABI, both the incoming and preferred stack boundaries
    8073              :      are 32 bits, but if force_align_arg_pointer is specified, it should
    8074              :      prefer 128 bits for a backward-compatibility reason, which is also
    8075              :      what the doc suggests.  */
    8076      1489148 :   if (lookup_attribute ("force_align_arg_pointer",
    8077      1489148 :                         TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))
    8078      1489148 :       && crtl->preferred_stack_boundary < 128)
    8079            4 :     crtl->preferred_stack_boundary = 128;
    8080      1489148 : }
    8081              : 
    8082              : /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
    8083              :    needed or an rtx for DRAP otherwise.  */
    8084              : 
    8085              : static rtx
    8086      1593012 : ix86_get_drap_rtx (void)
    8087              : {
    8088              :   /* We must use DRAP if it is forced (ix86_force_drap), or if there are
    8089              :      outgoing arguments on stack or the stack pointer register is
    8090              :      clobbered by asm statement, and ACCUMULATE_OUTGOING_ARGS is false.  */
    8091      1593012 :   if (ix86_force_drap
    8092      1593012 :       || ((cfun->machine->outgoing_args_on_stack
    8093      1260546 :            || crtl->sp_is_clobbered_by_asm)
    8094       330521 :           && !ACCUMULATE_OUTGOING_ARGS))
    8095       310326 :     crtl->need_drap = true;
    8096              : 
    8097      1593012 :   if (stack_realign_drap)
    8098              :     {
    8099              :       /* Assign DRAP to vDRAP and return vDRAP.  */
    8100         7295 :       unsigned int regno = find_drap_reg ();
    8101         7295 :       rtx drap_vreg;
    8102         7295 :       rtx arg_ptr;
    8103         7295 :       rtx_insn *seq, *insn;
    8104              : 
    8105         7580 :       arg_ptr = gen_rtx_REG (Pmode, regno);
    8106         7295 :       crtl->drap_reg = arg_ptr;
    8107              : 
    8108         7295 :       start_sequence ();
    8109         7295 :       drap_vreg = copy_to_reg (arg_ptr);
    8110         7295 :       seq = end_sequence ();
    8111              : 
    8112         7295 :       insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
    8113         7295 :       if (!optimize)
    8114              :         {
    8115         1896 :           add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
    8116         1896 :           RTX_FRAME_RELATED_P (insn) = 1;
    8117              :         }
    8118         7295 :       return drap_vreg;
    8119              :     }
    8120              :   else
    8121              :     return NULL;
    8122              : }
    8123              : 
    8124              : /* Handle the TARGET_INTERNAL_ARG_POINTER hook by returning virtual_incoming_args_rtx.  */
    8125              : 
    8126              : static rtx
    8127      1489150 : ix86_internal_arg_pointer (void)
    8128              : {
    8129      1489150 :   return virtual_incoming_args_rtx;
    8130              : }
    8131              : 
    8132              : struct scratch_reg {  /* Filled in by get_scratch_register_on_entry.  */
    8133              :   rtx reg;  /* The scratch register.  */
    8134              :   bool saved;  /* True if REG was pushed on the stack to preserve it.  */
    8135              : };
    8136              : 
    8137              : /* Return in SR a short-lived scratch register for use on function entry.
    8138              :    SR->saved is true if the register had to be pushed on the stack.  In 32-bit mode, it is valid only after the registers are saved
    8139              :    in the prologue.  This register must be released by means of
    8140              :    release_scratch_register_on_entry once it is dead.  */
    8141              : 
    8142              : static void
    8143           25 : get_scratch_register_on_entry (struct scratch_reg *sr)
    8144              : {
    8145           25 :   int regno;
    8146              : 
    8147           25 :   sr->saved = false;
    8148              : 
    8149           25 :   if (TARGET_64BIT)
    8150              :     {
    8151              :       /* We always use R11 in 64-bit mode.  */
    8152              :       regno = R11_REG;
    8153              :     }
    8154              :   else
    8155              :     {
    8156            0 :       tree decl = current_function_decl, fntype = TREE_TYPE (decl);
    8157            0 :       bool fastcall_p
    8158            0 :         = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
    8159            0 :       bool thiscall_p
    8160            0 :         = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
    8161            0 :       bool static_chain_p = DECL_STATIC_CHAIN (decl);
    8162            0 :       int regparm = ix86_function_regparm (fntype, decl);
    8163            0 :       int drap_regno
    8164            0 :         = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
    8165              : 
    8166              :       /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
    8167              :           for the static chain register.  */
    8168            0 :       if ((regparm < 1 || (fastcall_p && !static_chain_p))
    8169            0 :           && drap_regno != AX_REG)
    8170              :         regno = AX_REG;
    8171              :       /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
    8172              :           for the static chain register.  */
    8173            0 :       else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
    8174              :         regno = AX_REG;
    8175            0 :       else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
    8176              :         regno = DX_REG;
    8177              :       /* ecx is the static chain register.  */
    8178            0 :       else if (regparm < 3 && !fastcall_p && !thiscall_p
    8179            0 :                && !static_chain_p
    8180            0 :                && drap_regno != CX_REG)
    8181              :         regno = CX_REG;
    8182            0 :       else if (ix86_save_reg (BX_REG, true, false))
    8183              :         regno = BX_REG;
    8184              :       /* esi is the static chain register.  */
    8185            0 :       else if (!(regparm == 3 && static_chain_p)
    8186            0 :                && ix86_save_reg (SI_REG, true, false))
    8187              :         regno = SI_REG;
    8188            0 :       else if (ix86_save_reg (DI_REG, true, false))
    8189              :         regno = DI_REG;
    8190              :       else
    8191              :         {
    8192            0 :           regno = (drap_regno == AX_REG ? DX_REG : AX_REG);  /* Avoid the DRAP register.  */
    8193            0 :           sr->saved = true;  /* None of the choices above applied: push the register below.  */
    8194              :         }
    8195              :     }
    8196              : 
    8197           25 :   sr->reg = gen_rtx_REG (Pmode, regno);
    8198           25 :   if (sr->saved)
    8199              :     {
    8200            0 :       rtx_insn *insn = emit_insn (gen_push (sr->reg));
    8201            0 :       RTX_FRAME_RELATED_P (insn) = 1;
    8202              :     }
    8203           25 : }
    8204              : 
    8205              : /* Release a scratch register obtained from the preceding function.  Nothing is emitted unless SR->saved is true.
    8206              : 
    8207              :    If RELEASE_VIA_POP is true, we just pop the register off the stack
    8208              :    to release it.  This is what non-Linux systems use with -fstack-check.
    8209              : 
    8210              :    Otherwise we use OFFSET to locate the saved register and the
    8211              :    allocated stack space becomes part of the local frame and is
    8212              :    deallocated by the epilogue.  */
    8213              : 
    8214              : static void
    8215           25 : release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
    8216              :                                    bool release_via_pop)
    8217              : {
    8218           25 :   if (sr->saved)
    8219              :     {
    8220            0 :       if (release_via_pop)
    8221              :         {
    8222            0 :           struct machine_function *m = cfun->machine;
    8223            0 :           rtx x, insn = emit_insn (gen_pop (sr->reg));
    8224              : 
    8225              :           /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
    8226            0 :           RTX_FRAME_RELATED_P (insn) = 1;
    8227            0 :           x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
    8228            0 :           x = gen_rtx_SET (stack_pointer_rtx, x);
    8229            0 :           add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    8230            0 :           m->fs.sp_offset -= UNITS_PER_WORD;
    8231              :         }
    8232              :       else
    8233              :         {
    8234            0 :           rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
    8235            0 :           x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
    8236            0 :           emit_insn (x);
    8237              :         }
    8238              :     }
    8239           25 : }
    8240              : 
    8241              : /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
    8242              : 
    8243              :    If INT_REGISTERS_SAVED is true, then integer registers have already been
    8244              :    pushed on the stack.
    8245              : 
    8246              :    If PROTECTION_AREA is true, then probe PROBE_INTERVAL plus a small dope
    8247              :    beyond SIZE bytes.
    8248              : 
    8249              :    This assumes no knowledge of the current probing state, i.e. it is never
    8250              :    allowed to allocate more than PROBE_INTERVAL bytes of stack space without
    8251              :    a suitable probe.  */
    8252              : 
    8253              : static void
    8254          126 : ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
    8255              :                              const bool int_registers_saved,
    8256              :                              const bool protection_area)
    8257              : {
    8258          126 :   struct machine_function *m = cfun->machine;
    8259              : 
    8260              :   /* If this function does not statically allocate stack space, then
    8261              :      no probes are needed.  */
    8262          126 :   if (!size)
    8263              :     {
    8264              :       /* However, the allocation of space via pushes for register
    8265              :          saves could be viewed as allocating space, but without the
    8266              :          need to probe.  */
    8267           43 :       if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
    8268           23 :         dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
    8269              :       else
    8270           20 :         dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
    8271           43 :       return;
    8272              :     }
    8273              : 
    8274              :   /* If we are a noreturn function, then we have to consider the
    8275              :      possibility that we're called via a jump rather than a call.
    8276              : 
    8277              :      Thus we don't have the implicit probe generated by saving the
    8278              :      return address into the stack at the call.  Thus, the stack
    8279              :      pointer could be anywhere in the guard page.  The safe thing
    8280              :      to do is emit a probe now.
    8281              : 
    8282              :      The probe can be avoided if we have already emitted any callee
    8283              :      register saves into the stack or have a frame pointer (which will
    8284              :      have been saved as well).  Those saves will function as implicit
    8285              :      probes.
    8286              : 
    8287              :      ?!? This should be revamped to work like aarch64 and s390 where
    8288              :      we track the offset from the most recent probe.  Normally that
    8289              :      offset would be zero.  For a noreturn function we would reset
    8290              :      it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT).   Then
    8291              :      we just probe when we cross PROBE_INTERVAL.  */
    8292           83 :   if (TREE_THIS_VOLATILE (cfun->decl)
    8293           15 :       && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
    8294              :     {
    8295              :       /* We can safely use any register here since we're just going to push
    8296              :          its value and immediately pop it back.  But we do try and avoid
    8297              :          argument passing registers so as not to introduce dependencies in
    8298              :          the pipeline.  For 32 bit we use %esi and for 64 bit we use %rax.  */
    8299           15 :       rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
    8300           15 :       rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
    8301           15 :       rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
    8302           15 :       m->fs.sp_offset -= UNITS_PER_WORD;
    8303           15 :       if (m->fs.cfa_reg == stack_pointer_rtx)
    8304              :         {
    8305           15 :           m->fs.cfa_offset -= UNITS_PER_WORD;
    8306           15 :           rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
    8307           15 :           x = gen_rtx_SET (stack_pointer_rtx, x);
    8308           15 :           add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
    8309           15 :           RTX_FRAME_RELATED_P (insn_push) = 1;
    8310           15 :           x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
    8311           15 :           x = gen_rtx_SET (stack_pointer_rtx, x);
    8312           15 :           add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
    8313           15 :           RTX_FRAME_RELATED_P (insn_pop) = 1;
    8314              :         }
    8315           15 :       emit_insn (gen_blockage ());
    8316              :     }
    8317              : 
    8318           83 :   const HOST_WIDE_INT probe_interval = get_probe_interval ();
    8319           83 :   const int dope = 4 * UNITS_PER_WORD;
    8320              : 
    8321              :   /* If there is protection area, take it into account in the size.  */
    8322           83 :   if (protection_area)
    8323           24 :     size += probe_interval + dope;
    8324              : 
    8325              :   /* If we allocate less than the size of the guard statically,
    8326              :      then no probing is necessary, but we do need to allocate
    8327              :      the stack.  */
    8328           59 :   else if (size < (1 << param_stack_clash_protection_guard_size))
    8329              :     {
    8330           38 :       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    8331              :                                  GEN_INT (-size), -1,
    8332           38 :                                  m->fs.cfa_reg == stack_pointer_rtx);
    8333           38 :       dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
    8334           38 :       return;
    8335              :     }
    8336              : 
    8337              :   /* We're allocating a large enough stack frame that we need to
    8338              :      emit probes.  Either emit them inline or in a loop depending
    8339              :      on the size.  */
    8340           45 :   if (size <= 4 * probe_interval)
    8341              :     {
    8342              :       HOST_WIDE_INT i;
    8343           47 :       for (i = probe_interval; i <= size; i += probe_interval)
    8344              :         {
    8345              :           /* Allocate PROBE_INTERVAL bytes.  */
    8346           27 :           rtx insn
    8347           27 :             = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    8348              :                                          GEN_INT (-probe_interval), -1,
    8349           27 :                                          m->fs.cfa_reg == stack_pointer_rtx);
    8350           27 :           add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
    8351              : 
    8352              :           /* And probe at *sp.  */
    8353           27 :           emit_stack_probe (stack_pointer_rtx);
    8354           27 :           emit_insn (gen_blockage ());
    8355              :         }
    8356              : 
    8357              :       /* We need to allocate space for the residual (zero or negative here,
    8358              :          so adding it to SP allocates), but we do not need to probe it...  */
    8359           20 :       HOST_WIDE_INT residual = (i - probe_interval - size);
    8360           20 :       if (residual)
    8361              :         {
    8362           20 :           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    8363              :                                      GEN_INT (residual), -1,
    8364           20 :                                      m->fs.cfa_reg == stack_pointer_rtx);
    8365              : 
    8366              :           /* ...except if there is a protection area to maintain.  */
    8367           20 :           if (protection_area)
    8368           11 :             emit_stack_probe (stack_pointer_rtx);
    8369              :         }
    8370              : 
    8371           20 :       dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
    8372              :     }
    8373              :   else
    8374              :     {
    8375              :       /* We expect the GP registers to be saved when probes are used
    8376              :          as the probing sequences might need a scratch register and
    8377              :          the routine to allocate one assumes the integer registers
    8378              :          have already been saved.  */
    8379           25 :       gcc_assert (int_registers_saved);
    8380              : 
    8381           25 :       struct scratch_reg sr;
    8382           25 :       get_scratch_register_on_entry (&sr);
    8383              : 
    8384              :       /* If we needed to save a register, then account for any space
    8385              :          that was pushed (we are not going to pop the register when
    8386              :          we do the restore).  */
    8387           25 :       if (sr.saved)
    8388            0 :         size -= UNITS_PER_WORD;
    8389              : 
    8390              :       /* Step 1: round SIZE down to a multiple of the interval.  */
    8391           25 :       HOST_WIDE_INT rounded_size = size & -probe_interval;
    8392              : 
    8393              :       /* Step 2: compute final value of the loop counter.  Use lea if
    8394              :          possible.  */
    8395           25 :       rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
    8396           25 :       rtx insn;
    8397           25 :       if (address_no_seg_operand (addr, Pmode))
    8398           13 :         insn = emit_insn (gen_rtx_SET (sr.reg, addr));
    8399              :       else
    8400              :         {
    8401           12 :           emit_move_insn (sr.reg, GEN_INT (-rounded_size));
    8402           12 :           insn = emit_insn (gen_rtx_SET (sr.reg,
    8403              :                                          gen_rtx_PLUS (Pmode, sr.reg,
    8404              :                                                        stack_pointer_rtx)));
    8405              :         }
    8406           25 :       if (m->fs.cfa_reg == stack_pointer_rtx)
    8407              :         {
    8408           22 :           add_reg_note (insn, REG_CFA_DEF_CFA,
    8409           22 :                         plus_constant (Pmode, sr.reg,
    8410           22 :                                        m->fs.cfa_offset + rounded_size));
    8411           22 :           RTX_FRAME_RELATED_P (insn) = 1;
    8412              :         }
    8413              : 
    8414              :       /* Step 3: the loop.  */
    8415           25 :       rtx size_rtx = GEN_INT (rounded_size);
    8416           25 :       insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
    8417              :                                                     size_rtx));
    8418           25 :       if (m->fs.cfa_reg == stack_pointer_rtx)
    8419              :         {
    8420           22 :           m->fs.cfa_offset += rounded_size;
    8421           22 :           add_reg_note (insn, REG_CFA_DEF_CFA,
    8422           22 :                         plus_constant (Pmode, stack_pointer_rtx,
    8423           22 :                                        m->fs.cfa_offset));
    8424           22 :           RTX_FRAME_RELATED_P (insn) = 1;
    8425              :         }
    8426           25 :       m->fs.sp_offset += rounded_size;
    8427           25 :       emit_insn (gen_blockage ());
    8428              : 
    8429              :       /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
    8430              :          is equal to ROUNDED_SIZE.  */
    8431              : 
    8432           25 :       if (size != rounded_size)
    8433              :         {
    8434           25 :           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    8435              :                                      GEN_INT (rounded_size - size), -1,
    8436           25 :                                      m->fs.cfa_reg == stack_pointer_rtx);
    8437              : 
    8438           25 :           if (protection_area)
    8439           13 :             emit_stack_probe (stack_pointer_rtx);
    8440              :         }
    8441              : 
    8442           25 :       dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);
    8443              : 
    8444              :       /* This does not deallocate the space reserved for the scratch
    8445              :          register.  That will be deallocated in the epilogue.  */
    8446           25 :       release_scratch_register_on_entry (&sr, size, false);
    8447              :     }
    8448              : 
    8449              :   /* Adjust back to account for the protection area.  */
    8450           45 :   if (protection_area)
    8451           24 :     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    8452           24 :                                GEN_INT (probe_interval + dope), -1,
    8453           24 :                                m->fs.cfa_reg == stack_pointer_rtx);
    8454              : 
    8455              :   /* Make sure nothing is scheduled before we are done.  */
    8456           45 :   emit_insn (gen_blockage ());
    8457              : }
    8458              : 
    8459              : /* Output the loop that moves the stack pointer to REG one probe interval at a time, probing after each step.  Return "".  */
    8460              : 
    8461              : const char *
    8462           25 : output_adjust_stack_and_probe (rtx reg)
    8463              : {
    8464           25 :   static int labelno = 0;
    8465           25 :   char loop_lab[32];
    8466           25 :   rtx xops[2];
    8467              : 
    8468           25 :   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
    8469              : 
    8470              :   /* Loop.  */
    8471           25 :   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
    8472              : 
    8473              :   /* SP = SP - PROBE_INTERVAL.  */
    8474           25 :   xops[0] = stack_pointer_rtx;
    8475           37 :   xops[1] = GEN_INT (get_probe_interval ());
    8476           25 :   output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
    8477              : 
    8478              :   /* Probe at SP.  */
    8479           25 :   xops[1] = const0_rtx;
    8480           25 :   output_asm_insn ("or{b}\t{%1, (%0)|BYTE PTR [%0], %1}", xops);
    8481              : 
    8482              :   /* Test if SP == LAST_ADDR (held in REG).  */
    8483           25 :   xops[0] = stack_pointer_rtx;
    8484           25 :   xops[1] = reg;
    8485           25 :   output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
    8486              : 
    8487              :   /* Branch back to the loop label while they differ.  */
    8488           25 :   fputs ("\tjne\t", asm_out_file);
    8489           25 :   assemble_name_raw (asm_out_file, loop_lab);
    8490           25 :   fputc ('\n', asm_out_file);
    8491              : 
    8492           25 :   return "";
    8493              : }
    8494              : 
    8495              : /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
    8496              :    inclusive.  These are offsets below the current stack pointer.
    8497              : 
    8498              :    INT_REGISTERS_SAVED is true if integer registers have already been
    8499              :    pushed on the stack; it is asserted on the run-time loop path.  */
    8500              : 
    8501              : static void
    8502            0 : ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
    8503              :                              const bool int_registers_saved)
    8504              : {
    8505            0 :   const HOST_WIDE_INT probe_interval = get_probe_interval ();
    8506              : 
    8507              :   /* See if we have a constant small number of probes to generate.  If so,
    8508              :      that's the easy case.  The run-time loop is made up of 6 insns in the
    8509              :      generic case while the compile-time loop is made up of n insns for n
    8510              :      intervals.  */
    8511            0 :   if (size <= 6 * probe_interval)
    8512              :     {
    8513              :       HOST_WIDE_INT i;
    8514              : 
    8515              :       /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 while
    8516              :          N * PROBE_INTERVAL is below SIZE.  If only one probe is needed,
    8517              :          this will not generate any code.  Then probe at FIRST + SIZE.  */
    8518            0 :       for (i = probe_interval; i < size; i += probe_interval)
    8519            0 :         emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
    8520            0 :                                          -(first + i)));
    8521              : 
    8522            0 :       emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
    8523            0 :                                        -(first + size)));
    8524              :     }
    8525              : 
    8526              :   /* Otherwise, do the same as above, but in a loop.  Note that we must be
    8527              :      extra careful with variables wrapping around because we might be at
    8528              :      the very top (or the very bottom) of the address space and we have
    8529              :      to be able to handle this case properly; in particular, we use an
    8530              :      equality test for the loop condition.  */
    8531              :   else
    8532              :     {
    8533              :       /* We expect the GP registers to be saved when probes are used
    8534              :          as the probing sequences might need a scratch register and
    8535              :          the routine to allocate one assumes the integer registers
    8536              :          have already been saved.  */
    8537            0 :       gcc_assert (int_registers_saved);
    8538              : 
    8539            0 :       HOST_WIDE_INT rounded_size, last;
    8540            0 :       struct scratch_reg sr;
    8541              : 
    8542            0 :       get_scratch_register_on_entry (&sr);
    8543              : 
    8544              : 
    8545              :       /* Step 1: round SIZE to the previous multiple of the interval.  */
    8546              : 
    8547            0 :       rounded_size = ROUND_DOWN (size, probe_interval);
    8548              : 
    8549              : 
    8550              :       /* Step 2: compute initial and final value of the loop counter.  */
    8551              : 
    8552              :       /* TEST_OFFSET = FIRST; SR.REG holds it negated (-FIRST).  */
    8553            0 :       emit_move_insn (sr.reg, GEN_INT (-first));
    8554              : 
    8555              :       /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
    8556            0 :       last = first + rounded_size;
    8557              : 
    8558              : 
    8559              :       /* Step 3: the loop
    8560              : 
    8561              :          do
    8562              :            {
    8563              :              TEST_OFFSET = TEST_OFFSET + PROBE_INTERVAL
    8564              :              probe at TEST_OFFSET
    8565              :            }
    8566              :          while (TEST_OFFSET != LAST_OFFSET)
    8567              : 
    8568              :          probes at FIRST + N * PROBE_INTERVAL for values of N from 1
    8569              :          until N * PROBE_INTERVAL is equal to ROUNDED_SIZE.  */
    8570              : 
    8571            0 :       emit_insn
    8572            0 :         (gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));
    8573              : 
    8574              : 
    8575              :       /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
    8576              :          that SIZE is equal to ROUNDED_SIZE.  */
    8577              : 
    8578            0 :       if (size != rounded_size)
    8579            0 :         emit_stack_probe (plus_constant (Pmode,
    8580            0 :                                          gen_rtx_PLUS (Pmode,
    8581              :                                                        stack_pointer_rtx,
    8582              :                                                        sr.reg),
    8583            0 :                                          rounded_size - size));
    8584              : 
    8585            0 :       release_scratch_register_on_entry (&sr, size, true);
    8586              :     }
    8587              : 
    8588              :   /* Make sure nothing is scheduled before we are done.  */
    8589            0 :   emit_insn (gen_blockage ());
    8590            0 : }
    8591              : 
    8592              : /* Output the loop probing stack offsets from REG to END: step REG by the
    8593              :    probe interval, touch SP+REG, repeat until REG equals END.  Returns "".  */
    8594              : 
    8595              : const char *
    8596            0 : output_probe_stack_range (rtx reg, rtx end)
    8597              : {
    8598            0 :   static int labelno = 0;
    8599            0 :   char loop_lab[32];
    8600            0 :   rtx xops[3];
    8601              : 
    8602            0 :   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
    8603              : 
    8604              :   /* Loop.  */
    8605            0 :   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
    8606              : 
    8607              :   /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL; REG holds -offset, so sub.  */
    8608            0 :   xops[0] = reg;
    8609            0 :   xops[1] = GEN_INT (get_probe_interval ());
    8610            0 :   output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
    8611              : 
    8612              :   /* Probe at TEST_ADDR: OR zero into the byte at SP + REG.  */
    8613            0 :   xops[0] = stack_pointer_rtx;
    8614            0 :   xops[1] = reg;
    8615            0 :   xops[2] = const0_rtx;
    8616            0 :   output_asm_insn ("or{b}\t{%2, (%0,%1)|BYTE PTR [%0+%1], %2}", xops);
    8617              : 
    8618              :   /* Test if TEST_ADDR == LAST_ADDR.  */
    8619            0 :   xops[0] = reg;
    8620            0 :   xops[1] = end;
    8621            0 :   output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
    8622              : 
    8623              :   /* Branch back to the loop label while they differ.  */
    8624            0 :   fputs ("\tjne\t", asm_out_file);
    8625            0 :   assemble_name_raw (asm_out_file, loop_lab);
    8626            0 :   fputc ('\n', asm_out_file);
    8627              : 
    8628            0 :   return "";
    8629              : }
    8630              : 
    8631              : /* Data passed to ix86_update_stack_alignment.  */
    8632              : struct stack_access_data
    8633              : {
    8634              :   /* The stack access register whose MEM references are examined.  */
    8635              :   const_rtx reg;
    8636              :   /* Pointer to the running maximum stack alignment, raised in place.  */
    8637              :   unsigned int *stack_alignment;
    8638              : };
    8639              : 
    8640              : /* Return true if MEM OP references an argument passed on stack (PARM_DECL).  */
    8641              : 
    8642              : static bool
    8643       135885 : ix86_argument_passed_on_stack_p (const_rtx op)
    8644              : {
    8645       135885 :   tree mem_expr = MEM_EXPR (op);
    8646       135885 :   if (mem_expr)
    8647              :     {
    8648       133991 :       tree var = get_base_address (mem_expr);
    8649       133991 :       return TREE_CODE (var) == PARM_DECL;
    8650              :     }
    8651              :   return false;  /* No MEM_EXPR: treated as not an argument.  */
    8652              : }
    8653              : 
    8654              : /* note_stores callback: update max stack slot alignment from MEMs in PAT.  */
    8655              : 
    8656              : static void
    8657       168965 : ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
    8658              : {
    8659              :   /* This insn may reference a stack slot.  Update the maximum stack slot
    8660              :      alignment if the memory is addressed via the stack access register.  */
    8661       168965 :   stack_access_data *p = (stack_access_data *) data;
    8662              : 
    8663       168965 :   subrtx_iterator::array_type array;
    8664       707147 :   FOR_EACH_SUBRTX (iter, array, pat, ALL)
    8665              :     {
    8666       566933 :       auto op = *iter;
    8667       566933 :       if (MEM_P (op))
    8668              :         {
    8669              :           /* NB: Ignore arguments passed on stack since the caller is
    8670              :              responsible for aligning the outgoing stack for arguments
    8671              :              passed on stack.  */
    8672       165550 :           if (reg_mentioned_p (p->reg, XEXP (op, 0))
    8673       165550 :               && !ix86_argument_passed_on_stack_p (op))
    8674              :             {
    8675        28751 :               unsigned int alignment = MEM_ALIGN (op);
    8676              : 
    8677        28751 :               if (alignment > *p->stack_alignment)
    8678        28672 :                 *p->stack_alignment = alignment;
    8679              :               break;  /* Stop at the first qualifying MEM.  */
    8680              :             }
    8681              :           else
    8682       136799 :             iter.skip_subrtxes ();
    8683              :         }
    8684              :     }
    8685       168965 : }
    8686              : 
    8687              : /* Helper for ix86_find_all_reg_uses: record SET_DEST if SET_SRC uses REGNO.  */
    8688              : 
    8689              : static void
    8690     45204803 : ix86_find_all_reg_uses_1 (HARD_REG_SET &regset,
    8691              :                           rtx set, unsigned int regno,
    8692              :                           auto_bitmap &worklist)
    8693              : {
    8694     45204803 :   rtx dest = SET_DEST (set);
    8695              : 
    8696     45204803 :   if (!REG_P (dest))
    8697     40941213 :     return;
    8698              : 
    8699              :   /* Reject non-Pmode modes.  */
    8700     34234168 :   if (GET_MODE (dest) != Pmode)
    8701              :     return;
    8702              : 
    8703     18111700 :   unsigned int dst_regno = REGNO (dest);
    8704              : 
    8705     18111700 :   if (TEST_HARD_REG_BIT (regset, dst_regno))
    8706              :     return;  /* DEST is already in REGSET.  */
    8707              : 
    8708      4263590 :   const_rtx src = SET_SRC (set);
    8709              : 
    8710      4263590 :   subrtx_iterator::array_type array;
    8711      8475926 :   FOR_EACH_SUBRTX (iter, array, src, ALL)
    8712              :     {
    8713      5482905 :       auto op = *iter;
    8714              : 
    8715      5482905 :       if (MEM_P (op))
    8716      2971500 :         iter.skip_subrtxes ();  /* Do not look inside MEMs.  */
    8717              : 
    8718      5482905 :       if (REG_P (op) && REGNO (op) == regno)
    8719              :         {
    8720              :           /* Add DEST to REGSET and queue it on WORKLIST.  */
    8721      1439023 :           add_to_hard_reg_set (&regset, Pmode, dst_regno);
    8722      1270569 :           bitmap_set_bit (worklist, dst_regno);
    8723      1270569 :           break;
    8724              :         }
    8725              :     }
    8726      4263590 : }
    8727              : 
    8728              : /* Find all registers set from REGNO; add them to REGSET and WORKLIST.  */
    8729              : 
    8730              : static void
    8731      2294420 : ix86_find_all_reg_uses (HARD_REG_SET &regset,
    8732              :                         unsigned int regno, auto_bitmap &worklist)
    8733              : {
    8734      2294420 :   for (df_ref ref = DF_REG_USE_CHAIN (regno);
    8735     81031909 :        ref != NULL;
    8736     78737489 :        ref = DF_REF_NEXT_REG (ref))
    8737              :     {
    8738     78737489 :       if (DF_REF_IS_ARTIFICIAL (ref))
    8739     16506296 :         continue;
    8740              : 
    8741     62231193 :       rtx_insn *insn = DF_REF_INSN (ref);
    8742              : 
    8743     62231193 :       if (!NONJUMP_INSN_P (insn))
    8744     17681695 :         continue;
    8745              : 
    8746     44549498 :       unsigned int ref_regno = DF_REF_REGNO (ref);
    8747              : 
    8748     44549498 :       rtx set = single_set (insn);
    8749     44549498 :       if (set)
    8750              :         {
    8751     43775569 :           ix86_find_all_reg_uses_1 (regset, set,
    8752              :                                     ref_regno, worklist);
    8753     43775569 :           continue;
    8754              :         }
    8755              : 
    8756       773929 :       rtx pat = PATTERN (insn);
    8757       773929 :       if (GET_CODE (pat) != PARALLEL)
    8758       124633 :         continue;
    8759              : 
    8760      2502858 :       for (int i = 0; i < XVECLEN (pat, 0); i++)
    8761              :         {
    8762      1853562 :           rtx exp = XVECEXP (pat, 0, i);
    8763              : 
    8764      1853562 :           if (GET_CODE (exp) == SET)  /* Examine each SET of the PARALLEL.  */
    8765      1429234 :             ix86_find_all_reg_uses_1 (regset, exp,
    8766              :                                       ref_regno, worklist);
    8767              :         }
    8768              :     }
    8769      2294420 : }
    8770              : 
    8771              : /* Return true if hard register REGNO, used in BB, has a non-clobber def in a
    8772              :    block dominating BB whose insn requires a stack frame.  Result is cached.  */
    8773              : 
    8774              : static bool
    8775        39115 : ix86_access_stack_p (unsigned int regno, basic_block bb,
    8776              :                      HARD_REG_SET &set_up_by_prologue,
    8777              :                      HARD_REG_SET &prologue_used,
    8778              :                      auto_bitmap reg_dominate_bbs_known[],
    8779              :                      auto_bitmap reg_dominate_bbs[])
    8780              : {
    8781        39115 :   if (bitmap_bit_p (reg_dominate_bbs_known[regno], bb->index))
    8782        10566 :     return bitmap_bit_p (reg_dominate_bbs[regno], bb->index);
    8783              : 
    8784        28549 :   bitmap_set_bit (reg_dominate_bbs_known[regno], bb->index);
    8785              : 
    8786              :   /* Get all BBs which set REGNO and dominate the current BB from all
    8787              :      DEFs of REGNO.  */
    8788        28549 :   for (df_ref def = DF_REG_DEF_CHAIN (regno);
    8789      1470550 :        def;
    8790      1442001 :        def = DF_REF_NEXT_REG (def))
    8791      1469139 :     if (!DF_REF_IS_ARTIFICIAL (def)
    8792      1467412 :         && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER)
    8793      1443513 :         && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER))
    8794              :       {
    8795      1441738 :         basic_block set_bb = DF_REF_BB (def);
    8796      1441738 :         if (dominated_by_p (CDI_DOMINATORS, bb, set_bb))
    8797              :           {
    8798        87477 :             rtx_insn *insn = DF_REF_INSN (def);
    8799              :             /* Return true if INSN requires stack.  */
    8800        87477 :             if (requires_stack_frame_p (insn, prologue_used,
    8801              :                                         set_up_by_prologue))
    8802              :               {
    8803        27138 :                 bitmap_set_bit (reg_dominate_bbs[regno], bb->index);
    8804        27138 :                 return true;
    8805              :               }
    8806              :           }
    8807              :       }
    8808              : 
    8809              :   /* When we get here, REGNO used in the current BB doesn't access stack;
    8810              :      only the KNOWN bit was set above, so the cached answer is false.  */
    8811              :   return false;
    8812              : }
    8813              : 
    8814              : /* Return true if MEM_ALIGN of OP is > ALIGNMENT, unless OP is based on a
    8815              :    variable whose DECL_RTL is a MEM at a SYMBOLIC_CONST address.  */
    8816              : 
    8817              : static bool
    8818     27614536 : ix86_need_alignment_p_2 (const_rtx op, unsigned int alignment)
    8819              : {
    8820     27614536 :   bool need_alignment = MEM_ALIGN (op) > alignment;
    8821     27614536 :   tree mem_expr = MEM_EXPR (op);
    8822     27614536 :   if (!mem_expr)
    8823              :     return need_alignment;
    8824              : 
    8825     22640371 :   tree var = get_base_address (mem_expr);
    8826     22640371 :   if (!VAR_P (var) || !DECL_RTL_SET_P (var))
    8827              :     return need_alignment;
    8828              : 
    8829     14378292 :   rtx x = DECL_RTL (var);
    8830     14378292 :   if (!MEM_P (x))
    8831              :     return need_alignment;
    8832              : 
    8833     14378289 :   x = XEXP (x, 0);  /* Address of the variable's home location.  */
    8834     14378289 :   return !SYMBOLIC_CONST (x) && need_alignment;
    8835              : }
    8836              : 
    8837              : /* Return true if SET's MEM dest, else first MEM in src, needs > ALIGNMENT.  */
    8838              : 
    8839              : static bool
    8840     45204093 : ix86_need_alignment_p_1 (rtx set, unsigned int alignment)
    8841              : {
    8842     45204093 :   rtx dest = SET_DEST (set);
    8843              : 
    8844     45204093 :   if (MEM_P (dest))
    8845     17118665 :     return ix86_need_alignment_p_2 (dest, alignment);
    8846              : 
    8847     28085428 :   const_rtx src = SET_SRC (set);
    8848              : 
    8849     28085428 :   subrtx_iterator::array_type array;
    8850     81341742 :   FOR_EACH_SUBRTX (iter, array, src, ALL)
    8851              :     {
    8852     63752185 :       auto op = *iter;
    8853              : 
    8854     63752185 :       if (MEM_P (op))  /* Only the first MEM found decides.  */
    8855     10495871 :         return ix86_need_alignment_p_2 (op, alignment);
    8856              :     }
    8857              : 
    8858     17589557 :   return false;
    8859     28085428 : }
    8860              : 
    8861              : /* Return true if any SET in INSN needs alignment > ALIGNMENT.  */
    8862              : 
    8863              : static bool
    8864     44549498 : ix86_need_alignment_p (rtx_insn *insn, unsigned int alignment)
    8865              : {
    8866     44549498 :   rtx set = single_set (insn);
    8867     44549498 :   if (set)
    8868     43775569 :     return ix86_need_alignment_p_1 (set, alignment);
    8869              : 
    8870       773929 :   rtx pat = PATTERN (insn);
    8871       773929 :   if (GET_CODE (pat) != PARALLEL)
    8872              :     return false;
    8873              : 
    8874      2501169 :   for (int i = 0; i < XVECLEN (pat, 0); i++)
    8875              :     {
    8876      1852661 :       rtx exp = XVECEXP (pat, 0, i);
    8877              : 
    8878      1852661 :       if (GET_CODE (exp) == SET
    8879      1852661 :           && ix86_need_alignment_p_1 (exp, alignment))
    8880              :         return true;
    8881              :     }
    8882              : 
    8883              :   return false;  /* No SET in the PARALLEL needs it.  */
    8884              : }
    8885              : 
    8886              : /* Record in cfun->machine->stack_frame_required whether any insn requires a
    8887              :    stack frame.  If CHECK_STACK_SLOT is true, also update STACK_ALIGNMENT
    8888              :    to the largest alignment, in bits, of stack slot used.  */
    8889              : 
    8890              : static void
    8891      1488297 : ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
    8892              :                                     bool check_stack_slot)
    8893              : {
    8894      1488297 :   HARD_REG_SET set_up_by_prologue, prologue_used;
    8895      1488297 :   basic_block bb;
    8896              : 
    8897      5953188 :   CLEAR_HARD_REG_SET (prologue_used);
    8898      1488297 :   CLEAR_HARD_REG_SET (set_up_by_prologue);
    8899      1614979 :   add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
    8900      1488297 :   add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
    8901      1488297 :   add_to_hard_reg_set (&set_up_by_prologue, Pmode,
    8902              :                        HARD_FRAME_POINTER_REGNUM);
    8903              : 
    8904      1488297 :   bool require_stack_frame = false;
    8905              : 
    8906     15758966 :   FOR_EACH_BB_FN (bb, cfun)
    8907              :     {
    8908     14270669 :       rtx_insn *insn;
    8909     88752251 :       FOR_BB_INSNS (bb, insn)
    8910     82274467 :         if (NONDEBUG_INSN_P (insn)
    8911     82274467 :             && requires_stack_frame_p (insn, prologue_used,
    8912              :                                        set_up_by_prologue))
    8913              :           {
    8914              :             require_stack_frame = true;
    8915              :             break;  /* NB: only exits the insn loop of this BB.  */
    8916              :           }
    8917              :     }
    8918              : 
    8919      1488297 :   cfun->machine->stack_frame_required = require_stack_frame;
    8920              : 
    8921              :   /* Stop if we don't need to check stack slot.  */
    8922      1488297 :   if (!check_stack_slot)
    8923       793224 :     return;
    8924              : 
    8925              :   /* The preferred stack alignment is the minimum stack alignment.  */
    8926       695073 :   if (stack_alignment > crtl->preferred_stack_boundary)
    8927       143793 :     stack_alignment = crtl->preferred_stack_boundary;
    8928              : 
    8929              :   HARD_REG_SET stack_slot_access;
    8930       695073 :   CLEAR_HARD_REG_SET (stack_slot_access);
    8931              : 
    8932              :   /* Stack slot can be accessed by stack pointer, frame pointer or
    8933              :      registers defined by stack pointer or frame pointer.  */
    8934       695073 :   auto_bitmap worklist;
    8935              : 
    8936       754639 :   add_to_hard_reg_set (&stack_slot_access, Pmode, STACK_POINTER_REGNUM);
    8937       695073 :   bitmap_set_bit (worklist, STACK_POINTER_REGNUM);
    8938              : 
    8939       695073 :   if (frame_pointer_needed)
    8940              :     {
    8941       337808 :       add_to_hard_reg_set (&stack_slot_access, Pmode,
    8942              :                            HARD_FRAME_POINTER_REGNUM);
    8943       328778 :       bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
    8944              :     }
    8945              : 
    8946              :   /* Registers on HARD_STACK_SLOT_ACCESS always access stack; this is a
                                                                              snapshot taken before derived registers are added below.  */
    8947       695073 :   HARD_REG_SET hard_stack_slot_access = stack_slot_access;
    8948              : 
    8949       695073 :   calculate_dominance_info (CDI_DOMINATORS);
    8950              : 
    8951      2294420 :   unsigned int regno;
    8952              : 
    8953      2294420 :   do
    8954              :     {
    8955      2294420 :       regno = bitmap_clear_first_set_bit (worklist);
    8956      2294420 :       ix86_find_all_reg_uses (stack_slot_access, regno, worklist);
    8957              :     }
    8958      2294420 :   while (!bitmap_empty_p (worklist));
    8959              : 
    8960              :   hard_reg_set_iterator hrsi;
    8961              :   stack_access_data data;
    8962              : 
    8963    128588505 :   auto_bitmap reg_dominate_bbs_known[FIRST_PSEUDO_REGISTER];
    8964    128588505 :   auto_bitmap reg_dominate_bbs[FIRST_PSEUDO_REGISTER];
    8965              : 
    8966       695073 :   data.stack_alignment = &stack_alignment;
    8967              : 
    8968      2989493 :   EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, regno, hrsi)
    8969              :     {
    8970      2294420 :       for (df_ref ref = DF_REG_USE_CHAIN (regno);
    8971     81031909 :            ref != NULL;
    8972     78737489 :            ref = DF_REF_NEXT_REG (ref))
    8973              :         {
    8974     78737489 :           if (DF_REF_IS_ARTIFICIAL (ref))
    8975     16506296 :             continue;
    8976              : 
    8977     62231193 :           rtx_insn *insn = DF_REF_INSN (ref);
    8978              : 
    8979     62231193 :           if (!NONJUMP_INSN_P (insn))
    8980     17681695 :             continue;
    8981              : 
    8982              :           /* Call ix86_access_stack_p only if INSN needs alignment >
    8983              :              STACK_ALIGNMENT.  */
    8984     44549498 :           if (ix86_need_alignment_p (insn, stack_alignment)
    8985     44549498 :               && (TEST_HARD_REG_BIT (hard_stack_slot_access, regno)
    8986        39115 :                   || ix86_access_stack_p (regno, BLOCK_FOR_INSN (insn),
    8987              :                                           set_up_by_prologue,
    8988              :                                           prologue_used,
    8989              :                                           reg_dominate_bbs_known,
    8990              :                                           reg_dominate_bbs)))
    8991              :             {
    8992              :               /* Update stack alignment if REGNO is used for stack
    8993              :                  access.  */
    8994       162393 :               data.reg = DF_REF_REG (ref);
    8995       162393 :               note_stores (insn, ix86_update_stack_alignment, &data);
    8996              :             }
    8997              :         }
    8998              :     }
    8999              : 
    9000       695073 :   free_dominance_info (CDI_DOMINATORS);
    9001    129978651 : }
    9002              : 
    9003              : /* Finalize stack_realign_needed and frame_pointer_needed flags, which
    9004              :    will guide prologue/epilogue to be generated in correct form.  */
    9005              : 
    9006              : static void
    9007      3446793 : ix86_finalize_stack_frame_flags (void)
    9008              : {
    9009              :   /* Check if stack realign is really needed after reload, and
    9010              :      store the result in cfun.  */
    9011      3446793 :   unsigned int incoming_stack_boundary
    9012      3446793 :     = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
    9013      3446793 :        ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
    9014      3446793 :   unsigned int stack_alignment
    9015      1188659 :     = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
    9016      4635452 :        ? crtl->max_used_stack_slot_alignment
    9017      3446793 :        : crtl->stack_alignment_needed);
    9018      3446793 :   unsigned int stack_realign
    9019      3446793 :     = (incoming_stack_boundary < stack_alignment);
    9020      3446793 :   bool recompute_frame_layout_p = false;
    9021              : 
    9022      3446793 :   if (crtl->stack_realign_finalized)
    9023              :     {
    9024              :       /* After stack_realign_needed is finalized, we can no longer
    9025              :          change it.  */
    9026      1958496 :       gcc_assert (crtl->stack_realign_needed == stack_realign);
    9027      1958496 :       return;
    9028              :     }
    9029              : 
    9030              :   /* It is always safe to compute max_used_stack_alignment.  We
    9031              :      compute it only if 128-bit aligned load/store may be generated
    9032              :      on misaligned stack slot which will lead to segfault. */
    9033      2976594 :   bool check_stack_slot
    9034      1488297 :     = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
    9035      1488297 :   ix86_find_max_used_stack_alignment (stack_alignment,
    9036              :                                       check_stack_slot);
    9037              : 
    9038              :   /* If the only reason for frame_pointer_needed is that we conservatively
    9039              :      assumed stack realignment might be needed or -fno-omit-frame-pointer
    9040              :      is used, but in the end nothing that needed the stack alignment had
    9041              :      been spilled nor stack access, clear frame_pointer_needed and say we
    9042              :      don't need stack realignment.
    9043              : 
    9044              :      When vector register is used for piecewise move and store, we don't
    9045              :      increase stack_alignment_needed as there is no register spill for
    9046              :      piecewise move and store.  Since stack_realign_needed is set to true
    9047              :      by checking stack_alignment_estimated which is updated by pseudo
    9048              :      vector register usage, we also need to check stack_realign_needed to
    9049              :      eliminate frame pointer.  */
    9050      1488297 :   if ((stack_realign
    9051      1421808 :        || (!flag_omit_frame_pointer && optimize)
    9052      1411558 :        || crtl->stack_realign_needed)
    9053        77398 :       && frame_pointer_needed
    9054        77398 :       && crtl->is_leaf
    9055        52926 :       && crtl->sp_is_unchanging
    9056        52874 :       && !ix86_current_function_calls_tls_descriptor
    9057        52874 :       && !crtl->accesses_prior_frames
    9058        52874 :       && !cfun->calls_alloca
    9059        52874 :       && !crtl->calls_eh_return
    9060              :       /* See ira_setup_eliminable_regset for the rationale.  */
    9061        52874 :       && !(STACK_CHECK_MOVING_SP
    9062        52874 :            && flag_stack_check
    9063            0 :            && flag_exceptions
    9064            0 :            && cfun->can_throw_non_call_exceptions)
    9065        52874 :       && !ix86_frame_pointer_required ()
    9066        52873 :       && ix86_get_frame_size () == 0
    9067        35086 :       && ix86_nsaved_sseregs () == 0
    9068      1523383 :       && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    9069              :     {
    9070        35086 :       if (cfun->machine->stack_frame_required)
    9071              :         {
    9072              :           /* Stack frame is required.  If stack alignment needed is less
    9073              :              than incoming stack boundary, don't realign stack.  */
    9074          285 :           stack_realign = incoming_stack_boundary < stack_alignment;
    9075          285 :           if (!stack_realign)
    9076              :             {
    9077          285 :               crtl->max_used_stack_slot_alignment
    9078          285 :                 = incoming_stack_boundary;
    9079          285 :               crtl->stack_alignment_needed
    9080          285 :                 = incoming_stack_boundary;
    9081              :               /* Also update preferred_stack_boundary for leaf
    9082              :                  functions.  */
    9083          285 :               crtl->preferred_stack_boundary
    9084          285 :                 = incoming_stack_boundary;
    9085              :             }
    9086              :         }
    9087              :       else
    9088              :         {
    9089              :           /* If drap has been set, but it actually isn't live at the
    9090              :              start of the function, there is no reason to set it up.  */
    9091        34801 :           if (crtl->drap_reg)
    9092              :             {
    9093           35 :               basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
    9094           70 :               if (! REGNO_REG_SET_P (DF_LR_IN (bb),
    9095              :                                      REGNO (crtl->drap_reg)))
    9096              :                 {
    9097           35 :                   crtl->drap_reg = NULL_RTX;
    9098           35 :                   crtl->need_drap = false;
    9099              :                 }
    9100              :             }
    9101              :           else
    9102        34766 :             cfun->machine->no_drap_save_restore = true;
    9103              : 
    9104        34801 :           frame_pointer_needed = false;
    9105        34801 :           stack_realign = false;
    9106        34801 :           crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
    9107        34801 :           crtl->stack_alignment_needed = incoming_stack_boundary;
    9108        34801 :           crtl->stack_alignment_estimated = incoming_stack_boundary;
    9109        34801 :           if (crtl->preferred_stack_boundary > incoming_stack_boundary)
    9110            1 :             crtl->preferred_stack_boundary = incoming_stack_boundary;
    9111        34801 :           df_finish_pass (true);
    9112        34801 :           df_scan_alloc (NULL);
    9113        34801 :           df_scan_blocks ();
    9114        34801 :           df_compute_regs_ever_live (true);
    9115        34801 :           df_analyze ();
    9116              : 
    9117        34801 :           if (flag_var_tracking)
    9118              :             {
    9119              :               /* Since frame pointer is no longer available, replace it with
    9120              :                  stack pointer - UNITS_PER_WORD in debug insns.  */
    9121          136 :               df_ref ref, next;
    9122          136 :               for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
    9123          136 :                    ref; ref = next)
    9124              :                 {
    9125            0 :                   next = DF_REF_NEXT_REG (ref);
    9126            0 :                   if (!DF_REF_INSN_INFO (ref))
    9127            0 :                     continue;
    9128              : 
    9129              :                   /* Make sure the next ref is for a different instruction,
    9130              :                      so that we're not affected by the rescan.  */
    9131            0 :                   rtx_insn *insn = DF_REF_INSN (ref);
    9132            0 :                   while (next && DF_REF_INSN (next) == insn)
    9133            0 :                     next = DF_REF_NEXT_REG (next);
    9134              : 
    9135            0 :                   if (DEBUG_INSN_P (insn))
    9136              :                     {
    9137              :                       bool changed = false;
    9138            0 :                       for (; ref != next; ref = DF_REF_NEXT_REG (ref))
    9139              :                         {
    9140            0 :                           rtx *loc = DF_REF_LOC (ref);
    9141            0 :                           if (*loc == hard_frame_pointer_rtx)
    9142              :                             {
    9143            0 :                               *loc = plus_constant (Pmode,
    9144              :                                                     stack_pointer_rtx,
    9145            0 :                                                     -UNITS_PER_WORD);
    9146            0 :                               changed = true;
    9147              :                             }
    9148              :                         }
    9149            0 :                       if (changed)
    9150            0 :                         df_insn_rescan (insn);
    9151              :                     }
    9152              :                 }
    9153              :             }
    9154              : 
    9155              :           recompute_frame_layout_p = true;
    9156              :         }
    9157              :     }
    9158      1453211 :   else if (crtl->max_used_stack_slot_alignment >= 128
    9159       658963 :            && cfun->machine->stack_frame_required)
    9160              :     {
    9161              :       /* We don't need to realign stack.  max_used_stack_alignment is
    9162              :          used to decide how stack frame should be aligned.  This is
    9163              :          independent of any psABIs nor 32-bit vs 64-bit.  */
    9164       613364 :       cfun->machine->max_used_stack_alignment
    9165       613364 :         = stack_alignment / BITS_PER_UNIT;
    9166              :     }
    9167              : 
    9168      1488297 :   if (crtl->stack_realign_needed != stack_realign)
    9169        35319 :     recompute_frame_layout_p = true;
    9170      1488297 :   crtl->stack_realign_needed = stack_realign;
    9171      1488297 :   crtl->stack_realign_finalized = true;
    9172      1488297 :   if (recompute_frame_layout_p)
    9173        35412 :     ix86_compute_frame_layout ();
    9174              : }
    9175              : 
    9176              : /* Delete SET_GOT right after entry block if it is allocated to reg.  */
    9177              : 
    9178              : static void
    9179            0 : ix86_elim_entry_set_got (rtx reg)
    9180              : {
    9181            0 :   basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
    9182            0 :   rtx_insn *c_insn = BB_HEAD (bb);
    9183            0 :   if (!NONDEBUG_INSN_P (c_insn))
    9184            0 :     c_insn = next_nonnote_nondebug_insn (c_insn);
    9185            0 :   if (c_insn && NONJUMP_INSN_P (c_insn))
    9186              :     {
    9187            0 :       rtx pat = PATTERN (c_insn);
    9188            0 :       if (GET_CODE (pat) == PARALLEL)
    9189              :         {
    9190            0 :           rtx set = XVECEXP (pat, 0, 0);
    9191            0 :           if (GET_CODE (set) == SET
    9192            0 :               && GET_CODE (SET_SRC (set)) == UNSPEC
    9193            0 :               && XINT (SET_SRC (set), 1) == UNSPEC_SET_GOT
    9194            0 :               && REGNO (SET_DEST (set)) == REGNO (reg))
    9195            0 :             delete_insn (c_insn);
    9196              :         }
    9197              :     }
    9198            0 : }
    9199              : 
    9200              : static rtx
    9201       193166 : gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
    9202              : {
    9203       193166 :   rtx addr, mem;
    9204              : 
    9205       193166 :   if (offset)
    9206       184480 :     addr = plus_constant (Pmode, frame_reg, offset);
    9207       193166 :   mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
    9208       193166 :   return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
    9209              : }
    9210              : 
    9211              : static inline rtx
    9212       100333 : gen_frame_load (rtx reg, rtx frame_reg, int offset)
    9213              : {
    9214       100333 :   return gen_frame_set (reg, frame_reg, offset, false);
    9215              : }
    9216              : 
    9217              : static inline rtx
    9218        92833 : gen_frame_store (rtx reg, rtx frame_reg, int offset)
    9219              : {
    9220        92833 :   return gen_frame_set (reg, frame_reg, offset, true);
    9221              : }
    9222              : 
    9223              : static void
    9224         7045 : ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
    9225              : {
    9226         7045 :   struct machine_function *m = cfun->machine;
    9227         7045 :   const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
    9228         7045 :                           + m->call_ms2sysv_extra_regs;
    9229         7045 :   rtvec v = rtvec_alloc (ncregs + 1);
    9230         7045 :   unsigned int align, i, vi = 0;
    9231         7045 :   rtx_insn *insn;
    9232         7045 :   rtx sym, addr;
    9233         7045 :   rtx rax = gen_rtx_REG (word_mode, AX_REG);
    9234         7045 :   const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
    9235              : 
    9236              :   /* AL should only be live with sysv_abi.  */
    9237         7045 :   gcc_assert (!ix86_eax_live_at_start_p ());
    9238         7045 :   gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);
    9239              : 
    9240              :   /* Setup RAX as the stub's base pointer.  We use stack_realign_offset rather
    9241              :      we've actually realigned the stack or not.  */
    9242         7045 :   align = GET_MODE_ALIGNMENT (V4SFmode);
    9243         7045 :   addr = choose_baseaddr (frame.stack_realign_offset
    9244         7045 :                           + xlogue.get_stub_ptr_offset (), &align, AX_REG);
    9245         7045 :   gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
    9246              : 
    9247         7045 :   emit_insn (gen_rtx_SET (rax, addr));
    9248              : 
    9249              :   /* Get the stub symbol.  */
    9250         8327 :   sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
    9251              :                                                   : XLOGUE_STUB_SAVE);
    9252         7045 :   RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
    9253              : 
    9254        99878 :   for (i = 0; i < ncregs; ++i)
    9255              :     {
    9256        92833 :       const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
    9257        92833 :       rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
    9258        92833 :                              r.regno);
    9259        92833 :       RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
    9260              :     }
    9261              : 
    9262         7045 :   gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));
    9263              : 
    9264         7045 :   insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
    9265         7045 :   RTX_FRAME_RELATED_P (insn) = true;
    9266         7045 : }
    9267              : 
    9268              : /* Generate and return an insn body to AND X with Y.  */
    9269              : 
    9270              : static rtx_insn *
    9271        31829 : gen_and2_insn (rtx x, rtx y)
    9272              : {
    9273        31829 :   enum insn_code icode = optab_handler (and_optab, GET_MODE (x));
    9274              : 
    9275        31829 :   gcc_assert (insn_operand_matches (icode, 0, x));
    9276        31829 :   gcc_assert (insn_operand_matches (icode, 1, x));
    9277        31829 :   gcc_assert (insn_operand_matches (icode, 2, y));
    9278              : 
    9279        31829 :   return GEN_FCN (icode) (x, x, y);
    9280              : }
    9281              : 
    9282              : /* Expand the prologue into a bunch of separate insns.  */
    9283              : 
    9284              : void
    9285      1532456 : ix86_expand_prologue (void)
    9286              : {
    9287      1532456 :   struct machine_function *m = cfun->machine;
    9288      1532456 :   rtx insn, t;
    9289      1532456 :   HOST_WIDE_INT allocate;
    9290      1532456 :   bool int_registers_saved;
    9291      1532456 :   bool sse_registers_saved;
    9292      1532456 :   bool save_stub_call_needed;
    9293      1532456 :   rtx static_chain = NULL_RTX;
    9294              : 
    9295      1532456 :   ix86_last_zero_store_uid = 0;
    9296      1532456 :   if (ix86_function_naked (current_function_decl))
    9297              :     {
    9298           74 :       if (flag_stack_usage_info)
    9299            0 :         current_function_static_stack_size = 0;
    9300           74 :       return;
    9301              :     }
    9302              : 
    9303      1532382 :   ix86_finalize_stack_frame_flags ();
    9304              : 
    9305              :   /* DRAP should not coexist with stack_realign_fp */
    9306      1532382 :   gcc_assert (!(crtl->drap_reg && stack_realign_fp));
    9307              : 
    9308      1532382 :   memset (&m->fs, 0, sizeof (m->fs));
    9309              : 
    9310              :   /* Initialize CFA state for before the prologue.  */
    9311      1532382 :   m->fs.cfa_reg = stack_pointer_rtx;
    9312      1532382 :   m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
    9313              : 
    9314              :   /* Track SP offset to the CFA.  We continue tracking this after we've
    9315              :      swapped the CFA register away from SP.  In the case of re-alignment
    9316              :      this is fudged; we're interested to offsets within the local frame.  */
    9317      1532382 :   m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
    9318      1532382 :   m->fs.sp_valid = true;
    9319      1532382 :   m->fs.sp_realigned = false;
    9320              : 
    9321      1532382 :   const struct ix86_frame &frame = cfun->machine->frame;
    9322              : 
    9323      1532382 :   if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    9324              :     {
    9325              :       /* We should have already generated an error for any use of
    9326              :          ms_hook on a nested function.  */
    9327            0 :       gcc_checking_assert (!ix86_static_chain_on_stack);
    9328              : 
    9329              :       /* Check if profiling is active and we shall use profiling before
    9330              :          prologue variant. If so sorry.  */
    9331            0 :       if (crtl->profile && flag_fentry != 0)
    9332            0 :         sorry ("%<ms_hook_prologue%> attribute is not compatible "
    9333              :                "with %<-mfentry%> for 32-bit");
    9334              : 
    9335              :       /* In ix86_asm_output_function_label we emitted:
    9336              :          8b ff     movl.s %edi,%edi
    9337              :          55        push   %ebp
    9338              :          8b ec     movl.s %esp,%ebp
    9339              : 
    9340              :          This matches the hookable function prologue in Win32 API
    9341              :          functions in Microsoft Windows XP Service Pack 2 and newer.
    9342              :          Wine uses this to enable Windows apps to hook the Win32 API
    9343              :          functions provided by Wine.
    9344              : 
    9345              :          What that means is that we've already set up the frame pointer.  */
    9346              : 
    9347            0 :       if (frame_pointer_needed
    9348            0 :           && !(crtl->drap_reg && crtl->stack_realign_needed))
    9349              :         {
    9350            0 :           rtx push, mov;
    9351              : 
    9352              :           /* We've decided to use the frame pointer already set up.
    9353              :              Describe this to the unwinder by pretending that both
    9354              :              push and mov insns happen right here.
    9355              : 
    9356              :              Putting the unwind info here at the end of the ms_hook
    9357              :              is done so that we can make absolutely certain we get
    9358              :              the required byte sequence at the start of the function,
    9359              :              rather than relying on an assembler that can produce
    9360              :              the exact encoding required.
    9361              : 
    9362              :              However it does mean (in the unpatched case) that we have
    9363              :              a 1 insn window where the asynchronous unwind info is
    9364              :              incorrect.  However, if we placed the unwind info at
    9365              :              its correct location we would have incorrect unwind info
    9366              :              in the patched case.  Which is probably all moot since
    9367              :              I don't expect Wine generates dwarf2 unwind info for the
    9368              :              system libraries that use this feature.  */
    9369              : 
    9370            0 :           insn = emit_insn (gen_blockage ());
    9371              : 
    9372            0 :           push = gen_push (hard_frame_pointer_rtx);
    9373            0 :           mov = gen_rtx_SET (hard_frame_pointer_rtx,
    9374              :                              stack_pointer_rtx);
    9375            0 :           RTX_FRAME_RELATED_P (push) = 1;
    9376            0 :           RTX_FRAME_RELATED_P (mov) = 1;
    9377              : 
    9378            0 :           RTX_FRAME_RELATED_P (insn) = 1;
    9379            0 :           add_reg_note (insn, REG_FRAME_RELATED_EXPR,
    9380              :                         gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
    9381              : 
    9382              :           /* Note that gen_push incremented m->fs.cfa_offset, even
    9383              :              though we didn't emit the push insn here.  */
    9384            0 :           m->fs.cfa_reg = hard_frame_pointer_rtx;
    9385            0 :           m->fs.fp_offset = m->fs.cfa_offset;
    9386            0 :           m->fs.fp_valid = true;
    9387            0 :         }
    9388              :       else
    9389              :         {
    9390              :           /* The frame pointer is not needed so pop %ebp again.
    9391              :              This leaves us with a pristine state.  */
    9392            0 :           emit_insn (gen_pop (hard_frame_pointer_rtx));
    9393              :         }
    9394              :     }
    9395              : 
    9396              :   /* The first insn of a function that accepts its static chain on the
    9397              :      stack is to push the register that would be filled in by a direct
    9398              :      call.  This insn will be skipped by the trampoline.  */
    9399      1532382 :   else if (ix86_static_chain_on_stack)
    9400              :     {
    9401            0 :       static_chain = ix86_static_chain (cfun->decl, false);
    9402            0 :       insn = emit_insn (gen_push (static_chain));
    9403            0 :       emit_insn (gen_blockage ());
    9404              : 
    9405              :       /* We don't want to interpret this push insn as a register save,
    9406              :          only as a stack adjustment.  The real copy of the register as
    9407              :          a save will be done later, if needed.  */
    9408            0 :       t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
    9409            0 :       t = gen_rtx_SET (stack_pointer_rtx, t);
    9410            0 :       add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
    9411            0 :       RTX_FRAME_RELATED_P (insn) = 1;
    9412              :     }
    9413              : 
    9414              :   /* Emit prologue code to adjust stack alignment and setup DRAP, in case
    9415              :      of DRAP is needed and stack realignment is really needed after reload */
    9416      1532382 :   if (stack_realign_drap)
    9417              :     {
    9418         7079 :       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
    9419              : 
    9420              :       /* Can't use DRAP in interrupt function.  */
    9421         7079 :       if (cfun->machine->func_type != TYPE_NORMAL)
    9422            0 :         sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
    9423              :                "in interrupt service routine.  This may be worked "
    9424              :                "around by avoiding functions with aggregate return.");
    9425              : 
    9426              :       /* Only need to push parameter pointer reg if it is caller saved.  */
    9427         7079 :       if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
    9428              :         {
    9429              :           /* Push arg pointer reg */
    9430          137 :           insn = emit_insn (gen_push (crtl->drap_reg));
    9431          137 :           RTX_FRAME_RELATED_P (insn) = 1;
    9432              :         }
    9433              : 
    9434              :       /* Grab the argument pointer.  */
    9435         7364 :       t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
    9436         7079 :       insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
    9437         7079 :       RTX_FRAME_RELATED_P (insn) = 1;
    9438         7079 :       m->fs.cfa_reg = crtl->drap_reg;
    9439         7079 :       m->fs.cfa_offset = 0;
    9440              : 
    9441              :       /* Align the stack.  */
    9442         7079 :       insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
    9443         7079 :                                        GEN_INT (-align_bytes)));
    9444         7079 :       RTX_FRAME_RELATED_P (insn) = 1;
    9445              : 
    9446              :       /* Replicate the return address on the stack so that return
    9447              :          address can be reached via (argp - 1) slot.  This is needed
    9448              :          to implement macro RETURN_ADDR_RTX and intrinsic function
    9449              :          expand_builtin_return_addr etc.  */
    9450         7649 :       t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
    9451         7079 :       t = gen_frame_mem (word_mode, t);
    9452         7079 :       insn = emit_insn (gen_push (t));
    9453         7079 :       RTX_FRAME_RELATED_P (insn) = 1;
    9454              : 
    9455              :       /* For the purposes of frame and register save area addressing,
    9456              :          we've started over with a new frame.  */
    9457         7079 :       m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
    9458         7079 :       m->fs.realigned = true;
    9459              : 
    9460         7079 :       if (static_chain)
    9461              :         {
    9462              :           /* Replicate static chain on the stack so that static chain
    9463              :              can be reached via (argp - 2) slot.  This is needed for
    9464              :              nested function with stack realignment.  */
    9465            0 :           insn = emit_insn (gen_push (static_chain));
    9466            0 :           RTX_FRAME_RELATED_P (insn) = 1;
    9467              :         }
    9468              :     }
    9469              : 
    9470      1532382 :   int_registers_saved = (frame.nregs == 0);
    9471      1532382 :   sse_registers_saved = (frame.nsseregs == 0);
    9472      1532382 :   save_stub_call_needed = (m->call_ms2sysv);
    9473      1532382 :   gcc_assert (sse_registers_saved || !save_stub_call_needed);
    9474              : 
    9475      1532382 :   if (frame_pointer_needed && !m->fs.fp_valid)
    9476              :     {
    9477              :       /* Note: AT&T enter does NOT have reversed args.  Enter is probably
    9478              :          slower on all targets.  Also sdb didn't like it.  */
    9479       491868 :       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
    9480       491868 :       RTX_FRAME_RELATED_P (insn) = 1;
    9481              : 
    9482       491868 :       if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
    9483              :         {
    9484       491868 :           insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
    9485       491868 :           RTX_FRAME_RELATED_P (insn) = 1;
    9486              : 
    9487       491868 :           if (m->fs.cfa_reg == stack_pointer_rtx)
    9488       484789 :             m->fs.cfa_reg = hard_frame_pointer_rtx;
    9489       491868 :           m->fs.fp_offset = m->fs.sp_offset;
    9490       491868 :           m->fs.fp_valid = true;
    9491              :         }
    9492              :     }
    9493              : 
    9494      1532382 :   if (!int_registers_saved)
    9495              :     {
    9496              :       /* If saving registers via PUSH, do so now.  */
    9497       471923 :       if (!frame.save_regs_using_mov)
    9498              :         {
    9499       427777 :           ix86_emit_save_regs ();
    9500       427777 :           m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
    9501       427777 :           int_registers_saved = true;
    9502       427777 :           gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
    9503              :         }
    9504              : 
    9505              :       /* When using red zone we may start register saving before allocating
    9506              :          the stack frame saving one cycle of the prologue.  However, avoid
    9507              :          doing this if we have to probe the stack; at least on x86_64 the
    9508              :          stack probe can turn into a call that clobbers a red zone location. */
    9509        44146 :       else if (ix86_using_red_zone ()
    9510        44146 :                 && (! TARGET_STACK_PROBE
    9511            0 :                     || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
    9512              :         {
    9513        39699 :           HOST_WIDE_INT allocate_offset;
    9514        39699 :           if (crtl->shrink_wrapped_separate)
    9515              :             {
    9516        39643 :               allocate_offset = m->fs.sp_offset - frame.stack_pointer_offset;
    9517              : 
    9518              :               /* Adjust the total offset at the beginning of the function.  */
    9519        39643 :               pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    9520              :                                          GEN_INT (allocate_offset), -1,
    9521        39643 :                                          m->fs.cfa_reg == stack_pointer_rtx);
    9522        39643 :               m->fs.sp_offset = cfun->machine->frame.stack_pointer_offset;
    9523              :             }
    9524              : 
    9525        39699 :           ix86_emit_save_regs_using_mov (frame.reg_save_offset);
    9526        39699 :           int_registers_saved = true;
    9527              :         }
    9528              :     }
    9529              : 
    9530      1532382 :   if (frame.red_zone_size != 0)
    9531       143651 :     cfun->machine->red_zone_used = true;
    9532              : 
    9533      1532382 :   if (stack_realign_fp)
    9534              :     {
    9535        24750 :       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
    9536        25099 :       gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
    9537              : 
    9538              :       /* Record last valid frame pointer offset.  */
    9539        24750 :       m->fs.sp_realigned_fp_last = frame.reg_save_offset;
    9540              : 
    9541              :       /* The computation of the size of the re-aligned stack frame means
    9542              :          that we must allocate the size of the register save area before
    9543              :          performing the actual alignment.  Otherwise we cannot guarantee
    9544              :          that there's enough storage above the realignment point.  */
    9545        24750 :       allocate = frame.reg_save_offset - m->fs.sp_offset
    9546        24750 :                  + frame.stack_realign_allocate;
    9547        24750 :       if (allocate)
    9548         2691 :         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    9549              :                                    GEN_INT (-allocate), -1, false);
    9550              : 
    9551              :       /* Align the stack.  */
    9552        24750 :       emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
    9553        24750 :       m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
    9554        24750 :       m->fs.sp_realigned_offset = m->fs.sp_offset
    9555        24750 :                                               - frame.stack_realign_allocate;
    9556              :       /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
    9557              :          Beyond this point, stack access should be done via choose_baseaddr or
    9558              :          by using sp_valid_at and fp_valid_at to determine the correct base
    9559              :          register.  Henceforth, any CFA offset should be thought of as logical
    9560              :          and not physical.  */
    9561        24750 :       gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
    9562        24750 :       gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
    9563        24750 :       m->fs.sp_realigned = true;
    9564              : 
    9565              :       /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
    9566              :          is needed to describe where a register is saved using a realigned
    9567              :          stack pointer, so we need to invalidate the stack pointer for that
    9568              :          target.  */
    9569        24750 :       if (TARGET_SEH)
    9570              :         m->fs.sp_valid = false;
    9571              : 
    9572              :       /* If SP offset is non-immediate after allocation of the stack frame,
    9573              :          then emit SSE saves or stub call prior to allocating the rest of the
    9574              :          stack frame.  This is less efficient for the out-of-line stub because
    9575              :          we can't combine allocations across the call barrier, but it's better
    9576              :          than using a scratch register.  */
    9577        24750 :       else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
    9578              :                                                    - m->fs.sp_realigned_offset),
    9579        24750 :                                           Pmode))
    9580              :         {
    9581            3 :           if (!sse_registers_saved)
    9582              :             {
    9583            1 :               ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
    9584            1 :               sse_registers_saved = true;
    9585              :             }
    9586            2 :           else if (save_stub_call_needed)
    9587              :             {
    9588            1 :               ix86_emit_outlined_ms2sysv_save (frame);
    9589            1 :               save_stub_call_needed = false;
    9590              :             }
    9591              :         }
    9592              :     }
    9593              : 
    9594      1532382 :   allocate = frame.stack_pointer_offset - m->fs.sp_offset;
    9595              : 
    9596      1532382 :   if (flag_stack_usage_info)
    9597              :     {
    9598              :       /* We start to count from ARG_POINTER.  */
    9599          355 :       HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
    9600              : 
    9601              :       /* If it was realigned, take into account the fake frame.  */
    9602          355 :       if (stack_realign_drap)
    9603              :         {
    9604            1 :           if (ix86_static_chain_on_stack)
    9605            0 :             stack_size += UNITS_PER_WORD;
    9606              : 
    9607            1 :           if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
    9608            0 :             stack_size += UNITS_PER_WORD;
    9609              : 
    9610              :           /* This over-estimates by 1 minimal-stack-alignment-unit but
    9611              :              mitigates that by counting in the new return address slot.  */
    9612            1 :           current_function_dynamic_stack_size
    9613            1 :             += crtl->stack_alignment_needed / BITS_PER_UNIT;
    9614              :         }
    9615              : 
    9616          355 :       current_function_static_stack_size = stack_size;
    9617              :     }
    9618              : 
    9619              :   /* On SEH target with very large frame size, allocate an area to save
    9620              :      SSE registers (as the very large allocation won't be described).  */
    9621      1532382 :   if (TARGET_SEH
    9622              :       && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
    9623              :       && !sse_registers_saved)
    9624              :     {
    9625              :       HOST_WIDE_INT sse_size
    9626              :         = frame.sse_reg_save_offset - frame.reg_save_offset;
    9627              : 
    9628              :       gcc_assert (int_registers_saved);
    9629              : 
    9630              :       /* No need to do stack checking as the area will be immediately
    9631              :          written.  */
    9632              :       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    9633              :                                  GEN_INT (-sse_size), -1,
    9634              :                                  m->fs.cfa_reg == stack_pointer_rtx);
    9635              :       allocate -= sse_size;
    9636              :       ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
    9637              :       sse_registers_saved = true;
    9638              :     }
    9639              : 
    9640              :   /* If stack clash protection is requested, then probe the stack, unless it
    9641              :      is already probed on the target.  */
    9642      1532382 :   if (allocate >= 0
    9643      1532378 :       && flag_stack_clash_protection
    9644      1532480 :       && !ix86_target_stack_probe ())
    9645              :     {
    9646           98 :       ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
    9647           98 :       allocate = 0;
    9648              :     }
    9649              : 
    9650              :   /* The stack has already been decremented by the instruction calling us
    9651              :      so probe if the size is non-negative to preserve the protection area.  */
    9652      1532284 :   else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    9653              :     {
    9654           46 :       const HOST_WIDE_INT probe_interval = get_probe_interval ();
    9655              : 
    9656           46 :       if (STACK_CHECK_MOVING_SP)
    9657              :         {
    9658           46 :           if (crtl->is_leaf
    9659           19 :               && !cfun->calls_alloca
    9660           19 :               && allocate <= probe_interval)
    9661              :             ;
    9662              : 
    9663              :           else
    9664              :             {
    9665           28 :               ix86_adjust_stack_and_probe (allocate, int_registers_saved, true);
    9666           28 :               allocate = 0;
    9667              :             }
    9668              :         }
    9669              : 
    9670              :       else
    9671              :         {
    9672              :           HOST_WIDE_INT size = allocate;
    9673              : 
    9674              :           if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
    9675              :             size = 0x80000000 - get_stack_check_protect () - 1;
    9676              : 
    9677              :           if (TARGET_STACK_PROBE)
    9678              :             {
    9679              :               if (crtl->is_leaf && !cfun->calls_alloca)
    9680              :                 {
    9681              :                   if (size > probe_interval)
    9682              :                     ix86_emit_probe_stack_range (0, size, int_registers_saved);
    9683              :                 }
    9684              :               else
    9685              :                 ix86_emit_probe_stack_range (0,
    9686              :                                              size + get_stack_check_protect (),
    9687              :                                              int_registers_saved);
    9688              :             }
    9689              :           else
    9690              :             {
    9691              :               if (crtl->is_leaf && !cfun->calls_alloca)
    9692              :                 {
    9693              :                   if (size > probe_interval
    9694              :                       && size > get_stack_check_protect ())
    9695              :                     ix86_emit_probe_stack_range (get_stack_check_protect (),
    9696              :                                                  (size
    9697              :                                                   - get_stack_check_protect ()),
    9698              :                                                  int_registers_saved);
    9699              :                 }
    9700              :               else
    9701              :                 ix86_emit_probe_stack_range (get_stack_check_protect (), size,
    9702              :                                              int_registers_saved);
    9703              :             }
    9704              :         }
    9705              :     }
    9706              : 
    9707      1532378 :   if (allocate == 0)
    9708              :     ;
    9709       842808 :   else if (!ix86_target_stack_probe ()
    9710       842808 :            || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    9711              :     {
    9712       842763 :       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    9713              :                                  GEN_INT (-allocate), -1,
    9714       842763 :                                  m->fs.cfa_reg == stack_pointer_rtx);
    9715              :     }
    9716              :   else
    9717              :     {
    9718           45 :       rtx eax = gen_rtx_REG (Pmode, AX_REG);
    9719           45 :       rtx r10 = NULL;
    9720           45 :       const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
    9721           45 :       bool eax_live = ix86_eax_live_at_start_p ();
    9722           45 :       bool r10_live = false;
    9723              : 
    9724           45 :       if (TARGET_64BIT)
    9725           45 :         r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
    9726              : 
    9727           45 :       if (eax_live)
    9728              :         {
    9729            0 :           insn = emit_insn (gen_push (eax));
    9730            0 :           allocate -= UNITS_PER_WORD;
    9731              :           /* Note that SEH directives need to continue tracking the stack
    9732              :              pointer even after the frame pointer has been set up.  */
    9733            0 :           if (sp_is_cfa_reg || TARGET_SEH)
    9734              :             {
    9735            0 :               if (sp_is_cfa_reg)
    9736            0 :                 m->fs.cfa_offset += UNITS_PER_WORD;
    9737            0 :               RTX_FRAME_RELATED_P (insn) = 1;
    9738            0 :               add_reg_note (insn, REG_FRAME_RELATED_EXPR,
    9739            0 :                             gen_rtx_SET (stack_pointer_rtx,
    9740              :                                          plus_constant (Pmode,
    9741              :                                                         stack_pointer_rtx,
    9742              :                                                         -UNITS_PER_WORD)));
    9743              :             }
    9744              :         }
    9745              : 
    9746           45 :       if (r10_live)
    9747              :         {
    9748            0 :           r10 = gen_rtx_REG (Pmode, R10_REG);
    9749            0 :           insn = emit_insn (gen_push (r10));
    9750            0 :           allocate -= UNITS_PER_WORD;
    9751            0 :           if (sp_is_cfa_reg || TARGET_SEH)
    9752              :             {
    9753            0 :               if (sp_is_cfa_reg)
    9754            0 :                 m->fs.cfa_offset += UNITS_PER_WORD;
    9755            0 :               RTX_FRAME_RELATED_P (insn) = 1;
    9756            0 :               add_reg_note (insn, REG_FRAME_RELATED_EXPR,
    9757            0 :                             gen_rtx_SET (stack_pointer_rtx,
    9758              :                                          plus_constant (Pmode,
    9759              :                                                         stack_pointer_rtx,
    9760              :                                                         -UNITS_PER_WORD)));
    9761              :             }
    9762              :         }
    9763              : 
    9764           45 :       emit_move_insn (eax, GEN_INT (allocate));
    9765           45 :       emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));
    9766              : 
    9767              :       /* Use the fact that AX still contains ALLOCATE.  */
    9768           45 :       insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
    9769           45 :                         (Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));
    9770              : 
    9771           45 :       if (sp_is_cfa_reg || TARGET_SEH)
    9772              :         {
    9773           37 :           if (sp_is_cfa_reg)
    9774           37 :             m->fs.cfa_offset += allocate;
    9775           37 :           RTX_FRAME_RELATED_P (insn) = 1;
    9776           37 :           add_reg_note (insn, REG_FRAME_RELATED_EXPR,
    9777           37 :                         gen_rtx_SET (stack_pointer_rtx,
    9778              :                                      plus_constant (Pmode, stack_pointer_rtx,
    9779              :                                                     -allocate)));
    9780              :         }
    9781           45 :       m->fs.sp_offset += allocate;
    9782              : 
    9783              :       /* Use stack_pointer_rtx for relative addressing so that code works for
    9784              :          realigned stack.  But this means that we need a blockage to prevent
    9785              :          stores based on the frame pointer from being scheduled before.  */
    9786           45 :       if (r10_live && eax_live)
    9787              :         {
    9788            0 :           t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
    9789            0 :           emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
    9790              :                           gen_frame_mem (word_mode, t));
    9791            0 :           t = plus_constant (Pmode, t, UNITS_PER_WORD);
    9792            0 :           emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
    9793              :                           gen_frame_mem (word_mode, t));
    9794            0 :           emit_insn (gen_memory_blockage ());
    9795              :         }
    9796           45 :       else if (eax_live || r10_live)
    9797              :         {
    9798            0 :           t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
    9799            0 :           emit_move_insn (gen_rtx_REG (word_mode,
    9800              :                                        (eax_live ? AX_REG : R10_REG)),
    9801              :                           gen_frame_mem (word_mode, t));
    9802            0 :           emit_insn (gen_memory_blockage ());
    9803              :         }
    9804              :     }
    9805      1532382 :   gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
    9806              : 
    9807              :   /* If we haven't already set up the frame pointer, do so now.  */
    9808      1532382 :   if (frame_pointer_needed && !m->fs.fp_valid)
    9809              :     {
    9810            0 :       insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
    9811            0 :                             GEN_INT (frame.stack_pointer_offset
    9812              :                                      - frame.hard_frame_pointer_offset));
    9813            0 :       insn = emit_insn (insn);
    9814            0 :       RTX_FRAME_RELATED_P (insn) = 1;
    9815            0 :       add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
    9816              : 
    9817            0 :       if (m->fs.cfa_reg == stack_pointer_rtx)
    9818            0 :         m->fs.cfa_reg = hard_frame_pointer_rtx;
    9819            0 :       m->fs.fp_offset = frame.hard_frame_pointer_offset;
    9820            0 :       m->fs.fp_valid = true;
    9821              :     }
    9822              : 
    9823      1532382 :   if (!int_registers_saved)
    9824         4447 :     ix86_emit_save_regs_using_mov (frame.reg_save_offset);
    9825      1532382 :   if (!sse_registers_saved)
    9826        33362 :     ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
    9827      1499020 :   else if (save_stub_call_needed)
    9828         7044 :     ix86_emit_outlined_ms2sysv_save (frame);
    9829              : 
    9830              :   /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
    9831              :      in PROLOGUE.  */
    9832      1532382 :   if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
    9833              :     {
    9834            0 :       rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
    9835            0 :       insn = emit_insn (gen_set_got (pic));
    9836            0 :       RTX_FRAME_RELATED_P (insn) = 1;
    9837            0 :       add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
    9838            0 :       emit_insn (gen_prologue_use (pic));
    9839              :       /* Deleting already emitted SET_GOT if exist and allocated to
    9840              :          REAL_PIC_OFFSET_TABLE_REGNUM.  */
    9841            0 :       ix86_elim_entry_set_got (pic);
    9842              :     }
    9843              : 
    9844      1532382 :   if (crtl->drap_reg && !crtl->stack_realign_needed)
    9845              :     {
    9846              :       /* vDRAP is set up, but after reload it turns out that stack
    9847              :          realignment isn't necessary; emit the prologue code that sets
    9848              :          up DRAP without the stack realignment adjustment.  */
    9849          181 :       t = choose_baseaddr (0, NULL);
    9850          181 :       emit_insn (gen_rtx_SET (crtl->drap_reg, t));
    9851              :     }
    9852              : 
    9853              :   /* Prevent instructions from being scheduled into register save push
    9854              :      sequence when access to the redzone area is done through frame pointer.
    9855              :      The offset between the frame pointer and the stack pointer is calculated
    9856              :      relative to the value of the stack pointer at the end of the function
    9857              :      prologue, and moving instructions that access redzone area via frame
    9858              :      pointer inside push sequence violates this assumption.  */
    9859      1532382 :   if (frame_pointer_needed && frame.red_zone_size)
    9860       132639 :     emit_insn (gen_memory_blockage ());
    9861              : 
    9862              :   /* SEH requires that the prologue end within 256 bytes of the start of
    9863              :      the function.  Prevent instruction schedules that would extend that.
    9864              :      Further, prevent alloca modifications to the stack pointer from being
    9865              :      combined with prologue modifications.  */
    9866              :   if (TARGET_SEH)
    9867              :     emit_insn (gen_prologue_use (stack_pointer_rtx));
    9868              : }
    9869              : 
    9870              : /* Emit code to restore REG using a POP or POPP insn; PPX_P is passed to gen_pop.  */
    9871              : 
    9872              : static void
    9873      1468964 : ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p)
    9874              : {
    9875      1468964 :   struct machine_function *m = cfun->machine;
    9876      1468964 :   rtx_insn *insn = emit_insn (gen_pop (reg, ppx_p));
    9877              : 
    9878      1468964 :   ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
    9879      1468964 :   m->fs.sp_offset -= UNITS_PER_WORD;  /* One word was popped.  */
    9880              : 
    9881      1468964 :   if (m->fs.cfa_reg == crtl->drap_reg
    9882      1468964 :       && REGNO (reg) == REGNO (crtl->drap_reg))
    9883              :     {
    9884              :       /* Previously we'd represented the CFA as an expression
    9885              :          like *(%ebp - 8).  We've just popped that value from
    9886              :          the stack, which means we need to reset the CFA to
    9887              :          the drap register.  This will remain until we restore
    9888              :          the stack pointer.  */
    9889         4033 :       add_reg_note (insn, REG_CFA_DEF_CFA, reg);
    9890         4033 :       RTX_FRAME_RELATED_P (insn) = 1;
    9891              : 
    9892              :       /* This means that the DRAP register is valid for addressing too.  */
    9893         4033 :       m->fs.drap_valid = true;
    9894         4033 :       return;
    9895              :     }
    9896              : 
    9897      1464931 :   if (m->fs.cfa_reg == stack_pointer_rtx)
    9898              :     {
    9899      1372359 :       rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
    9900      1009046 :       x = gen_rtx_SET (stack_pointer_rtx, x);
    9901      1009046 :       add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
    9902      1009046 :       RTX_FRAME_RELATED_P (insn) = 1;
    9903              : 
    9904      1190695 :       m->fs.cfa_offset -= UNITS_PER_WORD;
    9905              :     }
    9906              : 
    9907              :   /* When the frame pointer is the CFA, and we pop it, we are
    9908              :      swapping back to the stack pointer as the CFA.  This happens
    9909              :      for stack frames that don't allocate other data, so we assume
    9910              :      the stack pointer is now pointing at the return address, i.e.
    9911              :      the function entry state, which makes the offset be 1 word.  */
    9912      1464931 :   if (reg == hard_frame_pointer_rtx)
    9913              :     {
    9914       245856 :       m->fs.fp_valid = false;
    9915       245856 :       if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    9916              :         {
    9917       241810 :           m->fs.cfa_reg = stack_pointer_rtx;
    9918       241810 :           m->fs.cfa_offset -= UNITS_PER_WORD;
    9919              : 
    9920       241810 :           add_reg_note (insn, REG_CFA_DEF_CFA,
    9921       241810 :                         plus_constant (Pmode, stack_pointer_rtx,
    9922       241810 :                                        m->fs.cfa_offset));
    9923       241810 :           RTX_FRAME_RELATED_P (insn) = 1;
    9924              :         }
    9925              :     }
    9926              : }
    9927              : 
    9928              : /* Emit code to restore REG1 and REG2 using one POP2 insn, or POP2P if PPX_P.  */
    9929              : static void
    9930           19 : ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false)
    9931              : {
    9932           19 :   struct machine_function *m = cfun->machine;
    9933           19 :   const int offset = UNITS_PER_WORD * 2;
    9934           19 :   rtx_insn *insn;
    9935              : 
    9936           19 :   rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode,
    9937              :                                                    stack_pointer_rtx));
    9938              : 
    9939           19 :   if (ppx_p)
    9940           15 :     insn = emit_insn (gen_pop2p_di (reg1, mem, reg2));
    9941              :   else
    9942            4 :     insn = emit_insn (gen_pop2_di (reg1, mem, reg2));
    9943              : 
    9944           19 :   RTX_FRAME_RELATED_P (insn) = 1;
    9945              : 
    9946           19 :   rtx dwarf = NULL_RTX;
    9947           19 :   dwarf = alloc_reg_note (REG_CFA_RESTORE, reg1, dwarf);
    9948           19 :   dwarf = alloc_reg_note (REG_CFA_RESTORE, reg2, dwarf);
    9949           19 :   REG_NOTES (insn) = dwarf;
    9950           19 :   m->fs.sp_offset -= offset;  /* Two words were popped.  */
    9951              : 
    9952           19 :   if (m->fs.cfa_reg == crtl->drap_reg
    9953           19 :       && (REGNO (reg1) == REGNO (crtl->drap_reg)
    9954            3 :           || REGNO (reg2) == REGNO (crtl->drap_reg)))
    9955              :     {
    9956              :       /* Previously we'd represented the CFA as an expression
    9957              :          like *(%ebp - 8).  We've just popped that value from
    9958              :          the stack, which means we need to reset the CFA to
    9959              :          the drap register.  This will remain until we restore
    9960              :          the stack pointer.  */
    9961            1 :       add_reg_note (insn, REG_CFA_DEF_CFA,
    9962            1 :                     REGNO (reg1) == REGNO (crtl->drap_reg) ? reg1 : reg2);
    9963            1 :       RTX_FRAME_RELATED_P (insn) = 1;
    9964              : 
    9965              :       /* This means that the DRAP register is valid for addressing too.  */
    9966            1 :       m->fs.drap_valid = true;
    9967            1 :       return;
    9968              :     }
    9969              : 
    9970           18 :   if (m->fs.cfa_reg == stack_pointer_rtx)
    9971              :     {
    9972           14 :       rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
    9973           14 :       x = gen_rtx_SET (stack_pointer_rtx, x);
    9974           14 :       add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
    9975           14 :       RTX_FRAME_RELATED_P (insn) = 1;
    9976              : 
    9977           14 :       m->fs.cfa_offset -= offset;
    9978              :     }
    9979              : 
    9980              :   /* When the frame pointer is the CFA, and we pop it, we are
    9981              :      swapping back to the stack pointer as the CFA.  This happens
    9982              :      for stack frames that don't allocate other data.  NOTE(review):
    9983              :      ix86_emit_restore_reg_using_pop ends at a 1-word offset here, but
    9984              :      this code subtracts OFFSET (two words) -- confirm it is intended.  */
    9985           18 :   if (reg1 == hard_frame_pointer_rtx || reg2 == hard_frame_pointer_rtx)
    9986              :     {
    9987            0 :       m->fs.fp_valid = false;
    9988            0 :       if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    9989              :         {
    9990            0 :           m->fs.cfa_reg = stack_pointer_rtx;
    9991            0 :           m->fs.cfa_offset -= offset;
    9992              : 
    9993            0 :           add_reg_note (insn, REG_CFA_DEF_CFA,
    9994            0 :                         plus_constant (Pmode, stack_pointer_rtx,
    9995            0 :                                        m->fs.cfa_offset));
    9996            0 :           RTX_FRAME_RELATED_P (insn) = 1;
    9997              :         }
    9998              :     }
    9999              : }
   10000              : 
   10001              : /* Pop each general register that ix86_save_reg reports, lowest regno first; PPX_P is forwarded.  */
   10002              : 
   10003              : static void
   10004      1357019 : ix86_emit_restore_regs_using_pop (bool ppx_p)
   10005              : {
   10006      1357019 :   unsigned int regno;
   10007              : 
   10008    126202767 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   10009    124845748 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
   10010      1222787 :       ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), ppx_p);
   10011      1357019 : }
   10012              : 
   10013              : /* Emit code to restore saved registers using POP2 insns, with a plain POP for any register left unpaired.  */
   10014              : 
   10015              : static void
   10016          563 : ix86_emit_restore_regs_using_pop2 (void)
   10017              : {
   10018          563 :   int regno;
   10019          563 :   int regno_list[2];
   10020          563 :   regno_list[0] = regno_list[1] = -1;
   10021          563 :   int loaded_regnum = 0;  /* Number of registers waiting in REGNO_LIST.  */
   10022          563 :   bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
   10023              : 
   10024        52359 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   10025        51796 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
   10026              :       {
   10027          127 :         if (aligned)
   10028              :           {
   10029          120 :             regno_list[loaded_regnum++] = regno;
   10030          120 :             if (loaded_regnum == 2)
   10031              :               {
   10032           19 :                 gcc_assert (regno_list[0] != -1
   10033              :                             && regno_list[1] != -1
   10034              :                             && regno_list[0] != regno_list[1]);
   10035              : 
   10036           19 :                 ix86_emit_restore_reg_using_pop2 (gen_rtx_REG (word_mode,
   10037              :                                                                regno_list[0]),
   10038              :                                                   gen_rtx_REG (word_mode,
   10039              :                                                                regno_list[1]),
   10040           19 :                                                   TARGET_APX_PPX);
   10041           19 :                 loaded_regnum = 0;
   10042           19 :                 regno_list[0] = regno_list[1] = -1;
   10043              :               }
   10044              :           }
   10045              :         else
   10046              :           {
   10047           14 :             ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno),
   10048            7 :                                              TARGET_APX_PPX);
   10049            7 :             aligned = true;  /* Later registers are collected in pairs.  */
   10050              :           }
   10051              :       }
   10052              : 
   10053          563 :   if (loaded_regnum == 1)
   10054           82 :     ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0]),
   10055           82 :                                      TARGET_APX_PPX);
   10056          563 : }
   10057              : 
   10058              : /* Emit code and notes for the LEAVE instruction.  If INSN is non-null,
   10059              :    omit the emit and only attach the notes to INSN.  */
   10060              : 
   10061              : static void
   10062       247325 : ix86_emit_leave (rtx_insn *insn)
   10063              : {
   10064       247325 :   struct machine_function *m = cfun->machine;
   10065              : 
   10066       247325 :   if (!insn)
   10067       246354 :     insn = emit_insn (gen_leave (word_mode));
   10068              : 
   10069       247325 :   ix86_add_queued_cfa_restore_notes (insn);
   10070              : 
   10071       247325 :   gcc_assert (m->fs.fp_valid);
   10072       247325 :   m->fs.sp_valid = true;
   10073       247325 :   m->fs.sp_realigned = false;
   10074       247325 :   m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;  /* LEAVE also pops the saved frame pointer.  */
   10075       247325 :   m->fs.fp_valid = false;
   10076              : 
   10077       247325 :   if (m->fs.cfa_reg == hard_frame_pointer_rtx)
   10078              :     {
   10079       244184 :       m->fs.cfa_reg = stack_pointer_rtx;
   10080       244184 :       m->fs.cfa_offset = m->fs.sp_offset;
   10081              : 
   10082       244184 :       add_reg_note (insn, REG_CFA_DEF_CFA,
   10083       244184 :                     plus_constant (Pmode, stack_pointer_rtx,
   10084       244184 :                                    m->fs.sp_offset));
   10085       244184 :       RTX_FRAME_RELATED_P (insn) = 1;
   10086              :     }
   10087       247325 :   ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
   10088              :                              m->fs.fp_offset);
   10089       247325 : }
   10090              : 
   10091              : /* Emit code to restore saved general registers using MOV insns.
   10092              :    First register is restored from CFA - CFA_OFFSET.  */
   10093              : static void
   10094        96126 : ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
   10095              :                                   bool maybe_eh_return)
   10096              : {
   10097        96126 :   struct machine_function *m = cfun->machine;
   10098        96126 :   unsigned int regno;
   10099              : 
   10100      8939718 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   10101      8843592 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
   10102              :       {
   10103              : 
   10104              :         /* Skip registers, already processed by shrink wrap separate.  */
   10105       262650 :         if (!cfun->machine->reg_is_wrapped_separately[regno])
   10106              :           {
   10107       140022 :             rtx reg = gen_rtx_REG (word_mode, regno);
   10108       140022 :             rtx mem;
   10109       140022 :             rtx_insn *insn;
   10110              : 
   10111       140022 :             mem = choose_baseaddr (cfa_offset, NULL);
   10112       140022 :             mem = gen_frame_mem (word_mode, mem);
   10113       140022 :             insn = emit_move_insn (reg, mem);
   10114              : 
   10115       140022 :             if (m->fs.cfa_reg == crtl->drap_reg
   10116       140022 :                 && regno == REGNO (crtl->drap_reg))
   10117              :               {
   10118              :                 /* Previously we'd represented the CFA as an expression
   10119              :                    like *(%ebp - 8).  We've just popped that value from
   10120              :                    the stack, which means we need to reset the CFA to
   10121              :                    the drap register.  This will remain until we restore
   10122              :                    the stack pointer.  */
   10123         3141 :                 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
   10124         3141 :                 RTX_FRAME_RELATED_P (insn) = 1;
   10125              : 
   10126              :                 /* DRAP register is valid for addressing.  */
   10127         3141 :                 m->fs.drap_valid = true;
   10128              :               }
   10129              :             else
   10130       136881 :               ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
   10131              :           }
   10132       283097 :         cfa_offset -= UNITS_PER_WORD;  /* Skipped registers still own a slot.  */
   10133              :       }
   10134        96126 : }
   10135              : 
   10136              : /* Emit code to restore saved SSE registers using V4SFmode MOV insns.
   10137              :    First register is restored from CFA - CFA_OFFSET.  */
   10138              : static void
   10139        33939 : ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
   10140              :                                       bool maybe_eh_return)
   10141              : {
   10142        33939 :   unsigned int regno;
   10143              : 
   10144      3156327 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   10145      3122388 :     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
   10146              :       {
   10147       339417 :         rtx reg = gen_rtx_REG (V4SFmode, regno);
   10148       339417 :         rtx mem;
   10149       339417 :         unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);
   10150              : 
   10151       339417 :         mem = choose_baseaddr (cfa_offset, &align);
   10152       339417 :         mem = gen_rtx_MEM (V4SFmode, mem);
   10153              : 
   10154              :         /* The location alignment depends upon the base register.  */
   10155       339417 :         align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
   10156       339417 :         gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
   10157       339417 :         set_mem_align (mem, align);
   10158       339417 :         emit_insn (gen_rtx_SET (reg, mem));
   10159              : 
   10160       339417 :         ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
   10161              : 
   10162       339417 :         cfa_offset -= GET_MODE_SIZE (V4SFmode);  /* Advance to the next save slot.  */
   10163              :       }
   10164        33939 : }
   10165              : /* Emit the insn that restores registers via the out-of-line ms2sysv
                      :    restore stub, using FRAME for the stack layout.  When USE_CALL is
                      :    true the stub is referenced from an ordinary insn and, if the
                      :    stack pointer is valid, the stack is adjusted afterwards.
                      :    Otherwise a jump insn containing a return is emitted, so the stub
                      :    is the tail of the function.  STYLE is passed on to
                      :    pro_epilogue_adjust_stack.  */
   10166              : static void
   10167         7621 : ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
   10168              :                                   bool use_call, int style)
   10169              : {
   10170         7621 :   struct machine_function *m = cfun->machine;
   10171         7621 :   const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
   10172         7621 :                           + m->call_ms2sysv_extra_regs;
   10173         7621 :   rtvec v;
   10174         7621 :   unsigned int elems_needed, align, i, vi = 0;
   10175         7621 :   rtx_insn *insn;
   10176         7621 :   rtx sym, tmp;
   10177         7621 :   rtx rsi = gen_rtx_REG (word_mode, SI_REG);
   10178         7621 :   rtx r10 = NULL_RTX;
   10179         7621 :   const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
   10180         7621 :   HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
   10181         7621 :   HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
   10182         7621 :   rtx rsi_frame_load = NULL_RTX;
   10183         7621 :   HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
   10184         7621 :   enum xlogue_stub stub;
   10185              : 
   10186         7621 :   gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
   10187              : 
   10188              :   /* If using a realigned stack, we should never start with padding.  */
   10189         7621 :   gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
   10190              : 
   10191              :   /* Setup RSI as the stub's base pointer.  */
   10192         7621 :   align = GET_MODE_ALIGNMENT (V4SFmode);
   10193         7621 :   tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
   10194         7621 :   gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
   10195              : 
   10196         7621 :   emit_insn (gen_rtx_SET (rsi, tmp));
   10197              : 
   10198              :   /* Get a symbol for the stub.  */
   10199         7621 :   if (frame_pointer_needed)
   10200         5955 :     stub = use_call ? XLOGUE_STUB_RESTORE_HFP
   10201              :                     : XLOGUE_STUB_RESTORE_HFP_TAIL;
   10202              :   else
   10203         1666 :     stub = use_call ? XLOGUE_STUB_RESTORE
   10204              :                     : XLOGUE_STUB_RESTORE_TAIL;
   10205         7621 :   sym = xlogue.get_stub_rtx (stub);
   10206              :   /* Size the PARALLEL: one frame load per register in NCREGS, plus a
                      :      USE of the stub symbol for a call, or for a tail jump the
                      :      return, the USE and the stack restore elements added below.  */
   10207         7621 :   elems_needed = ncregs;
   10208         7621 :   if (use_call)
   10209         6498 :     elems_needed += 1;
   10210              :   else
   10211         1275 :     elems_needed += frame_pointer_needed ? 5 : 3;
   10212         7621 :   v = rtvec_alloc (elems_needed);
   10213              : 
   10214              :   /* We call the epilogue stub when we need to pop incoming args or we are
   10215              :      doing a sibling call as the tail.  Otherwise, we will emit a jmp to the
   10216              :      epilogue stub and it is the tail-call.  */
   10217         7621 :   if (use_call)
   10218         6498 :       RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
   10219              :   else
   10220              :     {
   10221         1123 :       RTVEC_ELT (v, vi++) = ret_rtx;
   10222         1123 :       RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
   10223         1123 :       if (frame_pointer_needed)
   10224              :         {
   10225          971 :           rtx rbp = gen_rtx_REG (DImode, BP_REG);
   10226          971 :           gcc_assert (m->fs.fp_valid);
   10227          971 :           gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
   10228              : 
   10229          971 :           tmp = plus_constant (DImode, rbp, 8);
   10230          971 :           RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
   10231          971 :           RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
   10232          971 :           tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
   10233          971 :           RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
   10234              :         }
   10235              :       else
   10236              :         {
   10237              :           /* If no hard frame pointer, we set R10 to the SP restore value.  */
   10238          152 :           gcc_assert (!m->fs.fp_valid);
   10239          152 :           gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
   10240          152 :           gcc_assert (m->fs.sp_valid);
   10241              : 
   10242          152 :           r10 = gen_rtx_REG (DImode, R10_REG);
   10243          152 :           tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
   10244          152 :           emit_insn (gen_rtx_SET (r10, tmp));
   10245              : 
   10246          152 :           RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
   10247              :         }
   10248              :     }
   10249              : 
   10250              :   /* Generate frame load insns and restore notes.  */
   10251       107954 :   for (i = 0; i < ncregs; ++i)
   10252              :     {
   10253       100333 :       const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
   10254       100333 :       machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
   10255       100333 :       rtx reg, frame_load;
   10256              : 
   10257       100333 :       reg = gen_rtx_REG (mode, r.regno);
   10258       100333 :       frame_load = gen_frame_load (reg, rsi, r.offset);
   10259              : 
   10260              :       /* Save RSI frame load insn & note to add last.  RSI is the base
                      :          register of every frame load here, which is presumably why
                      :          its own load is placed at the end.  */
   10261       100333 :       if (r.regno == SI_REG)
   10262              :         {
   10263         7621 :           gcc_assert (!rsi_frame_load);
   10264         7621 :           rsi_frame_load = frame_load;
   10265         7621 :           rsi_restore_offset = r.offset;
   10266              :         }
   10267              :       else
   10268              :         {
   10269        92712 :           RTVEC_ELT (v, vi++) = frame_load;
   10270        92712 :           ix86_add_cfa_restore_note (NULL, reg, r.offset);
   10271              :         }
   10272              :     }
   10273              : 
   10274              :   /* Add RSI frame load & restore note at the end.  */
   10275         7621 :   gcc_assert (rsi_frame_load);
   10276         7621 :   gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
   10277         7621 :   RTVEC_ELT (v, vi++) = rsi_frame_load;
   10278         7621 :   ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
   10279              :                              rsi_restore_offset);
   10280              : 
   10281              :   /* Finally, for tail-call w/o a hard frame pointer, set SP to R10.
                      :      The SET itself was already placed in the vector above; only the
                      :      tracked frame state is updated here.  */
   10282         7621 :   if (!use_call && !frame_pointer_needed)
   10283              :     {
   10284          152 :       gcc_assert (m->fs.sp_valid);
   10285          152 :       gcc_assert (!m->fs.sp_realigned);
   10286              : 
   10287              :       /* At this point, R10 should point to frame.stack_realign_offset.  */
   10288          152 :       if (m->fs.cfa_reg == stack_pointer_rtx)
   10289          152 :         m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
   10290          152 :       m->fs.sp_offset = frame.stack_realign_offset;
   10291              :     }
   10292              : 
   10293         7621 :   gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
   10294         7621 :   tmp = gen_rtx_PARALLEL (VOIDmode, v);
   10295         7621 :   if (use_call)
   10296         6498 :       insn = emit_insn (tmp);
   10297              :   else
   10298              :     {
   10299         1123 :       insn = emit_jump_insn (tmp);
   10300         1123 :       JUMP_LABEL (insn) = ret_rtx;
   10301              : 
   10302         1123 :       if (frame_pointer_needed)
   10303          971 :         ix86_emit_leave (insn);
   10304              :       else
   10305              :         {
   10306              :           /* Need CFA adjust note.  */
   10307          152 :           tmp = gen_rtx_SET (stack_pointer_rtx, r10);
   10308          152 :           add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
   10309              :         }
   10310              :     }
   10311              : 
   10312         7621 :   RTX_FRAME_RELATED_P (insn) = true;
   10313         7621 :   ix86_add_queued_cfa_restore_notes (insn);
   10314              : 
   10315              :   /* If we're not doing a tail-call, we need to adjust the stack.  */
   10316         7621 :   if (use_call && m->fs.sp_valid)
   10317              :     {
   10318         3706 :       HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
   10319         3706 :       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   10320              :                                 GEN_INT (dealloc), style,
   10321         3706 :                                 m->fs.cfa_reg == stack_pointer_rtx);
   10322              :     }
   10323         7621 : }
   10324              : 
   10325              : /* Restore function stack, frame, and registers.  */
   10326              : 
   10327              : void
   10328      1654442 : ix86_expand_epilogue (int style)
   10329              : {
   10330      1654442 :   struct machine_function *m = cfun->machine;
   10331      1654442 :   struct machine_frame_state frame_state_save = m->fs;
   10332      1654442 :   bool restore_regs_via_mov;
   10333      1654442 :   bool using_drap;
   10334      1654442 :   bool restore_stub_is_tail = false;
   10335              : 
   10336      1654442 :   if (ix86_function_naked (current_function_decl))
   10337              :     {
   10338              :       /* The program should not reach this point.  */
   10339           74 :       emit_insn (gen_ud2 ());
   10340       122105 :       return;
   10341              :     }
   10342              : 
   10343      1654368 :   ix86_finalize_stack_frame_flags ();
   10344      1654368 :   const struct ix86_frame &frame = cfun->machine->frame;
   10345              : 
   10346      1654368 :   m->fs.sp_realigned = stack_realign_fp;
   10347        31971 :   m->fs.sp_valid = stack_realign_fp
   10348      1629572 :                    || !frame_pointer_needed
   10349      2122780 :                    || crtl->sp_is_unchanging;
   10350      1654368 :   gcc_assert (!m->fs.sp_valid
   10351              :               || m->fs.sp_offset == frame.stack_pointer_offset);
   10352              : 
   10353              :   /* The FP must be valid if the frame pointer is present.  */
   10354      1654368 :   gcc_assert (frame_pointer_needed == m->fs.fp_valid);
   10355      1654368 :   gcc_assert (!m->fs.fp_valid
   10356              :               || m->fs.fp_offset == frame.hard_frame_pointer_offset);
   10357              : 
   10358              :   /* We must have *some* valid pointer to the stack frame.  */
   10359      1654368 :   gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
   10360              : 
   10361              :   /* The DRAP is never valid at this point.  */
   10362      1654368 :   gcc_assert (!m->fs.drap_valid);
   10363              : 
   10364              :   /* See the comment about red zone and frame
   10365              :      pointer usage in ix86_expand_prologue.  */
   10366      1654368 :   if (frame_pointer_needed && frame.red_zone_size)
   10367       132672 :     emit_insn (gen_memory_blockage ());
   10368              : 
   10369      1654368 :   using_drap = crtl->drap_reg && crtl->stack_realign_needed;
   10370         7175 :   gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
   10371              : 
   10372              :   /* Determine the CFA offset of the end of the red-zone.  */
   10373      1654368 :   m->fs.red_zone_offset = 0;
   10374      1654368 :   if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
   10375              :     {
   10376              :       /* The red-zone begins below return address and error code in
   10377              :          exception handler.  */
   10378      1476908 :       m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
   10379              : 
   10380              :       /* When the register save area is in the aligned portion of
   10381              :          the stack, determine the maximum runtime displacement that
   10382              :          matches up with the aligned frame.  */
   10383      1476908 :       if (stack_realign_drap)
   10384         8620 :         m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
   10385         4310 :                                   + UNITS_PER_WORD);
   10386              :     }
   10387              : 
   10388      1654368 :   HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
   10389              : 
   10390              :   /* Special care must be taken for the normal return case of a function
   10391              :      using eh_return: the eax and edx registers are marked as saved, but
   10392              :      not restored along this path.  Adjust the save location to match.  */
   10393      1654368 :   if (crtl->calls_eh_return && style != 2)
   10394           37 :     reg_save_offset -= 2 * UNITS_PER_WORD;
   10395              : 
   10396              :   /* EH_RETURN requires the use of moves to function properly.  */
   10397      1654368 :   if (crtl->calls_eh_return)
   10398              :     restore_regs_via_mov = true;
   10399              :   /* SEH requires the use of pops to identify the epilogue.  */
   10400      1654310 :   else if (TARGET_SEH)
   10401              :     restore_regs_via_mov = false;
   10402              :   /* If we already save reg with pushp, don't use move at epilogue.  */
   10403      1654310 :   else if (m->fs.apx_ppx_used)
   10404              :     restore_regs_via_mov = false;
   10405              :   /* If we're only restoring one register and sp cannot be used then
   10406              :      using a move instruction to restore the register since it's
   10407              :      less work than reloading sp and popping the register.  */
   10408      1654223 :   else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
   10409              :     restore_regs_via_mov = true;
   10410      1593138 :   else if (crtl->shrink_wrapped_separate
   10411      1541636 :            || (TARGET_EPILOGUE_USING_MOVE
   10412        56735 :                && cfun->machine->use_fast_prologue_epilogue
   10413        56679 :                && (frame.nregs > 1
   10414        56666 :                    || m->fs.sp_offset != reg_save_offset)))
   10415              :     restore_regs_via_mov = true;
   10416      1541401 :   else if (frame_pointer_needed
   10417       429514 :            && !frame.nregs
   10418       333251 :            && m->fs.sp_offset != reg_save_offset)
   10419              :     restore_regs_via_mov = true;
   10420      1388062 :   else if (frame_pointer_needed
   10421       276175 :            && TARGET_USE_LEAVE
   10422       276100 :            && cfun->machine->use_fast_prologue_epilogue
   10423       218824 :            && frame.nregs == 1)
   10424              :     restore_regs_via_mov = true;
   10425              :   else
   10426      1654368 :     restore_regs_via_mov = false;
   10427              : 
   10428      1654368 :   if (crtl->shrink_wrapped_separate)
   10429        51533 :     gcc_assert (restore_regs_via_mov);
   10430              : 
   10431      1602835 :   if (restore_regs_via_mov || frame.nsseregs)
   10432              :     {
   10433              :       /* Ensure that the entire register save area is addressable via
   10434              :          the stack pointer, if we will restore SSE regs via sp.  */
   10435       330371 :       if (TARGET_64BIT
   10436       317771 :           && m->fs.sp_offset > 0x7fffffff
   10437           23 :           && sp_valid_at (frame.stack_realign_offset + 1)
   10438       330393 :           && (frame.nsseregs + frame.nregs) != 0)
   10439              :         {
   10440            6 :           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   10441            6 :                                      GEN_INT (m->fs.sp_offset
   10442              :                                               - frame.sse_reg_save_offset),
   10443              :                                      style,
   10444            6 :                                      m->fs.cfa_reg == stack_pointer_rtx);
   10445              :         }
   10446              :     }
   10447              : 
   10448              :   /* If there are any SSE registers to restore, then we have to do it
   10449              :      via moves, since there's obviously no pop for SSE regs.  */
   10450      1654368 :   if (frame.nsseregs)
   10451        33939 :     ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
   10452              :                                           style == 2);
   10453              : 
   10454      1654368 :   if (m->call_ms2sysv)
   10455              :     {
   10456         7621 :       int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
   10457              : 
   10458              :       /* We cannot use a tail-call for the stub if:
   10459              :          1. We have to pop incoming args,
   10460              :          2. We have additional int regs to restore, or
   10461              :          3. A sibling call will be the tail-call, or
   10462              :          4. We are emitting an eh_return_internal epilogue.
   10463              : 
   10464              :          TODO: Item 4 has not yet been tested!
   10465              : 
   10466              :          If any of the above are true, we will call the stub rather than
   10467              :          jump to it.  */
   10468         7621 :       restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
   10469         7621 :       ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
   10470              :     }
   10471              : 
   10472              :   /* If using out-of-line stub that is a tail-call, then...*/
   10473      1654368 :   if (m->call_ms2sysv && restore_stub_is_tail)
   10474              :     {
   10475              :       /* TODO: paranoid tests. (remove eventually)  */
   10476         1123 :       gcc_assert (m->fs.sp_valid);
   10477         1123 :       gcc_assert (!m->fs.sp_realigned);
   10478         1123 :       gcc_assert (!m->fs.fp_valid);
   10479         1123 :       gcc_assert (!m->fs.realigned);
   10480         1123 :       gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
   10481         1123 :       gcc_assert (!crtl->drap_reg);
   10482         1123 :       gcc_assert (!frame.nregs);
   10483         1123 :       gcc_assert (!crtl->shrink_wrapped_separate);
   10484              :     }
   10485      1653245 :   else if (restore_regs_via_mov)
   10486              :     {
   10487       295663 :       rtx t;
   10488              : 
   10489       295663 :       if (frame.nregs)
   10490        96126 :         ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
   10491              : 
   10492              :       /* eh_return epilogues need %ecx added to the stack pointer.  */
   10493       295663 :       if (style == 2)
   10494              :         {
   10495           37 :           rtx sa = EH_RETURN_STACKADJ_RTX;
   10496           29 :           rtx_insn *insn;
   10497              : 
   10498           29 :           gcc_assert (!crtl->shrink_wrapped_separate);
   10499              : 
   10500              :           /* Stack realignment doesn't work with eh_return.  */
   10501           29 :           if (crtl->stack_realign_needed)
   10502            0 :             sorry ("Stack realignment not supported with "
   10503              :                    "%<__builtin_eh_return%>");
   10504              : 
   10505              :           /* regparm nested functions don't work with eh_return.  */
   10506           29 :           if (ix86_static_chain_on_stack)
   10507            0 :             sorry ("regparm nested function not supported with "
   10508              :                    "%<__builtin_eh_return%>");
   10509              : 
   10510           29 :           if (frame_pointer_needed)
   10511              :             {
   10512           35 :               t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
   10513           43 :               t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
   10514           27 :               emit_insn (gen_rtx_SET (sa, t));
   10515              : 
   10516              :               /* NB: eh_return epilogues must restore the frame pointer
   10517              :                  in word_mode since the upper 32 bits of RBP register
   10518              :                  can have any values.  */
   10519           27 :               t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
   10520           27 :               rtx frame_reg = gen_rtx_REG (word_mode,
   10521              :                                            HARD_FRAME_POINTER_REGNUM);
   10522           27 :               insn = emit_move_insn (frame_reg, t);
   10523              : 
   10524              :               /* Note that we use SA as a temporary CFA, as the return
   10525              :                  address is at the proper place relative to it.  We
   10526              :                  pretend this happens at the FP restore insn because
   10527              :                  prior to this insn the FP would be stored at the wrong
   10528              :                  offset relative to SA, and after this insn we have no
   10529              :                  other reasonable register to use for the CFA.  We don't
   10530              :                  bother resetting the CFA to the SP for the duration of
   10531              :                  the return insn, unless the control flow instrumentation
   10532              :                  is done.  In this case the SP is used later and we have
   10533              :                  to reset CFA to SP.  */
   10534           27 :               add_reg_note (insn, REG_CFA_DEF_CFA,
   10535           35 :                             plus_constant (Pmode, sa, UNITS_PER_WORD));
   10536           27 :               ix86_add_queued_cfa_restore_notes (insn);
   10537           27 :               add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
   10538           27 :               RTX_FRAME_RELATED_P (insn) = 1;
   10539              : 
   10540           27 :               m->fs.cfa_reg = sa;
   10541           27 :               m->fs.cfa_offset = UNITS_PER_WORD;
   10542           27 :               m->fs.fp_valid = false;
   10543              : 
   10544           27 :               pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
   10545              :                                          const0_rtx, style,
   10546           27 :                                          flag_cf_protection);
   10547              :             }
   10548              :           else
   10549              :             {
   10550            2 :               t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
   10551            2 :               t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
   10552            2 :               insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
   10553            2 :               ix86_add_queued_cfa_restore_notes (insn);
   10554              : 
   10555            2 :               gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
   10556            2 :               if (m->fs.cfa_offset != UNITS_PER_WORD)
   10557              :                 {
   10558            2 :                   m->fs.cfa_offset = UNITS_PER_WORD;
   10559            2 :                   add_reg_note (insn, REG_CFA_DEF_CFA,
   10560            2 :                                 plus_constant (Pmode, stack_pointer_rtx,
   10561            2 :                                                UNITS_PER_WORD));
   10562            2 :                   RTX_FRAME_RELATED_P (insn) = 1;
   10563              :                 }
   10564              :             }
   10565           29 :           m->fs.sp_offset = UNITS_PER_WORD;
   10566           29 :           m->fs.sp_valid = true;
   10567           29 :           m->fs.sp_realigned = false;
   10568              :         }
   10569              :     }
   10570              :   else
   10571              :     {
   10572              :       /* SEH requires that the function end with (1) a stack adjustment
   10573              :          if necessary, (2) a sequence of pops, and (3) a return or
   10574              :          jump instruction.  Prevent insns from the function body from
   10575              :          being scheduled into this sequence.  */
   10576      1357582 :       if (TARGET_SEH)
   10577              :         {
   10578              :           /* Prevent a catch region from being adjacent to the standard
   10579              :              epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda
   10580              :              nor several other flags that would be interesting to test are
   10581              :              set up yet.  */
   10582              :           if (flag_non_call_exceptions)
   10583              :             emit_insn (gen_nops (const1_rtx));
   10584              :           else
   10585              :             emit_insn (gen_blockage ());
   10586              :         }
   10587              : 
   10588              :       /* First step is to deallocate the stack frame so that we can
   10589              :          pop the registers.  If the stack pointer was realigned, it needs
   10590              :          to be restored now.  Also do it on SEH target for very large
   10591              :          frame as the emitted instructions aren't allowed by the ABI
   10592              :          in epilogues.  */
   10593      1357582 :       if (!m->fs.sp_valid || m->fs.sp_realigned
   10594              :           || (TARGET_SEH
   10595              :               && (m->fs.sp_offset - reg_save_offset
   10596              :                   >= SEH_MAX_FRAME_SIZE)))
   10597              :         {
   10598        29805 :           pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
   10599        29805 :                                      GEN_INT (m->fs.fp_offset
   10600              :                                               - reg_save_offset),
   10601              :                                      style, false);
   10602              :         }
   10603      1327777 :       else if (m->fs.sp_offset != reg_save_offset)
   10604              :         {
   10605       612145 :           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   10606              :                                      GEN_INT (m->fs.sp_offset
   10607              :                                               - reg_save_offset),
   10608              :                                      style,
   10609       612145 :                                      m->fs.cfa_reg == stack_pointer_rtx);
   10610              :         }
   10611              : 
   10612      1357582 :       if (TARGET_APX_PUSH2POP2
   10613          566 :           && ix86_can_use_push2pop2 ()
   10614      1358146 :           && m->func_type == TYPE_NORMAL)
   10615          563 :         ix86_emit_restore_regs_using_pop2 ();
   10616              :       else
   10617      1357019 :         ix86_emit_restore_regs_using_pop (TARGET_APX_PPX);
   10618              :     }
   10619              : 
   10620              :   /* If we used a frame pointer and haven't already got rid of it,
   10621              :      then do so now.  */
   10622      1654368 :   if (m->fs.fp_valid)
   10623              :     {
   10624              :       /* If the stack pointer is valid and pointing at the frame
   10625              :          pointer store address, then we only need a pop.  */
   10626       492210 :       if (sp_valid_at (frame.hfp_save_offset)
   10627       492210 :           && m->fs.sp_offset == frame.hfp_save_offset)
   10628       245844 :         ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
   10629              :       /* Leave results in shorter dependency chains on CPUs that are
   10630              :          able to grok it fast.  */
   10631       246366 :       else if (TARGET_USE_LEAVE
   10632           12 :                || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
   10633       246378 :                || !cfun->machine->use_fast_prologue_epilogue)
   10634       246354 :         ix86_emit_leave (NULL);
   10635              :       else
   10636              :         {
   10637           12 :           pro_epilogue_adjust_stack (stack_pointer_rtx,
   10638              :                                      hard_frame_pointer_rtx,
   10639           12 :                                      const0_rtx, style, !using_drap);
   10640           12 :           ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
   10641              :         }
   10642              :     }
   10643              : 
   10644      1654368 :   if (using_drap)
   10645              :     {
   10646         7175 :       int param_ptr_offset = UNITS_PER_WORD;
   10647         7175 :       rtx_insn *insn;
   10648              : 
   10649         7175 :       gcc_assert (stack_realign_drap);
   10650              : 
   10651         7175 :       if (ix86_static_chain_on_stack)
   10652            0 :         param_ptr_offset += UNITS_PER_WORD;
   10653         7175 :       if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
   10654          232 :         param_ptr_offset += UNITS_PER_WORD;
   10655              : 
   10656         7480 :       insn = emit_insn (gen_rtx_SET
   10657              :                         (stack_pointer_rtx,
   10658              :                          plus_constant (Pmode, crtl->drap_reg,
   10659              :                                         -param_ptr_offset)));
   10660         7175 :       m->fs.cfa_reg = stack_pointer_rtx;
   10661         7175 :       m->fs.cfa_offset = param_ptr_offset;
   10662         7175 :       m->fs.sp_offset = param_ptr_offset;
   10663         7175 :       m->fs.realigned = false;
   10664              : 
   10665         7480 :       add_reg_note (insn, REG_CFA_DEF_CFA,
   10666         7175 :                     plus_constant (Pmode, stack_pointer_rtx,
   10667         7175 :                                    param_ptr_offset));
   10668         7175 :       RTX_FRAME_RELATED_P (insn) = 1;
   10669              : 
   10670         7175 :       if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
   10671          232 :         ix86_emit_restore_reg_using_pop (crtl->drap_reg);
   10672              :     }
   10673              : 
   10674              :   /* At this point the stack pointer must be valid, and we must have
   10675              :      restored all of the registers.  We may not have deallocated the
   10676              :      entire stack frame.  We've delayed this until now because it may
   10677              :      be possible to merge the local stack deallocation with the
   10678              :      deallocation forced by ix86_static_chain_on_stack.   */
   10679      1654368 :   gcc_assert (m->fs.sp_valid);
   10680      1654368 :   gcc_assert (!m->fs.sp_realigned);
   10681      1654368 :   gcc_assert (!m->fs.fp_valid);
   10682      1654368 :   gcc_assert (!m->fs.realigned);
   10683      1790076 :   if (m->fs.sp_offset != UNITS_PER_WORD)
   10684              :     {
   10685        49254 :       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   10686              :                                  GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
   10687              :                                  style, true);
   10688              :     }
   10689              :   else
   10690      1605114 :     ix86_add_queued_cfa_restore_notes (get_last_insn ());
   10691              : 
   10692              :   /* Sibcall epilogues don't want a return instruction.  */
   10693      1654368 :   if (style == 0)
   10694              :     {
   10695       121957 :       m->fs = frame_state_save;
   10696       121957 :       return;
   10697              :     }
   10698              : 
   10699      1532411 :   if (cfun->machine->func_type != TYPE_NORMAL)
   10700          120 :     emit_jump_insn (gen_interrupt_return ());
   10701      1532291 :   else if (crtl->args.pops_args && crtl->args.size)
   10702              :     {
   10703        25992 :       rtx popc = GEN_INT (crtl->args.pops_args);
   10704              : 
   10705              :       /* i386 can only pop 64K bytes.  If asked to pop more, pop return
   10706              :          address, do explicit add, and jump indirectly to the caller.  */
   10707              : 
   10708        25992 :       if (crtl->args.pops_args >= 65536)
   10709              :         {
   10710            0 :           rtx ecx = gen_rtx_REG (SImode, CX_REG);
   10711            0 :           rtx_insn *insn;
   10712              : 
   10713              :           /* There is no "pascal" calling convention in any 64bit ABI.  */
   10714            0 :           gcc_assert (!TARGET_64BIT);
   10715              : 
   10716            0 :           insn = emit_insn (gen_pop (ecx));
   10717            0 :           m->fs.cfa_offset -= UNITS_PER_WORD;
   10718            0 :           m->fs.sp_offset -= UNITS_PER_WORD;
   10719              : 
   10720            0 :           rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
   10721            0 :           x = gen_rtx_SET (stack_pointer_rtx, x);
   10722            0 :           add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
   10723            0 :           add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
   10724            0 :           RTX_FRAME_RELATED_P (insn) = 1;
   10725              : 
   10726            0 :           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   10727              :                                      popc, -1, true);
   10728            0 :           emit_jump_insn (gen_simple_return_indirect_internal (ecx));
   10729              :         }
   10730              :       else
   10731        25992 :         emit_jump_insn (gen_simple_return_pop_internal (popc));
   10732              :     }
   10733      1506299 :   else if (!m->call_ms2sysv || !restore_stub_is_tail)
   10734              :     {
   10735              :       /* In case of return from EH a simple return cannot be used
   10736              :          as a return address will be compared with a shadow stack
   10737              :          return address.  Use indirect jump instead.  */
   10738      1505176 :       if (style == 2 && flag_cf_protection)
   10739              :         {
   10740              :           /* Register used in indirect jump must be in word_mode.  But
   10741              :              Pmode may not be the same as word_mode for x32.  */
   10742           17 :           rtx ecx = gen_rtx_REG (word_mode, CX_REG);
   10743           17 :           rtx_insn *insn;
   10744              : 
   10745           17 :           insn = emit_insn (gen_pop (ecx));
   10746           17 :           m->fs.cfa_offset -= UNITS_PER_WORD;
   10747           17 :           m->fs.sp_offset -= UNITS_PER_WORD;
   10748              : 
   10749           33 :           rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
   10750           17 :           x = gen_rtx_SET (stack_pointer_rtx, x);
   10751           17 :           add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
   10752           17 :           add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
   10753           17 :           RTX_FRAME_RELATED_P (insn) = 1;
   10754              : 
   10755           17 :           emit_jump_insn (gen_simple_return_indirect_internal (ecx));
   10756           17 :         }
   10757              :       else
   10758      1505159 :         emit_jump_insn (gen_simple_return_internal ());
   10759              :     }
   10760              : 
   10761              :   /* Restore the state back to the state from the prologue,
   10762              :      so that it's correct for the next epilogue.  */
   10763      1532411 :   m->fs = frame_state_save;
   10764              : }
   10765              : 
   10766              : /* Reset the PIC register number; on Mach-O, pad a trailing label or empty body.  */
   10767              : 
   10768              : static void
   10769      1493745 : ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
   10770              : {
   10771      1493745 :   if (pic_offset_table_rtx
   10772      1493745 :       && !ix86_use_pseudo_pic_reg ())
   10773            0 :     SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
   10774              : 
   10775      1493745 :   if (TARGET_MACHO)
   10776              :     {
   10777              :       rtx_insn *insn = get_last_insn ();
   10778              :       rtx_insn *deleted_debug_label = NULL;
   10779              : 
   10780              :       /* Mach-O doesn't support labels at the end of objects, so if
   10781              :          it looks like we might want one, take special action.
   10782              :          First, walk back over trailing notes, recording deleted debug labels.  */
   10783              :       while (insn
   10784              :              && NOTE_P (insn)
   10785              :              && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
   10786              :         {
   10787              :           /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
   10788              :              notes only; instead set their CODE_LABEL_NUMBER to -1
   10789              :              (done below), otherwise there would be code generation
   10790              :              differences between -g and -g0.  */
   10791              :           if (NOTE_P (insn) && NOTE_KIND (insn)
   10792              :               == NOTE_INSN_DELETED_DEBUG_LABEL)
   10793              :             deleted_debug_label = insn;
   10794              :           insn = PREV_INSN (insn);
   10795              :         }
   10796              : 
   10797              :       /* If we have:
   10798              :          label:
   10799              :             barrier
   10800              :           then this needs to be detected, so skip past the barrier.  */
   10801              : 
   10802              :       if (insn && BARRIER_P (insn))
   10803              :         insn = PREV_INSN (insn);
   10804              : 
   10805              :       /* Up to now we've only seen notes or barriers.  */
   10806              :       if (insn)
   10807              :         {
   10808              :           if (LABEL_P (insn)
   10809              :               || (NOTE_P (insn)
   10810              :                   && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
   10811              :             /* Trailing label: emit a nop after it.  */
   10812              :             fputs ("\tnop\n", file);
   10813              :           else if (cfun && ! cfun->is_thunk)
   10814              :             {
   10815              :               /* See if we have a completely empty function body, skipping
   10816              :                  the special case of the picbase thunk emitted as asm.  */
   10817              :               while (insn && ! INSN_P (insn))
   10818              :                 insn = PREV_INSN (insn);
   10819              :               /* If we don't find any insns, we've got an empty function body;
   10820              :                  i.e. completely empty - without a return or branch.  This is
   10821              :                  taken as the case where a function body has been removed
   10822              :                  because it contains an inline __builtin_unreachable().  GCC
   10823              :                  declares that reaching __builtin_unreachable() means UB so
   10824              :                  we're not obliged to do anything special; however, we want
   10825              :                  non-zero-sized function bodies.  To meet this, and help the
   10826              :                  user out, let's trap the case.  */
   10827              :               if (insn == NULL)
   10828              :                 fputs ("\tud2\n", file);
   10829              :             }
   10830              :         }
   10831              :       else if (deleted_debug_label)
   10832              :         for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
   10833              :           if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
   10834              :             CODE_LABEL_NUMBER (insn) = -1;
   10835              :     }
   10836      1493745 : }
   10837              : 
   10838              : /* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY.  */
   10839              : 
   10840              : void
   10841           59 : ix86_print_patchable_function_entry (FILE *file,
   10842              :                                      unsigned HOST_WIDE_INT patch_area_size,
   10843              :                                      bool record_p)
   10844              : {
   10845           59 :   if (cfun->machine->function_label_emitted)
   10846              :     {
   10847              :       /* NB: When ix86_print_patchable_function_entry is called after
   10848              :          the function label has been emitted (function_label_emitted is
   10849              :          set), we have inserted or queued a pseudo UNSPECV_PATCHABLE_AREA
   10850              :          instruction at the proper place.  There is nothing to do here.  */
   10851              :       return;
   10852              :     }
   10853              :   /* Otherwise fall back to the default implementation.  */
   10854            8 :   default_print_patchable_function_entry (file, patch_area_size,
   10855              :                                           record_p);
   10856              : }
   10857              : 
   10858              : /* Output patchable area to asm_out_file.  NB: this wraps
   10859              :    default_print_patchable_function_entry, which isn't available in i386.md.  */
   10860              : 
   10861              : void
   10862           51 : ix86_output_patchable_area (unsigned int patch_area_size,
   10863              :                             bool record_p)
   10864              : {
   10865           51 :   default_print_patchable_function_entry (asm_out_file,
   10866              :                                           patch_area_size,
   10867              :                                           record_p);
   10868           51 : }
   10869              : 
   10870              : /* Return a scratch register to use in the split stack prologue, or
   10871              :    INVALID_REGNUM (after a sorry ()) if none is usable.  The split stack
   10872              :    prologue, used for -fsplit-stack, comes first in the function, even
   10873              :    before the regular prologue.  The scratch register can be any
   10874              :    caller-saved register not used for parameters or the static chain.  */
   10875              : 
   10876              : static unsigned int
   10877        24613 : split_stack_prologue_scratch_regno (void)
   10878              : {
   10879        24613 :   if (TARGET_64BIT)
   10880              :     return R11_REG;
   10881              :   else
   10882              :     {
   10883         6946 :       bool is_fastcall, is_thiscall;
   10884         6946 :       int regparm;
   10885              : 
   10886         6946 :       is_fastcall = (lookup_attribute ("fastcall",
   10887         6946 :                                        TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
   10888              :                      != NULL);
   10889         6946 :       is_thiscall = (lookup_attribute ("thiscall",
   10890         6946 :                                        TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
   10891              :                      != NULL);
   10892         6946 :       regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
   10893              : 
   10894         6946 :       if (is_fastcall)
   10895              :         {
   10896            0 :           if (DECL_STATIC_CHAIN (cfun->decl))
   10897              :             {
   10898            0 :               sorry ("%<-fsplit-stack%> does not support fastcall with "
   10899              :                      "nested function");
   10900            0 :               return INVALID_REGNUM;
   10901              :             }
   10902              :           return AX_REG;
   10903              :         }
   10904         6946 :       else if (is_thiscall)
   10905              :         {
   10906            0 :           if (!DECL_STATIC_CHAIN (cfun->decl))
   10907              :             return DX_REG;
   10908            0 :           return AX_REG;
   10909              :         }
   10910         6946 :       else if (regparm < 3)
   10911              :         {
   10912         6946 :           if (!DECL_STATIC_CHAIN (cfun->decl))
   10913              :             return CX_REG;
   10914              :           else
   10915              :             {
   10916          459 :               if (regparm >= 2)
   10917              :                 {
   10918            0 :                   sorry ("%<-fsplit-stack%> does not support 2 register "
   10919              :                          "parameters for a nested function");
   10920            0 :                   return INVALID_REGNUM;
   10921              :                 }
   10922              :               return DX_REG;
   10923              :             }
   10924              :         }
   10925              :       else
   10926              :         {
   10927              :           /* FIXME: We could make this work by pushing a register
   10928              :              around the addition and comparison.  */
   10929            0 :           sorry ("%<-fsplit-stack%> does not support 3 register parameters");
   10930            0 :           return INVALID_REGNUM;
   10931              :         }
   10932              :     }
   10933              : }
   10934              : 
   10935              : /* A SYMBOL_REF for __morestack, the function which allocates new
   10936              :    stack space for -fsplit-stack.  */
   10937              : 
   10938              : static GTY(()) rtx split_stack_fn;
   10939              : 
   10940              : /* A SYMBOL_REF for __morestack_large_model, the more stack function for the large model.  */
   10941              : 
   10942              : static GTY(()) rtx split_stack_fn_large;
   10943              : 
   10944              : /* Return a MEM for the stack guard value in the TLS block; needs -fsplit-stack.  */
   10945              : 
   10946              : rtx
   10947       260053 : ix86_split_stack_guard (void)
   10948              : {
   10949       260053 :   int offset;
   10950       260053 :   addr_space_t as = DEFAULT_TLS_SEG_REG;
   10951       260053 :   rtx r;
   10952              : 
   10953       260053 :   gcc_assert (flag_split_stack);
   10954              : 
   10955              : #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
   10956       260053 :   offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
   10957              : #else
   10958              :   gcc_unreachable ();  /* TARGET_THREAD_SPLIT_STACK_OFFSET is not defined.  */
   10959              : #endif
   10960              : 
   10961       260053 :   r = GEN_INT (offset);
   10962       358058 :   r = gen_const_mem (Pmode, r);
   10963       260053 :   set_mem_addr_space (r, as);
   10964              : 
   10965       260053 :   return r;
   10966              : }
   10967              : 
   10968              : /* Handle -fsplit-stack.  These are the first instructions in the
   10969              :    function, even before the regular prologue.  */
   10970              : 
   10971              : void
   10972       260043 : ix86_expand_split_stack_prologue (void)
   10973              : {
   10974       260043 :   HOST_WIDE_INT allocate;
   10975       260043 :   unsigned HOST_WIDE_INT args_size;
   10976       260043 :   rtx_code_label *label;
   10977       260043 :   rtx limit, current, allocate_rtx, call_fusage;
   10978       260043 :   rtx_insn *call_insn;
   10979       260043 :   unsigned int scratch_regno = INVALID_REGNUM;
   10980       260043 :   rtx scratch_reg = NULL_RTX;
   10981       260043 :   rtx_code_label *varargs_label = NULL;
   10982       260043 :   rtx fn;
   10983              : 
   10984       260043 :   gcc_assert (flag_split_stack && reload_completed);
   10985              : 
   10986       260043 :   ix86_finalize_stack_frame_flags ();
   10987       260043 :   struct ix86_frame &frame = cfun->machine->frame;
   10988       260043 :   allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
   10989              : 
   10990              :   /* This is the label we will branch to if we have enough stack
   10991              :      space.  We expect the basic block reordering pass to reverse this
   10992              :      branch if optimizing, so that we branch in the unlikely case.  */
   10993       260043 :   label = gen_label_rtx ();
   10994              : 
   10995              :   /* We need to compare the stack pointer minus the frame size with
   10996              :      the stack boundary in the TCB.  The stack boundary always gives
   10997              :      us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
   10998              :      can compare directly.  Otherwise we need to do an addition.  */
   10999              : 
   11000       260043 :   limit = ix86_split_stack_guard ();
   11001              : 
   11002       260043 :   if (allocate >= SPLIT_STACK_AVAILABLE
   11003       235593 :       || flag_force_indirect_call)
   11004              :     {
   11005        24465 :       scratch_regno = split_stack_prologue_scratch_regno ();
   11006        24465 :       if (scratch_regno == INVALID_REGNUM)
   11007            0 :         return;
   11008              :     }
   11009              : 
   11010       260043 :   if (allocate >= SPLIT_STACK_AVAILABLE)
   11011              :     {
   11012        24450 :       rtx offset;
   11013              : 
   11014              :       /* We need a scratch register to hold the stack pointer minus
   11015              :          the required frame size.  Since this is the very start of the
   11016              :          function, the scratch register can be any caller-saved
   11017              :          register which is not used for parameters.  */
   11018        24450 :       offset = GEN_INT (- allocate);
   11019              : 
   11020        31342 :       scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
   11021        24450 :       if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
   11022              :         {
   11023              :           /* We don't use gen_add in this case because it will
   11024              :              want to split to lea, but when not optimizing the insn
   11025              :              will not be split after this point.  */
   11026        31342 :           emit_insn (gen_rtx_SET (scratch_reg,
   11027              :                                   gen_rtx_PLUS (Pmode, stack_pointer_rtx,
   11028              :                                                 offset)));
   11029              :         }
   11030              :       else
   11031              :         {
   11032            0 :           emit_move_insn (scratch_reg, offset);
   11033            0 :           emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
   11034              :         }
   11035              :       current = scratch_reg;
   11036              :     }
   11037              :   else
   11038       235593 :     current = stack_pointer_rtx;
   11039              : 
   11040       260043 :   ix86_expand_branch (GEU, current, limit, label);
   11041       260043 :   rtx_insn *jump_insn = get_last_insn ();
   11042       260043 :   JUMP_LABEL (jump_insn) = label;
   11043              : 
   11044              :   /* Mark the jump as very likely to be taken.  */
   11045       260043 :   add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
   11046              : 
   11047       260043 :   if (split_stack_fn == NULL_RTX)
   11048              :     {
   11049         5451 :       split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
   11050         4347 :       SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
   11051              :     }
   11052       260043 :   fn = split_stack_fn;
   11053              : 
   11054              :   /* Get more stack space.  We pass in the desired stack space and the
   11055              :      size of the arguments to copy to the new stack.  In 32-bit mode
   11056              :      we push the parameters; __morestack will return on a new stack
   11057              :      anyhow.  In 64-bit mode we pass the parameters in r10 and
   11058              :      r11.  */
   11059       260043 :   allocate_rtx = GEN_INT (allocate);
   11060       260043 :   args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
   11061       260043 :   call_fusage = NULL_RTX;
   11062       260043 :   rtx pop = NULL_RTX;
   11063       260043 :   if (TARGET_64BIT)
   11064              :     {
   11065       162038 :       rtx reg10, reg11;
   11066              : 
   11067       162038 :       reg10 = gen_rtx_REG (DImode, R10_REG);
   11068       162038 :       reg11 = gen_rtx_REG (DImode, R11_REG);
   11069              : 
   11070              :       /* If this function uses a static chain, it will be in %r10.
   11071              :          Preserve it across the call to __morestack.  */
   11072       162038 :       if (DECL_STATIC_CHAIN (cfun->decl))
   11073              :         {
   11074         7505 :           rtx rax;
   11075              : 
   11076         7505 :           rax = gen_rtx_REG (word_mode, AX_REG);
   11077         7505 :           emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
   11078         7505 :           use_reg (&call_fusage, rax);
   11079              :         }
   11080              : 
   11081       162038 :       if (flag_force_indirect_call
   11082       162023 :           || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
   11083              :         {
   11084           16 :           HOST_WIDE_INT argval;
   11085              : 
   11086           16 :           if (split_stack_fn_large == NULL_RTX)
   11087              :             {
   11088            7 :               split_stack_fn_large
   11089            7 :                 = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
   11090            7 :               SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
   11091              :             }
   11092              : 
   11093           16 :           fn = split_stack_fn_large;
   11094              : 
   11095           16 :           if (ix86_cmodel == CM_LARGE_PIC)
   11096              :             {
   11097            3 :               rtx_code_label *label;
   11098            3 :               rtx x;
   11099              : 
   11100            3 :               gcc_assert (Pmode == DImode);
   11101              : 
   11102            3 :               label = gen_label_rtx ();
   11103            3 :               emit_label (label);
   11104            3 :               LABEL_PRESERVE_P (label) = 1;
   11105            3 :               emit_insn (gen_set_rip_rex64 (reg10, label));
   11106            3 :               emit_insn (gen_set_got_offset_rex64 (reg11, label));
   11107            3 :               emit_insn (gen_add2_insn (reg10, reg11));
   11108            3 :               x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fn), UNSPEC_GOT);
   11109            3 :               x = gen_rtx_CONST (Pmode, x);
   11110            3 :               emit_move_insn (reg11, x);
   11111            3 :               x = gen_rtx_PLUS (Pmode, reg10, reg11);
   11112            3 :               x = gen_const_mem (Pmode, x);
   11113            3 :               fn = copy_to_suggested_reg (x, reg11, Pmode);
   11114              :             }
   11115           13 :           else if (ix86_cmodel == CM_LARGE)
   11116            1 :             fn = copy_to_suggested_reg (fn, reg11, Pmode);
   11117              : 
   11118              :           /* When using the large model we need to load the address
   11119              :              into a register, and we've run out of registers.  So we
   11120              :              switch to a different calling convention, and we call a
   11121              :              different function: __morestack_large_model.  We pass the
   11122              :              argument size in the upper 32 bits of r10 and pass the
   11123              :              frame size in the lower 32 bits.  */
   11124           16 :           gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
   11125           16 :           gcc_assert ((args_size & 0xffffffff) == args_size);
   11126              : 
   11127           16 :           argval = ((args_size << 16) << 16) + allocate;
   11128           16 :           emit_move_insn (reg10, GEN_INT (argval));
   11129           16 :         }
   11130              :       else
   11131              :         {
   11132       162022 :           emit_move_insn (reg10, allocate_rtx);
   11133       162022 :           emit_move_insn (reg11, GEN_INT (args_size));
   11134       162022 :           use_reg (&call_fusage, reg11);
   11135              :         }
   11136              : 
   11137       162038 :       use_reg (&call_fusage, reg10);
   11138              :     }
   11139              :   else
   11140              :     {
   11141        98005 :       if (flag_force_indirect_call && flag_pic)
   11142              :         {
   11143            0 :           rtx x;
   11144              : 
   11145            0 :           gcc_assert (Pmode == SImode);
   11146              : 
   11147            0 :           scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
   11148              : 
   11149            0 :           emit_insn (gen_set_got (scratch_reg));
   11150            0 :           x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn),
   11151              :                               UNSPEC_GOT);
   11152            0 :           x = gen_rtx_CONST (Pmode, x);
   11153            0 :           x = gen_rtx_PLUS (Pmode, scratch_reg, x);
   11154            0 :           x = gen_const_mem (Pmode, x);
   11155            0 :           fn = copy_to_suggested_reg (x, scratch_reg, Pmode);
   11156              :         }
   11157              : 
   11158        98005 :       rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
   11159       196010 :       add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
   11160        98005 :       insn = emit_insn (gen_push (allocate_rtx));
   11161       196010 :       add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
   11162       196010 :       pop = GEN_INT (2 * UNITS_PER_WORD);
   11163              :     }
   11164              : 
   11165       260043 :   if (flag_force_indirect_call && !register_operand (fn, VOIDmode))
   11166              :     {
   11167           12 :       scratch_reg = gen_rtx_REG (word_mode, scratch_regno);
   11168              : 
   11169           12 :       if (GET_MODE (fn) != word_mode)
   11170            0 :         fn = gen_rtx_ZERO_EXTEND (word_mode, fn);
   11171              : 
   11172           12 :       fn = copy_to_suggested_reg (fn, scratch_reg, word_mode);
   11173              :     }
   11174              : 
   11175       260043 :   call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
   11176       260043 :                                 GEN_INT (UNITS_PER_WORD), constm1_rtx,
   11177              :                                 pop, false);
   11178       260043 :   add_function_usage_to (call_insn, call_fusage);
   11179       260043 :   if (!TARGET_64BIT)
   11180        98005 :     add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
   11181              :   /* Indicate that this function can't jump to non-local gotos.  */
   11182       260043 :   make_reg_eh_region_note_nothrow_nononlocal (call_insn);
   11183              : 
   11184              :   /* In order to make call/return prediction work right, we now need
   11185              :      to execute a return instruction.  See
   11186              :      libgcc/config/i386/morestack.S for the details on how this works.
   11187              : 
   11188              :      For flow purposes gcc must not see this as a return
   11189              :      instruction--we need control flow to continue at the subsequent
   11190              :      label.  Therefore, we use an unspec.  */
   11191       260043 :   gcc_assert (crtl->args.pops_args < 65536);
   11192       260043 :   rtx_insn *ret_insn
   11193       260043 :     = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
   11194              : 
   11195       260043 :   if ((flag_cf_protection & CF_BRANCH))
   11196              :     {
   11197              :       /* Insert ENDBR since __morestack will jump back here via indirect
   11198              :          call.  */
   11199           21 :       rtx cet_eb = gen_nop_endbr ();
   11200           21 :       emit_insn_after (cet_eb, ret_insn);
   11201              :     }
   11202              : 
   11203              :   /* If we are in 64-bit mode and this function uses a static chain,
   11204              :      we saved %r10 in %rax before calling __morestack.  */
   11205       260043 :   if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
   11206         7505 :     emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
   11207              :                     gen_rtx_REG (word_mode, AX_REG));
   11208              : 
   11209              :   /* If this function calls va_start, we need to store a pointer to
   11210              :      the arguments on the old stack, because they may not have been
   11211              :      all copied to the new stack.  At this point the old stack can be
   11212              :      found at the frame pointer value used by __morestack, because
   11213              :      __morestack has set that up before calling back to us.  Here we
   11214              :      store that pointer in a scratch register, and in
   11215              :      ix86_expand_prologue we store the scratch register in a stack
   11216              :      slot.  */
   11217       260043 :   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
   11218              :     {
   11219           12 :       rtx frame_reg;
   11220           12 :       int words;
   11221              : 
   11222           12 :       scratch_regno = split_stack_prologue_scratch_regno ();
   11223           16 :       scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
   11224           16 :       frame_reg = gen_rtx_REG (Pmode, BP_REG);
   11225              : 
   11226              :       /* 64-bit:
   11227              :          fp -> old fp value
   11228              :                return address within this function
   11229              :                return address of caller of this function
   11230              :                stack arguments
   11231              :          So we add three words to get to the stack arguments.
   11232              : 
   11233              :          32-bit:
   11234              :          fp -> old fp value
   11235              :                return address within this function
   11236              :                first argument to __morestack
   11237              :                second argument to __morestack
   11238              :                return address of caller of this function
   11239              :                stack arguments
   11240              :          So we add five words to get to the stack arguments.
   11241              :       */
   11242           12 :       words = TARGET_64BIT ? 3 : 5;
   11243           20 :       emit_insn (gen_rtx_SET (scratch_reg,
   11244              :                               plus_constant (Pmode, frame_reg,
   11245              :                                              words * UNITS_PER_WORD)));
   11246              : 
   11247           12 :       varargs_label = gen_label_rtx ();
   11248           12 :       emit_jump_insn (gen_jump (varargs_label));
   11249           12 :       JUMP_LABEL (get_last_insn ()) = varargs_label;
   11250              : 
   11251           12 :       emit_barrier ();
   11252              :     }
   11253              : 
   11254       260043 :   emit_label (label);
   11255       260043 :   LABEL_NUSES (label) = 1;
   11256              : 
   11257              :   /* If this function calls va_start, we now have to set the scratch
   11258              :      register for the case where we do not call __morestack.  In this
   11259              :      case we need to set it based on the stack pointer.  */
   11260       260043 :   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
   11261              :     {
   11262           20 :       emit_insn (gen_rtx_SET (scratch_reg,
   11263              :                               plus_constant (Pmode, stack_pointer_rtx,
   11264              :                                              UNITS_PER_WORD)));
   11265              : 
   11266           12 :       emit_label (varargs_label);
   11267           12 :       LABEL_NUSES (varargs_label) = 1;
   11268              :     }
   11269              : }
   11270              : 
   11271              : /* We may have to tell the dataflow pass that the split stack prologue
   11272              :    is initializing a scratch register.  */
   11273              : 
   11274              : static void
   11275     15893874 : ix86_live_on_entry (bitmap regs)
   11276              : {
   11277     15893874 :   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
   11278              :     {
   11279          124 :       gcc_assert (flag_split_stack);
   11280          124 :       bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
   11281              :     }
   11282     15893874 : }
   11283              : 
   11284              : /* Extract the parts of an RTL expression that is a valid memory address
   11285              :    for an instruction.  Return false if the structure of the address is
   11286              :    grossly off.  */
   11287              : 
   11288              : bool
   11289   4328599941 : ix86_decompose_address (rtx addr, struct ix86_address *out)
   11290              : {
   11291   4328599941 :   rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
   11292   4328599941 :   rtx base_reg, index_reg;
   11293   4328599941 :   HOST_WIDE_INT scale = 1;
   11294   4328599941 :   rtx scale_rtx = NULL_RTX;
   11295   4328599941 :   rtx tmp;
   11296   4328599941 :   addr_space_t seg = ADDR_SPACE_GENERIC;
   11297              : 
   11298              :   /* Allow zero-extended SImode addresses,
   11299              :      they will be emitted with addr32 prefix.  */
   11300   4328599941 :   if (TARGET_64BIT && GET_MODE (addr) == DImode)
   11301              :     {
   11302   2279648289 :       if (GET_CODE (addr) == ZERO_EXTEND
   11303      2170304 :           && GET_MODE (XEXP (addr, 0)) == SImode)
   11304              :         {
   11305      2075099 :           addr = XEXP (addr, 0);
   11306      2075099 :           if (CONST_INT_P (addr))
   11307              :             return false;
   11308              :         }
   11309   2277573190 :       else if (GET_CODE (addr) == AND)
   11310              :         {
   11311      2826049 :           rtx mask = XEXP (addr, 1);
   11312      2826049 :           rtx shift_val;
   11313              : 
   11314      2826049 :           if (const_32bit_mask (mask, DImode)
   11315              :               /* For ASHIFT inside AND, combine will not generate
   11316              :                  canonical zero-extend. Merge mask for AND and shift_count
   11317              :                  to check if it is canonical zero-extend.  */
   11318      2826049 :               || (CONST_INT_P (mask)
   11319      1834920 :                   && GET_CODE (XEXP (addr, 0)) == ASHIFT
   11320       142541 :                   && CONST_INT_P (shift_val = XEXP (XEXP (addr, 0), 1))
   11321       139442 :                   && ((UINTVAL (mask)
   11322       139442 :                        | ((HOST_WIDE_INT_1U << INTVAL (shift_val)) - 1))
   11323              :                       == HOST_WIDE_INT_UC (0xffffffff))))
   11324              :             {
   11325        82855 :               addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
   11326        82855 :               if (addr == NULL_RTX)
   11327              :                 return false;
   11328              : 
   11329        82855 :               if (CONST_INT_P (addr))
   11330              :                 return false;
   11331              :             }
   11332              :         }
   11333              :     }
   11334              : 
   11335              :   /* Allow SImode subregs of DImode addresses,
   11336              :      they will be emitted with addr32 prefix.  */
   11337   4328599941 :   if (TARGET_64BIT && GET_MODE (addr) == SImode)
   11338              :     {
   11339     17222166 :       if (SUBREG_P (addr)
   11340       217253 :           && GET_MODE (SUBREG_REG (addr)) == DImode)
   11341              :         {
   11342       190276 :           addr = SUBREG_REG (addr);
   11343       190276 :           if (CONST_INT_P (addr))
   11344              :             return false;
   11345              :         }
   11346              :     }
   11347              : 
   11348   4328599941 :   if (REG_P (addr))
   11349              :     base = addr;
   11350              :   else if (SUBREG_P (addr))
   11351              :     {
   11352       458417 :       if (REG_P (SUBREG_REG (addr)))
   11353              :         base = addr;
   11354              :       else
   11355              :         return false;
   11356              :     }
   11357              :   else if (GET_CODE (addr) == PLUS)
   11358              :     {
   11359              :       rtx addends[4], op;
   11360              :       int n = 0, i;
   11361              : 
   11362              :       op = addr;
   11363   3164744291 :       do
   11364              :         {
   11365   3164744291 :           if (n >= 4)
   11366    643762767 :             return false;
   11367   3164738976 :           addends[n++] = XEXP (op, 1);
   11368   3164738976 :           op = XEXP (op, 0);
   11369              :         }
   11370   3164738976 :       while (GET_CODE (op) == PLUS);
   11371   3101459674 :       if (n >= 4)
   11372              :         return false;
   11373   3101452928 :       addends[n] = op;
   11374              : 
   11375   8083215547 :       for (i = n; i >= 0; --i)
   11376              :         {
   11377   5625513325 :           op = addends[i];
   11378   5625513325 :           switch (GET_CODE (op))
   11379              :             {
   11380     61215979 :             case MULT:
   11381     61215979 :               if (index)
   11382              :                 return false;
   11383     61176425 :               index = XEXP (op, 0);
   11384     61176425 :               scale_rtx = XEXP (op, 1);
   11385     61176425 :               break;
   11386              : 
   11387     12696505 :             case ASHIFT:
   11388     12696505 :               if (index)
   11389              :                 return false;
   11390     12623681 :               index = XEXP (op, 0);
   11391     12623681 :               tmp = XEXP (op, 1);
   11392     12623681 :               if (!CONST_INT_P (tmp))
   11393              :                 return false;
   11394     12609068 :               scale = INTVAL (tmp);
   11395     12609068 :               if ((unsigned HOST_WIDE_INT) scale > 3)
   11396              :                 return false;
   11397     12201582 :               scale = 1 << scale;
   11398     12201582 :               break;
   11399              : 
   11400      1013003 :             case ZERO_EXTEND:
   11401      1013003 :               op = XEXP (op, 0);
   11402      1013003 :               if (GET_CODE (op) != UNSPEC)
   11403              :                 return false;
   11404              :               /* FALLTHRU */
   11405              : 
   11406       698267 :             case UNSPEC:
   11407       698267 :               if (XINT (op, 1) == UNSPEC_TP
   11408       689947 :                   && TARGET_TLS_DIRECT_SEG_REFS
   11409       689947 :                   && seg == ADDR_SPACE_GENERIC)
   11410       689947 :                 seg = DEFAULT_TLS_SEG_REG;
   11411              :               else
   11412              :                 return false;
   11413              :               break;
   11414              : 
   11415       505472 :             case SUBREG:
   11416       505472 :               if (!REG_P (SUBREG_REG (op)))
   11417              :                 return false;
   11418              :               /* FALLTHRU */
   11419              : 
   11420   2528164786 :             case REG:
   11421   2528164786 :               if (!base)
   11422              :                 base = op;
   11423     82216450 :               else if (!index)
   11424              :                 index = op;
   11425              :               else
   11426              :                 return false;
   11427              :               break;
   11428              : 
   11429   2380348892 :             case CONST:
   11430   2380348892 :             case CONST_INT:
   11431   2380348892 :             case SYMBOL_REF:
   11432   2380348892 :             case LABEL_REF:
   11433   2380348892 :               if (disp)
   11434              :                 return false;
   11435              :               disp = op;
   11436              :               break;
   11437              : 
   11438              :             default:
   11439              :               return false;
   11440              :             }
   11441              :         }
   11442              :     }
   11443              :   else if (GET_CODE (addr) == MULT)
   11444              :     {
   11445      3719784 :       index = XEXP (addr, 0);           /* index*scale */
   11446      3719784 :       scale_rtx = XEXP (addr, 1);
   11447              :     }
   11448              :   else if (GET_CODE (addr) == ASHIFT)
   11449              :     {
   11450              :       /* We're called for lea too, which implements ashift on occasion.  */
   11451      3238325 :       index = XEXP (addr, 0);
   11452      3238325 :       tmp = XEXP (addr, 1);
   11453      3238325 :       if (!CONST_INT_P (tmp))
   11454              :         return false;
   11455      2852278 :       scale = INTVAL (tmp);
   11456      2852278 :       if ((unsigned HOST_WIDE_INT) scale > 3)
   11457              :         return false;
   11458      2115022 :       scale = 1 << scale;
   11459              :     }
   11460              :   else
   11461              :     disp = addr;                        /* displacement */
   11462              : 
   11463   2463537028 :   if (index)
   11464              :     {
   11465    151844924 :       if (REG_P (index))
   11466              :         ;
   11467      4040348 :       else if (SUBREG_P (index)
   11468       257830 :                && REG_P (SUBREG_REG (index)))
   11469              :         ;
   11470              :       else
   11471              :         return false;
   11472              :     }
   11473              : 
   11474              :   /* Extract the integral value of scale.  */
   11475   3679865220 :   if (scale_rtx)
   11476              :     {
   11477     56432837 :       if (!CONST_INT_P (scale_rtx))
   11478              :         return false;
   11479     55810074 :       scale = INTVAL (scale_rtx);
   11480              :     }
   11481              : 
   11482   3679242457 :   base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
   11483   3679242457 :   index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
   11484              : 
   11485              :   /* Avoid useless 0 displacement.  */
   11486   3679242457 :   if (disp == const0_rtx && (base || index))
   11487   3679242457 :     disp = NULL_RTX;
   11488              : 
   11489              :   /* Allow arg pointer and stack pointer as index if there is no scaling.  */
   11490   2685267914 :   if (base_reg && index_reg && scale == 1
   11491   3759978965 :       && (REGNO (index_reg) == ARG_POINTER_REGNUM
   11492              :           || REGNO (index_reg) == FRAME_POINTER_REGNUM
   11493              :           || REGNO (index_reg) == SP_REG))
   11494              :     {
   11495              :       std::swap (base, index);
   11496              :       std::swap (base_reg, index_reg);
   11497              :     }
   11498              : 
   11499              :   /* Special case: rewrite index*1+disp into base+disp.  */
   11500   3679242457 :   if (!base && index && scale == 1)
   11501            4 :     base = index, base_reg = index_reg, index = index_reg = NULL_RTX;
   11502              : 
   11503              :   /* Special case: %ebp cannot be encoded as a base without a displacement.
   11504              :      Similarly %r13.  */
   11505    323332547 :   if (!disp && base_reg
   11506   3998253540 :       && (REGNO (base_reg) == ARG_POINTER_REGNUM
   11507              :           || REGNO (base_reg) == FRAME_POINTER_REGNUM
   11508              :           || REGNO (base_reg) == BP_REG
   11509              :           || REGNO (base_reg) == R13_REG))
   11510              :     disp = const0_rtx;
   11511              : 
   11512              :   /* Special case: on K6, [%esi] makes the instruction vector decoded.
   11513              :      Avoid this by transforming to [%esi+0].
   11514              :      Reload calls address legitimization without cfun defined, so we need
   11515              :      to test cfun for being non-NULL. */
   11516            0 :   if (TARGET_CPU_P (K6) && cfun && optimize_function_for_speed_p (cfun)
   11517            0 :       && base_reg && !index_reg && !disp
   11518   3679242457 :       && REGNO (base_reg) == SI_REG)
   11519            0 :     disp = const0_rtx;
   11520              : 
   11521              :   /* Special case: encode reg+reg instead of reg*2.  */
   11522   3679242457 :   if (!base && index && scale == 2)
   11523    993974539 :     base = index, base_reg = index_reg, scale = 1;
   11524              : 
   11525              :   /* Special case: scaling cannot be encoded without base or displacement.  */
   11526    993974539 :   if (!base && !disp && index && scale != 1)
   11527      3437934 :     disp = const0_rtx;
   11528              : 
   11529   3679242457 :   out->base = base;
   11530   3679242457 :   out->index = index;
   11531   3679242457 :   out->disp = disp;
   11532   3679242457 :   out->scale = scale;
   11533   3679242457 :   out->seg = seg;
   11534              : 
   11535   3679242457 :   return true;
   11536              : }
   11537              : 
   11538              : /* Return cost of the memory address x.
   11539              :    For i386, it is better to use a complex address than let gcc copy
   11540              :    the address into a reg and make a new pseudo.  But not if the address
   11541              :    requires two regs - that would mean more pseudos with longer
   11542              :    lifetimes.  */
   11543              : static int
   11544     11316720 : ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
   11545              : {
   11546     11316720 :   struct ix86_address parts;
   11547     11316720 :   int cost = 1;
   11548     11316720 :   int ok = ix86_decompose_address (x, &parts);
   11549              : 
   11550     11316720 :   gcc_assert (ok);
   11551              : 
   11552     11316720 :   if (parts.base && SUBREG_P (parts.base))
   11553          466 :     parts.base = SUBREG_REG (parts.base);
   11554     11316720 :   if (parts.index && SUBREG_P (parts.index))
   11555           20 :     parts.index = SUBREG_REG (parts.index);
   11556              : 
   11557              :   /* Attempt to minimize number of registers in the address by increasing
   11558              :      address cost for each used register.  We don't increase address cost
   11559              :      for "pic_offset_table_rtx".  When a memopt with "pic_offset_table_rtx"
   11560              :      is not invariant itself it most likely means that base or index is not
   11561              :      invariant.  Therefore only "pic_offset_table_rtx" could be hoisted out,
   11562              :      which is not profitable for x86.  */
   11563     11316720 :   if (parts.base
   11564      9910357 :       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
   11565     20931502 :       && (current_pass->type == GIMPLE_PASS
   11566      2678072 :           || !pic_offset_table_rtx
   11567       127287 :           || !REG_P (parts.base)
   11568       127287 :           || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
   11569              :     cost++;
   11570              : 
   11571     11316720 :   if (parts.index
   11572      5488104 :       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
   11573     16790854 :       && (current_pass->type == GIMPLE_PASS
   11574       645653 :           || !pic_offset_table_rtx
   11575        55572 :           || !REG_P (parts.index)
   11576        55572 :           || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
   11577      5472831 :     cost++;
   11578              : 
   11579              :   /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
   11580              :      since its predecode logic can't detect the length of instructions
   11581              :      and it degenerates to vector decoded.  Increase cost of such
   11582              :      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
   11583              :      to split such addresses or even refuse such addresses at all.
   11584              : 
   11585              :      Following addressing modes are affected:
   11586              :       [base+scale*index]
   11587              :       [scale*index+disp]
   11588              :       [base+index]
   11589              : 
   11590              :      The first and last cases may be avoidable by explicitly coding the zero in
   11591              :      memory address, but I don't have an AMD-K6 machine handy to check this
   11592              :      theory.  */
   11593              : 
   11594     11316720 :   if (TARGET_CPU_P (K6)
   11595            0 :       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
   11596            0 :           || (parts.disp && !parts.base && parts.index && parts.scale != 1)
   11597            0 :           || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
   11598            0 :     cost += 10;
   11599              : 
   11600     11316720 :   return cost;
   11601              : }
   11602              : 
   11603              : /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
   11604              : 
   11605              : bool
   11606      1179688 : ix86_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
   11607              :                                      unsigned int align,
   11608              :                                      enum by_pieces_operation op,
   11609              :                                      bool speed_p)
   11610              : {
   11611              :   /* Return true when we are currently expanding memcpy/memset epilogue
   11612              :      with move_by_pieces or store_by_pieces.  */
   11613      1179688 :   if (cfun->machine->by_pieces_in_use)
   11614              :     return true;
   11615              : 
   11616      1177579 :   return default_use_by_pieces_infrastructure_p (size, align, op,
   11617      1177579 :                                                  speed_p);
   11618              : }
   11619              : 
   11620              : /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   11621              :    this is used to form addresses to local data when -fPIC is in
   11622              :    use.  */
   11623              : 
   11624              : static bool
   11625            0 : darwin_local_data_pic (rtx disp)
   11626              : {
   11627            0 :   return (GET_CODE (disp) == UNSPEC
   11628            0 :           && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
   11629              : }
   11630              : 
   11631              : /* True if the function symbol operand X should be loaded from GOT.
   11632              :    If CALL_P is true, X is a call operand.
   11633              : 
   11634              :    NB: -mno-direct-extern-access doesn't force load from GOT for
   11635              :    call.
   11636              : 
   11637              :    NB: In 32-bit mode, only non-PIC is allowed in inline assembly
   11638              :    statements, since a PIC register could not be available at the
   11639              :    call site.  */
   11640              : 
   11641              : bool
   11642   1839372862 : ix86_force_load_from_GOT_p (rtx x, bool call_p)
   11643              : {
   11644     96344826 :   return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X))
   11645              :           && !TARGET_PECOFF && !TARGET_MACHO
   11646   1836510826 :           && (!flag_pic || this_is_asm_operands)
   11647   1816219429 :           && ix86_cmodel != CM_LARGE
   11648   1816213430 :           && ix86_cmodel != CM_LARGE_PIC
   11649   1816213429 :           && SYMBOL_REF_P (x)
   11650   1816213427 :           && ((!call_p
   11651   1810788549 :                && (!ix86_direct_extern_access
   11652   1810786279 :                    || (SYMBOL_REF_DECL (x)
   11653   1631693872 :                        && lookup_attribute ("nodirect_extern_access",
   11654   1631693872 :                                             DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))
   11655   1816210703 :               || (SYMBOL_REF_FUNCTION_P (x)
   11656    685252434 :                   && (!flag_plt
   11657    685248023 :                       || (SYMBOL_REF_DECL (x)
   11658    685248023 :                           && lookup_attribute ("noplt",
   11659    685248023 :                                                DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))))
   11660   1839380396 :           && !SYMBOL_REF_LOCAL_P (x));
   11661              : }
   11662              : 
   11663              : /* Determine if a given RTX is a valid constant.  We already know this
   11664              :    satisfies CONSTANT_P.  */
   11665              : 
   11666              : static bool
   11667   1546395025 : ix86_legitimate_constant_p (machine_mode mode, rtx x)
   11668              : {
   11669   1546395025 :   switch (GET_CODE (x))
   11670              :     {
   11671    135897883 :     case CONST:
   11672    135897883 :       x = XEXP (x, 0);
   11673              : 
   11674    135897883 :       if (GET_CODE (x) == PLUS)
   11675              :         {
   11676    135780171 :           if (!CONST_INT_P (XEXP (x, 1)))
   11677              :             return false;
   11678    135780171 :           x = XEXP (x, 0);
   11679              :         }
   11680              : 
   11681    135897883 :       if (TARGET_MACHO && darwin_local_data_pic (x))
   11682              :         return true;
   11683              : 
   11684              :       /* Only some unspecs are valid as "constants".  */
   11685    135897883 :       if (GET_CODE (x) == UNSPEC)
   11686       494976 :         switch (XINT (x, 1))
   11687              :           {
   11688        21143 :           case UNSPEC_GOT:
   11689        21143 :           case UNSPEC_GOTOFF:
   11690        21143 :           case UNSPEC_PLTOFF:
   11691        21143 :             return TARGET_64BIT;
   11692       473470 :           case UNSPEC_TPOFF:
   11693       473470 :           case UNSPEC_NTPOFF:
   11694       473470 :             x = XVECEXP (x, 0, 0);
   11695       473470 :             return (SYMBOL_REF_P (x)
   11696       473470 :                     && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
   11697          275 :           case UNSPEC_DTPOFF:
   11698          275 :             x = XVECEXP (x, 0, 0);
   11699          275 :             return (SYMBOL_REF_P (x)
   11700          275 :                     && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
   11701            0 :           case UNSPEC_SECREL32:
   11702            0 :             x = XVECEXP (x, 0, 0);
   11703            0 :             return SYMBOL_REF_P (x);
   11704              :           default:
   11705              :             return false;
   11706              :           }
   11707              : 
   11708              :       /* We must have drilled down to a symbol.  */
   11709    135402907 :       if (LABEL_REF_P (x))
   11710              :         return true;
   11711    135397633 :       if (!SYMBOL_REF_P (x))
   11712              :         return false;
   11713              :       /* FALLTHRU */
   11714              : 
   11715    922245957 :     case SYMBOL_REF:
   11716              :       /* TLS symbols are never valid.  */
   11717    922245957 :       if (SYMBOL_REF_TLS_MODEL (x))
   11718              :         return false;
   11719              : 
   11720              :       /* DLLIMPORT symbols are never valid.  */
   11721    922141485 :       if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
   11722              :           && SYMBOL_REF_DLLIMPORT_P (x))
   11723              :         return false;
   11724              : 
   11725              : #if TARGET_MACHO
   11726              :       /* mdynamic-no-pic */
   11727              :       if (MACHO_DYNAMIC_NO_PIC_P)
   11728              :         return machopic_symbol_defined_p (x);
   11729              : #endif
   11730              : 
   11731              :       /* External function address should be loaded
   11732              :          via the GOT slot to avoid PLT.  */
   11733    922141485 :       if (ix86_force_load_from_GOT_p (x))
   11734              :         return false;
   11735              : 
   11736              :       break;
   11737              : 
   11738    602684688 :     CASE_CONST_SCALAR_INT:
   11739    602684688 :       if (ix86_endbr_immediate_operand (x, VOIDmode))
   11740              :         return false;
   11741              : 
   11742    602684487 :       switch (mode)
   11743              :         {
   11744      1454356 :         case E_TImode:
   11745      1454356 :           if (TARGET_64BIT)
   11746              :             return true;
   11747              :           /* FALLTHRU */
   11748        26007 :         case E_OImode:
   11749        26007 :         case E_XImode:
   11750        26007 :           if (!standard_sse_constant_p (x, mode)
   11751        43218 :               && GET_MODE_SIZE (TARGET_AVX512F
   11752              :                                 ? XImode
   11753              :                                 : (TARGET_AVX
   11754              :                                    ? OImode
   11755              :                                    : (TARGET_SSE2
   11756        17211 :                                       ? TImode : DImode))) < GET_MODE_SIZE (mode))
   11757              :             return false;
   11758              :         default:
   11759              :           break;
   11760              :         }
   11761              :       break;
   11762              : 
   11763      8637727 :     case CONST_VECTOR:
   11764      8637727 :       if (!standard_sse_constant_p (x, mode))
   11765              :         return false;
   11766              :       break;
   11767              : 
   11768      7696204 :     case CONST_DOUBLE:
   11769      7696204 :       if (mode == E_BFmode)
   11770              :         return false;
   11771              : 
   11772              :     default:
   11773              :       break;
   11774              :     }
   11775              : 
   11776              :   /* Otherwise we handle everything else in the move patterns.  */
   11777              :   return true;
   11778              : }
   11779              : 
   11780              : /* Return true if X must not be forced into the constant pool.  This is the
   11781              :    case for the address of thread-local symbols, which is checked above.  Any X
   11782              :    not matched by CASE_CONST_ANY is refused iff it is not a legitimate constant.  */
   11783              : 
   11784              : static bool
   11785     61553751 : ix86_cannot_force_const_mem (machine_mode mode, rtx x)
   11786              : {
   11787              :   /* We can put any immediate constant in memory.  */
   11788     61553751 :   switch (GET_CODE (x))
   11789              :     {
   11790              :     CASE_CONST_ANY:
   11791              :       return false;
   11792              : 
   11793      1797151 :     default:
   11794      1797151 :       break;
   11795              :     }
   11796              : 
   11797      1797151 :   return !ix86_legitimate_constant_p (mode, x);
   11798              : }
   11799              : 
   11800              : /* Return a unique alias set for the GOT; it is created once and then reused.  */
   11801              : 
   11802              : alias_set_type
   11803       189283 : ix86_GOT_alias_set (void)
   11804              : {
   11805       189283 :   static alias_set_type set = -1;  /* -1 means "not created yet".  */
   11806       189283 :   if (set == -1)
   11807         2988 :     set = new_alias_set ();
   11808       189283 :   return set;
   11809              : }
   11810              : 
   11811              : /* Return true if the constant value X is a legitimate general operand
   11812              :    when generating PIC code.  It is given that flag_pic is on and that X
   11813              :    satisfies CONSTANT_P.  Anything but CONST, SYMBOL_REF and LABEL_REF is OK.  */
   11814              : 
   11815              : bool
   11816    126156698 : legitimate_pic_operand_p (rtx x)
   11817              : {
   11818    126156698 :   rtx inner;
   11819              : 
   11820    126156698 :   switch (GET_CODE (x))
   11821              :     {
   11822      2505949 :     case CONST:
   11823      2505949 :       inner = XEXP (x, 0);
   11824      2505949 :       if (GET_CODE (inner) == PLUS
   11825       358195 :           && CONST_INT_P (XEXP (inner, 1)))
   11826       358195 :         inner = XEXP (inner, 0);  /* Look through a constant offset.  */
   11827              : 
   11828              :       /* Only some unspecs are valid as "constants".  */
   11829      2505949 :       if (GET_CODE (inner) == UNSPEC)
   11830      2255943 :         switch (XINT (inner, 1))
   11831              :           {
   11832      2195382 :           case UNSPEC_GOT:
   11833      2195382 :           case UNSPEC_GOTOFF:
   11834      2195382 :           case UNSPEC_PLTOFF:
   11835      2195382 :             return TARGET_64BIT;  /* Accepted only when TARGET_64BIT.  */
   11836            0 :           case UNSPEC_TPOFF:
   11837            0 :             x = XVECEXP (inner, 0, 0);  /* Must wrap a local-exec TLS symbol.  */
   11838            0 :             return (SYMBOL_REF_P (x)
   11839            0 :                     && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
   11840            0 :           case UNSPEC_SECREL32:
   11841            0 :             x = XVECEXP (inner, 0, 0);  /* Must wrap a SYMBOL_REF.  */
   11842            0 :             return SYMBOL_REF_P (x);
   11843            0 :           case UNSPEC_MACHOPIC_OFFSET:
   11844            0 :             return legitimate_pic_address_disp_p (x);  /* Checks all of X, not INNER.  */
   11845              :           default:
   11846              :             return false;
   11847              :           }
   11848              :       /* FALLTHRU */
   11849              : 
   11850      6990801 :     case SYMBOL_REF:
   11851      6990801 :     case LABEL_REF:
   11852      6990801 :       return legitimate_pic_address_disp_p (x);
   11853              : 
   11854              :     default:
   11855              :       return true;
   11856              :     }
   11857              : }
   11858              : 
   11859              : /* Return true if DISP is a valid memory displacement in PIC mode.  Apart from
   11860              :    the 64-bit symbol and label cases handled first, DISP must be a CONST.  */
   11861              : 
   11862              : bool
   11863     64749842 : legitimate_pic_address_disp_p (rtx disp)
   11864              : {
   11865     64749842 :   bool saw_plus;  /* Only set and used on the non-64-bit path below.  */
   11866              : 
   11867              :   /* In 64bit mode we can allow direct addresses of symbols and labels
   11868              :      when they are not dynamic symbols.  */
   11869     64749842 :   if (TARGET_64BIT)
   11870              :     {
   11871     39577700 :       rtx op0 = disp, op1;
   11872              : 
   11873     39577700 :       switch (GET_CODE (disp))
   11874              :         {
   11875              :         case LABEL_REF:
   11876              :           return true;
   11877              : 
   11878     10945329 :         case CONST:
   11879     10945329 :           if (GET_CODE (XEXP (disp, 0)) != PLUS)
   11880              :             break;
   11881      1177172 :           op0 = XEXP (XEXP (disp, 0), 0);
   11882      1177172 :           op1 = XEXP (XEXP (disp, 0), 1);
   11883      1177172 :           if (!CONST_INT_P (op1))
   11884              :             break;
   11885      1177172 :           if (GET_CODE (op0) == UNSPEC
   11886          296 :               && (XINT (op0, 1) == UNSPEC_DTPOFF
   11887          296 :                   || XINT (op0, 1) == UNSPEC_NTPOFF)
   11888      1177468 :               && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
   11889              :             return true;
   11890      1176876 :           if (INTVAL (op1) >= 16*1024*1024
   11891      1176876 :               || INTVAL (op1) < -16*1024*1024)
   11892              :             break;  /* Offset is outside [-16MB, 16MB).  */
   11893      1176788 :           if (LABEL_REF_P (op0))
   11894              :             return true;
   11895      1176788 :           if (GET_CODE (op0) == CONST
   11896            0 :               && GET_CODE (XEXP (op0, 0)) == UNSPEC
   11897            0 :               && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
   11898              :             return true;
   11899      1176788 :           if (GET_CODE (op0) == UNSPEC
   11900            0 :               && XINT (op0, 1) == UNSPEC_PCREL)
   11901              :             return true;
   11902      1176788 :           if (!SYMBOL_REF_P (op0))
   11903              :             break;
   11904              :           /* FALLTHRU */
   11905              : 
   11906     29585603 :         case SYMBOL_REF:
   11907              :           /* TLS references should always be enclosed in UNSPEC.
   11908              :              A dllimported symbol always needs to be resolved.  */
   11909     29585603 :           if (SYMBOL_REF_TLS_MODEL (op0)
   11910              :               || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
   11911              :             return false;
   11912              : 
   11913     29429812 :           if (TARGET_PECOFF)
   11914              :             {
   11915              : #if TARGET_PECOFF
   11916              :               if (is_imported_p (op0))
   11917              :                 return true;
   11918              : #endif
   11919              : 
   11920              :               if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
   11921              :                 break;
   11922              : 
   11923              :               /* Non-external-weak function symbols need to be resolved only
   11924              :                  for the large model.  Non-external symbols don't need to be
   11925              :                  resolved for large and medium models.  For the small model,
   11926              :                  we don't need to resolve anything here.  */
   11927              :               if ((ix86_cmodel != CM_LARGE_PIC
   11928              :                    && SYMBOL_REF_FUNCTION_P (op0)
   11929              :                    && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
   11930              :                   || !SYMBOL_REF_EXTERNAL_P (op0)
   11931              :                   || ix86_cmodel == CM_SMALL_PIC)
   11932              :                 return true;
   11933              :             }
   11934     29429812 :           else if (!SYMBOL_REF_FAR_ADDR_P (op0)
   11935     29429808 :                    && (SYMBOL_REF_LOCAL_P (op0)
   11936     17842659 :                        || ((ix86_direct_extern_access
   11937     35513608 :                             && !(SYMBOL_REF_DECL (op0)
   11938     17671112 :                                  && lookup_attribute ("nodirect_extern_access",
   11939     17671112 :                                                       DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0)))))
   11940              :                            && HAVE_LD_PIE_COPYRELOC
   11941     17842333 :                            && flag_pie
   11942        34047 :                            && !SYMBOL_REF_WEAK (op0)
   11943        33659 :                            && !SYMBOL_REF_FUNCTION_P (op0)))
   11944     41020694 :                    && ix86_cmodel != CM_LARGE_PIC)
   11945              :             return true;
   11946              :           break;
   11947              : 
   11948              :         default:
   11949              :           break;
   11950              :         }
   11951              :     }
   11952     52783192 :   if (GET_CODE (disp) != CONST)
   11953              :     return false;
   11954     14966138 :   disp = XEXP (disp, 0);  /* From here on DISP is the operand of the CONST.  */
   11955              : 
   11956     14966138 :   if (TARGET_64BIT)
   11957              :     {
   11958              :       /* It is unsafe to allow PLUS expressions.  This limits the allowed
   11959              :          distance of GOT tables.  We should not need these anyway.  */
   11960      9820639 :       if (GET_CODE (disp) != UNSPEC
   11961      9768157 :           || (XINT (disp, 1) != UNSPEC_GOTPCREL
   11962      9768157 :               && XINT (disp, 1) != UNSPEC_GOTOFF
   11963              :               && XINT (disp, 1) != UNSPEC_PCREL
   11964              :               && XINT (disp, 1) != UNSPEC_PLTOFF))
   11965              :         return false;
   11966              : 
   11967      9768157 :       if (!SYMBOL_REF_P (XVECEXP (disp, 0, 0))
   11968      9768157 :           && !LABEL_REF_P (XVECEXP (disp, 0, 0)))
   11969              :         return false;
   11970              :       return true;
   11971              :     }
   11972              : 
   11973      5145499 :   saw_plus = false;
   11974      5145499 :   if (GET_CODE (disp) == PLUS)
   11975              :     {
   11976       589234 :       if (!CONST_INT_P (XEXP (disp, 1)))
   11977              :         return false;
   11978       589234 :       disp = XEXP (disp, 0);
   11979       589234 :       saw_plus = true;
   11980              :     }
   11981              : 
   11982      5145499 :   if (TARGET_MACHO && darwin_local_data_pic (disp))
   11983              :     return true;
   11984              : 
   11985      5145499 :   if (GET_CODE (disp) != UNSPEC)
   11986              :     return false;
   11987              : 
   11988      4980299 :   switch (XINT (disp, 1))
   11989              :     {
   11990      2265997 :     case UNSPEC_GOT:
   11991      2265997 :       if (saw_plus)
   11992              :         return false;
   11993              :       /* We need to check for both symbols and labels because VxWorks loads
   11994              :          text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
   11995              :          details.  */
   11996      2265996 :       return (SYMBOL_REF_P (XVECEXP (disp, 0, 0))
   11997      2265996 :               || LABEL_REF_P (XVECEXP (disp, 0, 0)));
   11998      2714302 :     case UNSPEC_GOTOFF:
   11999              :       /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
   12000              :          While the ABI also specifies a 32bit relocation, we don't produce
   12001              :          it in the small PIC model at all.  (TARGET_64BIT returned above.)  */
   12002      2714302 :       if ((SYMBOL_REF_P (XVECEXP (disp, 0, 0))
   12003      2714302 :            || LABEL_REF_P (XVECEXP (disp, 0, 0)))
   12004              :           && !TARGET_64BIT)
   12005      5428604 :         return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
   12006              :       return false;
   12007            0 :     case UNSPEC_GOTTPOFF:
   12008            0 :     case UNSPEC_GOTNTPOFF:
   12009            0 :     case UNSPEC_INDNTPOFF:
   12010            0 :       if (saw_plus)
   12011              :         return false;
   12012            0 :       disp = XVECEXP (disp, 0, 0);
   12013            0 :       return (SYMBOL_REF_P (disp)
   12014            0 :               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
   12015            0 :     case UNSPEC_NTPOFF:
   12016            0 :       disp = XVECEXP (disp, 0, 0);
   12017            0 :       return (SYMBOL_REF_P (disp)
   12018            0 :               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
   12019            0 :     case UNSPEC_DTPOFF:
   12020            0 :       disp = XVECEXP (disp, 0, 0);
   12021            0 :       return (SYMBOL_REF_P (disp)
   12022            0 :               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
   12023            0 :     case UNSPEC_SECREL32:
   12024            0 :       disp = XVECEXP (disp, 0, 0);
   12025            0 :       return SYMBOL_REF_P (disp);
   12026              :     }
   12027              : 
   12028              :   return false;  /* Any other UNSPEC is not a valid PIC displacement.  */
   12029              : }
   12030              : 
   12031              : /* Determine if OP is a suitable RTX for an address register.
   12032              :    Return the naked register if a register or an acceptable register
   12033              :    subreg is found, otherwise return NULL_RTX.  */
   12034              : 
   12035              : static rtx
   12036   1373338834 : ix86_validate_address_register (rtx op)
   12037              : {
   12038   1373338834 :   machine_mode mode = GET_MODE (op);
   12039              : 
   12040              :   /* Only SImode or DImode registers can form the address.  */
   12041   1373338834 :   if (mode != SImode && mode != DImode)
   12042              :     return NULL_RTX;
   12043              : 
   12044   1373331923 :   if (REG_P (op))
   12045              :     return op;
   12046       694996 :   else if (SUBREG_P (op))
   12047              :     {
   12048       694996 :       rtx reg = SUBREG_REG (op);
   12049              : 
   12050       694996 :       if (!REG_P (reg))
   12051              :         return NULL_RTX;
   12052              : 
   12053       694996 :       mode = GET_MODE (reg);  /* From here on MODE is the inner register's mode.  */
   12054              : 
   12055              :       /* Don't allow SUBREGs that span more than a word.  It can
   12056              :          lead to spill failures when the register is one word out
   12057              :          of a two word structure.  */
   12058      1435356 :       if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   12059              :         return NULL_RTX;
   12060              : 
   12061              :       /* Allow only SUBREGs of non-eliminable hard registers.  */
   12062       234033 :       if (register_no_elim_operand (reg, mode))
   12063              :         return reg;
   12064              :     }
   12065              : 
   12066              :   /* OP is neither a register nor an acceptable SUBREG of one.  */
   12067              :   return NULL_RTX;
   12068              : }
   12069              : 
   12070              : /* Determine which memory address register set (enum attr_addr) INSN can use.  */
   12071              : 
   12072              : static enum attr_addr
   12073    253806684 : ix86_memory_address_reg_class (rtx_insn* insn)
   12074              : {
   12075              :   /* LRA can do some initialization with a NULL insn;
   12076              :      return the maximum register class in this case.  */
   12077    253806684 :   enum attr_addr addr_rclass = ADDR_GPR32;
   12078              : 
   12079    253806684 :   if (!insn)
   12080              :     return addr_rclass;
   12081              : 
   12082     72377934 :   if (asm_noperands (PATTERN (insn)) >= 0
   12083     72377934 :       || GET_CODE (PATTERN (insn)) == ASM_INPUT)
   12084        75424 :     return ix86_apx_inline_asm_use_gpr32 ? ADDR_GPR32 : ADDR_GPR16;  /* Inline asm.  */
   12085              : 
   12086              :   /* Return maximum register class for unrecognized instructions.  */
   12087     72340222 :   if (INSN_CODE (insn) < 0)
   12088              :     return addr_rclass;
   12089              : 
   12090              :   /* Try to recognize the insn before calling get_attr_addr.
   12091              :      Save current recog_data and current alternative; both are restored below.  */
   12092     72340222 :   struct recog_data_d saved_recog_data = recog_data;
   12093     72340222 :   int saved_alternative = which_alternative;
   12094              : 
   12095              :   /* Update recog_data for processing of alternatives.  */
   12096     72340222 :   extract_insn_cached (insn);
   12097              : 
   12098              :   /* If current alternative is not set, loop through enabled alternatives
   12099              :      and get the most limited register class (taken as the MIN value).  */
   12100     72340222 :   if (saved_alternative == -1)
   12101              :     {
   12102     72340222 :       alternative_mask enabled = get_enabled_alternatives (insn);
   12103              : 
   12104   1249068023 :       for (int i = 0; i < recog_data.n_alternatives; i++)
   12105              :         {
   12106   1176727801 :           if (!TEST_BIT (enabled, i))
   12107    349115972 :             continue;
   12108              : 
   12109    827611829 :           which_alternative = i;  /* Set before the get_attr_addr call below.  */
   12110    827611829 :           addr_rclass = MIN (addr_rclass, get_attr_addr (insn));
   12111              :         }
   12112              :     }
   12113              :   else
   12114              :     {
   12115            0 :       which_alternative = saved_alternative;
   12116            0 :       addr_rclass = get_attr_addr (insn);
   12117              :     }
   12118              : 
   12119     72340222 :   recog_data = saved_recog_data;  /* Restore the state saved above.  */
   12120     72340222 :   which_alternative = saved_alternative;
   12121              : 
   12122     72340222 :   return addr_rclass;
   12123              : }
   12124              : 
   12125              : /* Return the base register class that a memory address of INSN can use.  */
   12126              : 
   12127              : enum reg_class
   12128    213290846 : ix86_insn_base_reg_class (rtx_insn* insn)
   12129              : {
   12130    213290846 :   switch (ix86_memory_address_reg_class (insn))
   12131              :     {
   12132              :     case ADDR_GPR8:
   12133              :       return LEGACY_GENERAL_REGS;
   12134              :     case ADDR_GPR16:
   12135              :       return GENERAL_GPR16;
   12136              :     case ADDR_GPR32:
   12137              :       break;  /* No restriction: BASE_REG_CLASS is returned below.  */
   12138            0 :     default:
   12139            0 :       gcc_unreachable ();
   12140              :     }
   12141              : 
   12142              :   return BASE_REG_CLASS;
   12143              : }
   12144              : /* Return true if REGNO passes the register-number test selected by ix86_memory_address_reg_class (INSN).  */
   12145              : bool
   12146      1240506 : ix86_regno_ok_for_insn_base_p (int regno, rtx_insn* insn)
   12147              : {
   12148      1240506 :   switch (ix86_memory_address_reg_class (insn))
   12149              :     {
   12150            0 :     case ADDR_GPR8:
   12151            0 :       return LEGACY_INT_REGNO_P (regno);
   12152            0 :     case ADDR_GPR16:
   12153            0 :       return GENERAL_GPR16_REGNO_P (regno);
   12154      1240506 :     case ADDR_GPR32:
   12155      1240506 :       break;  /* No restriction: GENERAL_REGNO_P is tested below.  */
   12156            0 :     default:
   12157            0 :       gcc_unreachable ();
   12158              :     }
   12159              : 
   12160      1240506 :   return GENERAL_REGNO_P (regno);
   12161              : }
   12162              : /* Return the index register class that a memory address of INSN can use.  */
   12163              : enum reg_class
   12164     39275332 : ix86_insn_index_reg_class (rtx_insn* insn)
   12165              : {
   12166     39275332 :   switch (ix86_memory_address_reg_class (insn))
   12167              :     {
   12168              :     case ADDR_GPR8:
   12169              :       return LEGACY_INDEX_REGS;
   12170              :     case ADDR_GPR16:
   12171              :       return INDEX_GPR16;
   12172              :     case ADDR_GPR32:
   12173              :       break;  /* No restriction: INDEX_REG_CLASS is returned below.  */
   12174            0 :     default:
   12175            0 :       gcc_unreachable ();
   12176              :     }
   12177              : 
   12178              :   return INDEX_REG_CLASS;
   12179              : }
   12180              : 
   12181              : /* Recognizes RTL expressions that are valid memory addresses for an
   12182              :    instruction.  The machine mode of the MEM expression that wants to use
   12183              :    this address and the trailing code_helper are unnamed and unused here;
   12184              :    STRICT selects the strict or the non-strict register checks.
   12185              :    It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
   12186              :    convert common non-canonical forms to canonical form so that they will
   12187              :    be recognized.  */
   12188              : 
   12189              : static bool
   12190   2242806018 : ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
   12191              :                            code_helper = ERROR_MARK)
   12192              : {
   12193   2242806018 :   struct ix86_address parts;
   12194   2242806018 :   rtx base, index, disp;
   12195   2242806018 :   HOST_WIDE_INT scale;
   12196   2242806018 :   addr_space_t seg;
   12197              : 
   12198   2242806018 :   if (ix86_decompose_address (addr, &parts) == 0)
   12199              :     /* Decomposition failed.  */
   12200              :     return false;
   12201              : 
   12202   2231252893 :   base = parts.base;
   12203   2231252893 :   index = parts.index;
   12204   2231252893 :   disp = parts.disp;
   12205   2231252893 :   scale = parts.scale;
   12206   2231252893 :   seg = parts.seg;
   12207              : 
   12208              :   /* Validate base register.  */
   12209   2231252893 :   if (base)
   12210              :     {
   12211   1286095528 :       rtx reg = ix86_validate_address_register (base);
   12212              : 
   12213   1286095528 :       if (reg == NULL_RTX)
   12214              :         return false;
   12215              : 
   12216   1285664712 :       unsigned int regno = REGNO (reg);
   12217   1285664712 :       if ((strict && !REGNO_OK_FOR_BASE_P (regno))
   12218   1281239428 :           || (!strict && !REGNO_OK_FOR_BASE_NONSTRICT_P (regno)))
   12219              :         /* Base is not valid.  */
   12220              :         return false;
   12221              :     }
   12222              : 
   12223              :   /* Validate index register.  */
   12224   2229489639 :   if (index)
   12225              :     {
   12226     87243306 :       rtx reg = ix86_validate_address_register (index);
   12227              : 
   12228     87243306 :       if (reg == NULL_RTX)
   12229              :         return false;
   12230              : 
   12231     87206073 :       unsigned int regno = REGNO (reg);
   12232     87206073 :       if ((strict && !REGNO_OK_FOR_INDEX_P (regno))
   12233     87198126 :           || (!strict && !REGNO_OK_FOR_INDEX_NONSTRICT_P (regno)))
   12234              :         /* Index is not valid.  */
   12235              :         return false;
   12236              :     }
   12237              : 
   12238              :   /* Index and base should have the same mode.  */
   12239   2229450385 :   if (base && index
   12240     77615623 :       && GET_MODE (base) != GET_MODE (index))
   12241              :     return false;
   12242              : 
   12243              :   /* Address override works only on the (%reg) part of %fs:(%reg).  */
   12244   2229190252 :   if (seg != ADDR_SPACE_GENERIC
   12245   2229190252 :       && ((base && GET_MODE (base) != word_mode)
   12246       339890 :           || (index && GET_MODE (index) != word_mode)))
   12247              :     return false;
   12248              : 
   12249              :   /* Validate scale factor.  */
   12250   2229190223 :   if (scale != 1)
   12251              :     {
   12252     39989635 :       if (!index)
   12253              :         /* Scale without index.  */
   12254              :         return false;
   12255              : 
   12256     39989635 :       if (scale != 2 && scale != 4 && scale != 8)
   12257              :         /* Scale is not a valid multiplier.  */
   12258              :         return false;
   12259              :     }
   12260              : 
   12261              :   /* Validate displacement.  */
   12262   2226031536 :   if (disp)
   12263              :     {
   12264   1999784338 :       if (ix86_endbr_immediate_operand (disp, VOIDmode))
   12265              :         return false;
   12266              : 
   12267   1999784295 :       if (GET_CODE (disp) == CONST
   12268    146428852 :           && GET_CODE (XEXP (disp, 0)) == UNSPEC
   12269     15401655 :           && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
   12270     15401655 :         switch (XINT (XEXP (disp, 0), 1))
   12271              :           {
   12272              :           /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
   12273              :              when used.  While the ABI also specifies 32bit relocations, we
   12274              :              don't produce them at all and use IP relative instead.
   12275              :              Allow GOT in 32bit mode for both PIC and non-PIC if symbol
   12276              :              should be loaded via GOT.  */
   12277      2266055 :           case UNSPEC_GOT:
   12278      2266055 :             if (!TARGET_64BIT
   12279      2266055 :                 && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
   12280            0 :               goto is_legitimate_pic;
   12281              :             /* FALLTHRU */
   12282      4556468 :           case UNSPEC_GOTOFF:
   12283      4556468 :             gcc_assert (flag_pic);
   12284      4556468 :             if (!TARGET_64BIT)
   12285      4556265 :               goto is_legitimate_pic;
   12286              : 
   12287              :             /* 64bit address unspec.  */
   12288              :             return false;
   12289              : 
   12290      9768129 :           case UNSPEC_GOTPCREL:
   12291      9768129 :             if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
   12292         2534 :               goto is_legitimate_pic;
   12293              :             /* FALLTHRU */
   12294      9765595 :           case UNSPEC_PCREL:
   12295      9765595 :             gcc_assert (flag_pic);
   12296      9765595 :             goto is_legitimate_pic;
   12297              : 
   12298              :           case UNSPEC_GOTTPOFF:
   12299              :           case UNSPEC_GOTNTPOFF:
   12300              :           case UNSPEC_INDNTPOFF:
   12301              :           case UNSPEC_NTPOFF:
   12302              :           case UNSPEC_DTPOFF:
   12303              :           case UNSPEC_SECREL32:
   12304              :             break;
   12305              : 
   12306              :           default:
   12307              :             /* Invalid address unspec.  */
   12308              :             return false;
   12309              :           }
   12310              : 
   12311   1261807855 :       else if (SYMBOLIC_CONST (disp)
   12312   2115409837 :                && (flag_pic
   12313              : #if TARGET_MACHO
   12314              :                    || (MACHOPIC_INDIRECT
   12315              :                        && !machopic_operand_p (disp))
   12316              : #endif
   12317              :                   ))
   12318              :         {
   12319              : 
   12320     57597759 :         is_legitimate_pic:
   12321     57597759 :           if (TARGET_64BIT && (index || base))
   12322              :             {
   12323              :               /* Only a DTPOFF, NTPOFF or SECREL32 unspec plus a CONST_INT is ok, e.g. foo@dtpoff+N(%rX).  */
   12324        37405 :               if (GET_CODE (disp) != CONST
   12325         7108 :                   || GET_CODE (XEXP (disp, 0)) != PLUS
   12326         7108 :                   || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
   12327         4637 :                   || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
   12328         4637 :                   || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
   12329         4637 :                       && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF
   12330            6 :                       && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_SECREL32))
   12331              :                 /* Non-constant pic memory reference.  */
   12332              :                 return false;
   12333              :             }
   12334     57560354 :           else if ((!TARGET_MACHO || flag_pic)
   12335     57560354 :                     && ! legitimate_pic_address_disp_p (disp))
   12336              :             /* Displacement is an invalid pic construct.  */
   12337              :             return false;
   12338              : #if TARGET_MACHO
   12339              :           else if (MACHO_DYNAMIC_NO_PIC_P
   12340              :                    && !ix86_legitimate_constant_p (Pmode, disp))
   12341              :             /* displacement must be referenced via non_lazy_pointer */
   12342              :             return false;
   12343              : #endif
   12344              : 
   12345              :           /* This code used to verify that a symbolic pic displacement
   12346              :              includes the pic_offset_table_rtx register.
   12347              : 
   12348              :              While this is a good idea, unfortunately these constructs may
   12349              :              be created by "adds using lea" optimization for incorrect
   12350              :              code like:
   12351              : 
   12352              :              int a;
   12353              :              int foo(int i)
   12354              :                {
   12355              :                  return *(&a+i);
   12356              :                }
   12357              : 
   12358              :              This code is nonsensical, but results in addressing
   12359              :              GOT table with pic_offset_table_rtx base.  We can't
   12360              :              just refuse it easily, since it gets matched by
   12361              :              "addsi3" pattern, that later gets split to lea in the
   12362              :              case output register differs from input.  While this
   12363              :              can be handled by separate addsi pattern for this case
   12364              :              that never results in lea, disabling this test seems to
   12365              :              be the easier and correct fix for the crash.  */
   12366              :         }
   12367   1941109275 :       else if (!LABEL_REF_P (disp)
   12368   1940932751 :                && !CONST_INT_P (disp)
   12369    867390853 :                && (GET_CODE (disp) != CONST
   12370    132464446 :                    || !ix86_legitimate_constant_p (Pmode, disp))
   12371   2679000186 :                && (!SYMBOL_REF_P (disp)
   12372    745781144 :                    || !ix86_legitimate_constant_p (Pmode, disp)))
   12373              :         /* Displacement is not constant.  */
   12374     57317512 :         return false;
   12375   1883791763 :       else if (TARGET_64BIT
   12376   1883791763 :                && !x86_64_immediate_operand (disp, VOIDmode))
   12377              :         /* Displacement is out of range.  */
   12378              :         return false;
   12379              :       /* In x32 mode, constant addresses are sign extended to 64bit, so
   12380              :          we have to prevent addresses from 0x80000000 to 0xffffffff.  */
   12381        45622 :       else if (TARGET_X32 && !(index || base)
   12382        17326 :                && CONST_INT_P (disp)
   12383   1883286380 :                && val_signbit_known_set_p (SImode, INTVAL (disp)))
   12384              :         return false;
   12385              :     }
   12386              : 
   12387              :   /* Everything looks valid.  */
   12388              :   return true;
   12389              : }
   12390              : 
   12391              : /* Determine if a given RTX is a valid constant address.
                      : 
                      :    Returns true only when X satisfies CONSTANT_P and
                      :    ix86_legitimate_address_p accepts it as a Pmode address.  The
                      :    literal 1 is passed as that function's third argument
                      :    (presumably the strict-checking flag -- confirm against its
                      :    definition).  */
   12392              : 
   12393              : bool
   12394   2775903345 : constant_address_p (rtx x)
   12395              : {
   12396   2856287337 :   return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
   12397              : }
   12398              : 
   12399              : 
   12400              : /* Return a legitimate reference for ORIG (an address) using the
   12401              :    register REG.  If REG is 0, a new pseudo is generated.
   12402              : 
   12403              :    There are two types of references that must be handled:
   12404              : 
   12405              :    1. Global data references must load the address from the GOT, via
   12406              :       the PIC reg.  An insn is emitted to do this load, and the reg is
   12407              :       returned.
   12408              : 
   12409              :    2. Static data references, constant pool addresses, and code labels
   12410              :       compute the address as an offset from the GOT, whose base is in
   12411              :       the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
   12412              :       differentiate them from global data objects.  The returned
   12413              :       address is the PIC reg + an unspec constant.
   12414              : 
   12415              :    TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   12416              :    reg also appears in the address.
                      : 
                      :    Returns the legitimized address.  ORIG is returned unchanged
                      :    when it is a CONST wrapping an UNSPEC (or a PLUS whose first
                      :    operand is an UNSPEC), and also when none of the cases below
                      :    rewrites it.  A PLUS address that is not handled as a @GOTOFF
                      :    symbol plus constant is processed by legitimizing both of its
                      :    operands recursively and recombining the results.  */
   12417              : 
   12418              : rtx
   12419       398533 : legitimize_pic_address (rtx orig, rtx reg)
   12420              : {
   12421       398533 :   rtx addr = orig;  /* Working copy; a CONST wrapper is stripped from it below.  */
   12422       398533 :   rtx new_rtx = orig;  /* Result; remains ORIG unless rewritten below.  */
   12423              : 
   12424              : #if TARGET_MACHO
   12425              :   if (TARGET_MACHO && !TARGET_64BIT)
   12426              :     {
   12427              :       if (reg == 0)
   12428              :         reg = gen_reg_rtx (Pmode);
   12429              :       /* Use the generic Mach-O PIC machinery.  */
   12430              :       return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
   12431              :     }
   12432              : #endif
   12433              : 
   12434       398533 :   if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
   12435              :     {
   12436              : #if TARGET_PECOFF
   12437              :       rtx tmp = legitimize_pe_coff_symbol (addr, true);
   12438              :       if (tmp)
   12439              :         return tmp;
   12440              : #endif
   12441              :     }
   12442              : 
   12443       398533 :   if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
   12444              :     new_rtx = addr;  /* Accepted by legitimate_pic_address_disp_p; use unchanged.  */
   12445       302160 :   else if ((!TARGET_64BIT
   12446       102314 :             || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
   12447              :            && !TARGET_PECOFF
   12448       502103 :            && gotoff_operand (addr, Pmode))
   12449              :     {
   12450              :       /* This symbol may be referenced via a displacement
   12451              :          from the PIC base address (@GOTOFF).  */
   12452        96408 :       if (GET_CODE (addr) == CONST)
   12453         3047 :         addr = XEXP (addr, 0);
   12454              : 
   12455        96408 :       if (GET_CODE (addr) == PLUS)  /* Wrap only operand 0 in the unspec; operand 1 is re-added outside it.  */
   12456              :           {
   12457         6094 :             new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
   12458              :                                       UNSPEC_GOTOFF);
   12459         6094 :             new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
   12460              :           }
   12461              :         else
   12462       186693 :           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
   12463              : 
   12464       192787 :       new_rtx = gen_rtx_CONST (Pmode, new_rtx);
   12465              : 
   12466        96408 :       if (TARGET_64BIT)
   12467           29 :         new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
   12468              : 
   12469        96408 :       if (reg != 0)  /* With REG, go through expand_simple_binop; otherwise only build a PLUS rtx.  */
   12470              :         {
   12471            3 :           gcc_assert (REG_P (reg));
   12472            3 :           new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
   12473              :                                          new_rtx, reg, 1, OPTAB_DIRECT);
   12474              :         }
   12475              :       else
   12476       192784 :         new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
   12477              :     }
   12478       384201 :   else if ((SYMBOL_REF_P (addr) && SYMBOL_REF_TLS_MODEL (addr) == 0)
   12479              :            /* We can't always use @GOTOFF for text labels
   12480              :               on VxWorks, see gotoff_operand.  */
   12481       205752 :            || (TARGET_VXWORKS_VAROFF && LABEL_REF_P (addr)))
   12482              :     {
   12483              : #if TARGET_PECOFF
   12484              :       rtx tmp = legitimize_pe_coff_symbol (addr, true);
   12485              :       if (tmp)
   12486              :         return tmp;
   12487              : #endif
   12488              : 
   12489              :       /* For x64 PE-COFF there is no GOT table,
   12490              :          so we use address directly.  */
   12491       178446 :       if (TARGET_64BIT && TARGET_PECOFF)
   12492              :         {
   12493              :           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
   12494              :           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
   12495              :         }
   12496       178446 :       else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
   12497              :         {
   12498        95044 :           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
   12499              :                                     UNSPEC_GOTPCREL);
   12500        95044 :           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
   12501        95044 :           new_rtx = gen_const_mem (Pmode, new_rtx);
   12502        95041 :           set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
   12503              :         }
   12504              :       else
   12505              :         {
   12506              :           /* This symbol must be referenced via a load
   12507              :              from the Global Offset Table (@GOT).  */
   12508       166787 :           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
   12509       166787 :           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
   12510              : 
   12511        83405 :           if (TARGET_64BIT)
   12512           23 :             new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
   12513              : 
   12514        83405 :           if (reg != 0)
   12515              :             {
   12516            0 :               gcc_assert (REG_P (reg));
   12517            0 :               new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
   12518              :                                              new_rtx, reg, 1, OPTAB_DIRECT);
   12519              :             }
   12520              :           else
   12521       166787 :             new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
   12522              : 
   12523       166787 :           new_rtx = gen_const_mem (Pmode, new_rtx);
   12524        83405 :           set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
   12525              :         }
   12526              : 
   12527       261831 :       new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
   12528              :     }
   12529              :   else
   12530              :     {
   12531        27306 :       if (CONST_INT_P (addr)
   12532        27306 :           && !x86_64_immediate_operand (addr, VOIDmode))
   12533            8 :         new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
   12534        27298 :       else if (GET_CODE (addr) == CONST)
   12535              :         {
   12536        16751 :           addr = XEXP (addr, 0);
   12537              : 
   12538              :           /* We must match stuff we generate before.  Assume the only
   12539              :              unspecs that can get here are ours.  Not that we could do
   12540              :              anything with them anyway....  */
   12541        16751 :           if (GET_CODE (addr) == UNSPEC
   12542         8970 :               || (GET_CODE (addr) == PLUS
   12543         8970 :                   && GET_CODE (XEXP (addr, 0)) == UNSPEC))
   12544              :             return orig;
   12545         6840 :           gcc_assert (GET_CODE (addr) == PLUS);
   12546              :         }
   12547              : 
   12548        17395 :       if (GET_CODE (addr) == PLUS)
   12549              :         {
   12550         8698 :           rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
   12551              : 
   12552              :           /* Check first to see if this is a constant
   12553              :              offset from a @GOTOFF symbol reference.  */
   12554         8698 :           if (!TARGET_PECOFF
   12555        13785 :               && gotoff_operand (op0, Pmode)
   12556         8698 :               && CONST_INT_P (op1))
   12557              :             {
   12558            4 :               if (!TARGET_64BIT)
   12559              :                 {
   12560            0 :                   new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
   12561              :                                             UNSPEC_GOTOFF);
   12562            0 :                   new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
   12563            0 :                   new_rtx = gen_rtx_CONST (Pmode, new_rtx);
   12564              : 
   12565            0 :                   if (reg != 0)
   12566              :                     {
   12567            0 :                       gcc_assert (REG_P (reg));
   12568            0 :                       new_rtx = expand_simple_binop (Pmode, PLUS,
   12569              :                                                      pic_offset_table_rtx,
   12570              :                                                      new_rtx, reg, 1,
   12571              :                                                      OPTAB_DIRECT);
   12572              :                     }
   12573              :                   else
   12574            0 :                     new_rtx
   12575            0 :                       = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
   12576              :                 }
   12577              :               else
   12578              :                 {
   12579            4 :                   if (INTVAL (op1) < -16*1024*1024
   12580            4 :                       || INTVAL (op1) >= 16*1024*1024)  /* Offset outside [-16MB, 16MB); otherwise NEW_RTX stays ORIG.  */
   12581              :                     {
   12582            4 :                       if (!x86_64_immediate_operand (op1, Pmode))
   12583            4 :                         op1 = force_reg (Pmode, op1);
   12584              : 
   12585            4 :                       new_rtx
   12586            4 :                         = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
   12587              :                     }
   12588              :                 }
   12589              :             }
   12590              :           else
   12591              :             {
   12592         8694 :               rtx base = legitimize_pic_address (op0, reg);
   12593         8694 :               machine_mode mode = GET_MODE (base);
   12594         8694 :               new_rtx
   12595         8694 :                 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);  /* Do not offer REG again if the first call returned it.  */
   12596              : 
   12597         8694 :               if (CONST_INT_P (new_rtx))
   12598              :                 {
   12599         6828 :                   if (INTVAL (new_rtx) < -16*1024*1024
   12600         6828 :                       || INTVAL (new_rtx) >= 16*1024*1024)  /* Same [-16MB, 16MB) window as above.  */
   12601              :                     {
   12602            0 :                       if (!x86_64_immediate_operand (new_rtx, mode))
   12603            0 :                         new_rtx = force_reg (mode, new_rtx);
   12604              : 
   12605            0 :                       new_rtx
   12606            0 :                         = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
   12607              :                     }
   12608              :                   else
   12609         6828 :                     new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
   12610              :                 }
   12611              :               else
   12612              :                 {
   12613              :                   /* For %rip addressing, we have to use
   12614              :                      just disp32, not base nor index.  */
   12615         1866 :                   if (TARGET_64BIT
   12616          101 :                       && (SYMBOL_REF_P (base)
   12617          101 :                           || LABEL_REF_P (base)))
   12618            7 :                     base = force_reg (mode, base);
   12619         1866 :                   if (GET_CODE (new_rtx) == PLUS
   12620         1745 :                       && CONSTANT_P (XEXP (new_rtx, 1)))  /* Reassociate so the constant ends up as the outermost addend.  */
   12621              :                     {
   12622         1741 :                       base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
   12623         1741 :                       new_rtx = XEXP (new_rtx, 1);
   12624              :                     }
   12625         1866 :                   new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
   12626              :                 }
   12627              :             }
   12628              :         }
   12629              :     }
   12630              :   return new_rtx;
   12631              : }
   12632              : 
   12633              : /* Load the thread pointer.  If TO_REG is true, force it into a register.
                      : 
                      :    The thread pointer is built as an UNSPEC_TP unspec in ptr_mode.
                      :    When TP_MODE differs from the mode of that unspec, the only
                      :    combination accepted (by the assertions below) is an SImode
                      :    unspec with TP_MODE == DImode; the unspec is then wrapped in a
                      :    ZERO_EXTEND.  Returns the resulting rtx, passed through
                      :    copy_to_mode_reg in TP_MODE when TO_REG is true.  */
   12634              : 
   12635              : static rtx
   12636        24541 : get_thread_pointer (machine_mode tp_mode, bool to_reg)
   12637              : {
   12638        24541 :   rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
   12639              : 
   12640        24541 :   if (GET_MODE (tp) != tp_mode)
   12641              :     {
   12642           11 :       gcc_assert (GET_MODE (tp) == SImode);
   12643           11 :       gcc_assert (tp_mode == DImode);
   12644              : 
   12645           11 :       tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
   12646              :     }
   12647              : 
   12648        24541 :   if (to_reg)
   12649         8159 :     tp = copy_to_mode_reg (tp_mode, tp);
   12650              : 
   12651        24541 :   return tp;
   12652              : }
   12653              : 
   12654              : /* Construct the SYMBOL_REF for the _tls_index symbol.
                      : 
                      :    The SImode SYMBOL_REF is created on first use and cached in
                      :    ix86_tls_index_symbol.  When flag_pic is set, the symbol is
                      :    returned wrapped in a Pmode CONST of an UNSPEC_PCREL unspec;
                      :    otherwise the bare SYMBOL_REF is returned.  */
   12655              : 
   12656              : static GTY(()) rtx ix86_tls_index_symbol;
   12657              : 
   12658              : static rtx
   12659            0 : ix86_tls_index (void)
   12660              : {
   12661            0 :   if (!ix86_tls_index_symbol)
   12662            0 :     ix86_tls_index_symbol = gen_rtx_SYMBOL_REF (SImode, "_tls_index");
   12663              : 
   12664            0 :   if (flag_pic)
   12665            0 :     return gen_rtx_CONST (Pmode,
   12666              :                           gen_rtx_UNSPEC (Pmode,
   12667              :                                           gen_rtvec (1, ix86_tls_index_symbol),
   12668              :                                           UNSPEC_PCREL));
   12669              :   else
   12670            0 :     return ix86_tls_index_symbol;
   12671              : }
   12672              : 
   12673              : /* Construct the SYMBOL_REF for the tls_get_addr function.
                      : 
                      :    The Pmode SYMBOL_REF is created on first use and cached in
                      :    ix86_tls_symbol; its name is ___tls_get_addr when
                      :    TARGET_ANY_GNU_TLS && !TARGET_64BIT and __tls_get_addr otherwise.
                      :    Returns that symbol, except for CM_LARGE_PIC on non-PE-COFF
                      :    targets, where the result is pic_offset_table_rtx plus a CONST
                      :    wrapping an UNSPEC_PLTOFF of the symbol.
                      : 
                      :    Side effect: if the current function's call_saved_registers is
                      :    TYPE_NO_CALLER_SAVED_REGISTERS, an error is reported and both
                      :    func_type and call_saved_registers in cfun->machine are reset
                      :    so the error is not repeated.  */
   12674              : 
   12675              : static GTY(()) rtx ix86_tls_symbol;
   12676              : 
   12677              : rtx
   12678         6720 : ix86_tls_get_addr (void)
   12679              : {
   12680         6720 :   if (cfun->machine->call_saved_registers
   12681         6720 :       == TYPE_NO_CALLER_SAVED_REGISTERS)
   12682              :     {
   12683              :       /* __tls_get_addr doesn't preserve vector registers.  When a
   12684              :          function with no_caller_saved_registers attribute calls
   12685              :          __tls_get_addr, YMM and ZMM registers will be clobbered.
   12686              :          Issue an error and suggest -mtls-dialect=gnu2 in this case.  */
   12687            3 :       if (cfun->machine->func_type == TYPE_NORMAL)
   12688            1 :         error (G_("%<-mtls-dialect=gnu2%> must be used with a function"
   12689              :                   " with the %<no_caller_saved_registers%> attribute"));
   12690              :       else
   12691            3 :         error (cfun->machine->func_type == TYPE_EXCEPTION
   12692              :                ? G_("%<-mtls-dialect=gnu2%> must be used with an"
   12693              :                     " exception service routine")
   12694              :                : G_("%<-mtls-dialect=gnu2%> must be used with an"
   12695              :                     " interrupt service routine"));
   12696              :       /* Don't issue the same error twice.  */
   12697            3 :       cfun->machine->func_type = TYPE_NORMAL;
   12698            3 :       cfun->machine->call_saved_registers
   12699            3 :         = TYPE_DEFAULT_CALL_SAVED_REGISTERS;
   12700              :     }
   12701              : 
   12702         6720 :   if (!ix86_tls_symbol)
   12703              :     {
   12704          209 :       const char *sym
   12705          246 :         = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
   12706          246 :            ? "___tls_get_addr" : "__tls_get_addr");
   12707              : 
   12708          283 :       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
   12709              :     }
   12710              : 
   12711         6720 :   if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
   12712              :     {
   12713            2 :       rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
   12714              :                                    UNSPEC_PLTOFF);
   12715            2 :       return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
   12716              :                            gen_rtx_CONST (Pmode, unspec));
   12717              :     }
   12718              : 
   12719         6718 :   return ix86_tls_symbol;
   12720              : }
   12721              : 
   12722              : /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.
                      : 
                      :    The ptr_mode SYMBOL_REF is created on first use, has
                      :    TLS_MODEL_GLOBAL_DYNAMIC (shifted by SYMBOL_FLAG_TLS_SHIFT) or-ed
                      :    into its SYMBOL_REF_FLAGS, and is cached in
                      :    ix86_tls_module_base_symbol.  Every call returns the cached
                      :    rtx.  */
   12723              : 
   12724              : static GTY(()) rtx ix86_tls_module_base_symbol;
   12725              : 
   12726              : rtx
   12727           98 : ix86_tls_module_base (void)
   12728              : {
   12729           98 :   if (!ix86_tls_module_base_symbol)
   12730              :     {
   12731           11 :       ix86_tls_module_base_symbol
   12732           11 :         = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
   12733              : 
   12734           11 :       SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
   12735           11 :         |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
   12736              :     }
   12737              : 
   12738           98 :   return ix86_tls_module_base_symbol;
   12739              : }
   12740              : 
   12741              : /* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
   12742              :    false if we expect this to be used for a memory address and true if
   12743              :    we expect to load the address into a register.  */
   12744              : 
   12745              : rtx
   12746        30969 : legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
   12747              : {
   12748        30969 :   rtx dest, base, off;
   12749        30969 :   rtx pic = NULL_RTX, tp = NULL_RTX;
   12750        30969 :   machine_mode tp_mode = Pmode;
   12751        30969 :   int type;
   12752              : 
   12753              :   /* Windows implements a single form of TLS.  */
   12754        30969 :   if (TARGET_WIN32_TLS)
   12755              :     {
   12756              :       /* Load the 32-bit index.  */
   12757              :       rtx ind = gen_const_mem (SImode, ix86_tls_index ());
   12758              :       set_mem_alias_set (ind, GOT_ALIAS_SET);
   12759              :       if (TARGET_64BIT)
   12760              :         ind = convert_to_mode (Pmode, ind, 1);
   12761              :       ind = force_reg (Pmode, ind);
   12762              : 
   12763              :       /* Add it to the thread pointer and load the base.  */
   12764              :       tp = get_thread_pointer (Pmode, true);
   12765              :       rtx addr = gen_rtx_PLUS (Pmode, tp,
   12766              :                                gen_rtx_MULT (Pmode, ind,
   12767              :                                              GEN_INT (UNITS_PER_WORD)));
   12768              :       base = gen_const_mem (Pmode, addr);
   12769              :       set_mem_alias_set (base, GOT_ALIAS_SET);
   12770              : 
   12771              :       /* Add the 32-bit section-relative offset to the base.  */
   12772              :       base = force_reg (Pmode, base);
   12773              :       off = gen_rtx_CONST (Pmode,
   12774              :                            gen_rtx_UNSPEC (SImode,
   12775              :                                            gen_rtvec (1, x),
   12776              :                                            UNSPEC_SECREL32));
   12777              :       return gen_rtx_PLUS (Pmode, base, off);
   12778              :     }
   12779              : 
   12780              :   /* Fall back to global dynamic model if tool chain cannot support local
   12781              :      dynamic.  */
   12782        30969 :   if (TARGET_SUN_TLS && !TARGET_64BIT
   12783              :       && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
   12784              :       && model == TLS_MODEL_LOCAL_DYNAMIC)
   12785              :     model = TLS_MODEL_GLOBAL_DYNAMIC;
   12786              : 
   12787        30969 :   switch (model)
   12788              :     {
   12789         6121 :     case TLS_MODEL_GLOBAL_DYNAMIC:
   12790         6121 :       if (!TARGET_64BIT)
   12791              :         {
   12792         1930 :           if (flag_pic && !TARGET_PECOFF)
   12793         1930 :             pic = pic_offset_table_rtx;
   12794              :           else
   12795              :             {
   12796            0 :               pic = gen_reg_rtx (Pmode);
   12797            0 :               emit_insn (gen_set_got (pic));
   12798              :             }
   12799              :         }
   12800              : 
   12801         6121 :       if (TARGET_GNU2_TLS)
   12802              :         {
   12803           53 :           dest = gen_reg_rtx (ptr_mode);
   12804           53 :           if (TARGET_64BIT)
   12805           53 :             emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
   12806              :           else
   12807            0 :             emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
   12808              : 
   12809           53 :           tp = get_thread_pointer (ptr_mode, true);
   12810           53 :           dest = gen_rtx_PLUS (ptr_mode, tp, dest);
   12811           61 :           if (GET_MODE (dest) != Pmode)
   12812            6 :              dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
   12813           61 :           dest = force_reg (Pmode, dest);
   12814              : 
   12815           61 :           if (GET_MODE (x) != Pmode)
   12816            3 :             x = gen_rtx_ZERO_EXTEND (Pmode, x);
   12817              : 
   12818           53 :           set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
   12819              :         }
   12820              :       else
   12821              :         {
   12822         6068 :           rtx caddr = ix86_tls_get_addr ();
   12823              : 
   12824         7998 :           dest = gen_reg_rtx (Pmode);
   12825         6068 :           if (TARGET_64BIT)
   12826              :             {
   12827         4138 :               rtx rax = gen_rtx_REG (Pmode, AX_REG);
   12828         4138 :               rtx rdi = gen_rtx_REG (Pmode, DI_REG);
   12829         4138 :               rtx_insn *insns;
   12830              : 
   12831         4138 :               start_sequence ();
   12832         4138 :               emit_call_insn
   12833         4138 :                 (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr, rdi));
   12834         4138 :               insns = end_sequence ();
   12835              : 
   12836         4138 :               if (GET_MODE (x) != Pmode)
   12837            1 :                 x = gen_rtx_ZERO_EXTEND (Pmode, x);
   12838              : 
   12839         4138 :               RTL_CONST_CALL_P (insns) = 1;
   12840         4138 :               emit_libcall_block (insns, dest, rax, x);
   12841              :             }
   12842              :           else
   12843         1930 :             emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
   12844              :         }
   12845              :       break;
   12846              : 
   12847          386 :     case TLS_MODEL_LOCAL_DYNAMIC:
   12848          386 :       if (!TARGET_64BIT)
   12849              :         {
   12850           92 :           if (flag_pic)
   12851           92 :             pic = pic_offset_table_rtx;
   12852              :           else
   12853              :             {
   12854            0 :               pic = gen_reg_rtx (Pmode);
   12855            0 :               emit_insn (gen_set_got (pic));
   12856              :             }
   12857              :         }
   12858              : 
   12859          386 :       if (TARGET_GNU2_TLS)
   12860              :         {
   12861           26 :           rtx tmp = ix86_tls_module_base ();
   12862              : 
   12863           26 :           base = gen_reg_rtx (ptr_mode);
   12864           26 :           if (TARGET_64BIT)
   12865           26 :             emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
   12866              :           else
   12867            0 :             emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
   12868              : 
   12869           26 :           tp = get_thread_pointer (ptr_mode, true);
   12870           32 :           if (GET_MODE (base) != Pmode)
   12871            2 :             base = gen_rtx_ZERO_EXTEND (Pmode, base);
   12872           32 :           base = force_reg (Pmode, base);
   12873              :         }
   12874              :       else
   12875              :         {
   12876          360 :           rtx caddr = ix86_tls_get_addr ();
   12877              : 
   12878          452 :           base = gen_reg_rtx (Pmode);
   12879          360 :           if (TARGET_64BIT)
   12880              :             {
   12881          268 :               rtx rax = gen_rtx_REG (Pmode, AX_REG);
   12882          268 :               rtx rdi = gen_rtx_REG (Pmode, DI_REG);
   12883          268 :               rtx_insn *insns;
   12884          268 :               rtx eqv;
   12885              : 
   12886          268 :               start_sequence ();
   12887          268 :               emit_call_insn
   12888          268 :                 (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi));
   12889          268 :               insns = end_sequence ();
   12890              : 
   12891              :               /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
   12892              :                  share the LD_BASE result with other LD model accesses.  */
   12893          268 :               eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
   12894              :                                     UNSPEC_TLS_LD_BASE);
   12895              : 
   12896          268 :               RTL_CONST_CALL_P (insns) = 1;
   12897          268 :               emit_libcall_block (insns, base, rax, eqv);
   12898              :             }
   12899              :           else
   12900           92 :             emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
   12901              :         }
   12902              : 
   12903          484 :       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
   12904          484 :       off = gen_rtx_CONST (Pmode, off);
   12905              : 
   12906          582 :       dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
   12907              : 
   12908          386 :       if (TARGET_GNU2_TLS)
   12909              :         {
   12910           32 :           if (GET_MODE (tp) != Pmode)
   12911              :             {
   12912            2 :               dest = lowpart_subreg (ptr_mode, dest, Pmode);
   12913            2 :               dest = gen_rtx_PLUS (ptr_mode, tp, dest);
   12914            2 :               dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
   12915              :             }
   12916              :           else
   12917           24 :             dest = gen_rtx_PLUS (Pmode, tp, dest);
   12918           32 :           dest = force_reg (Pmode, dest);
   12919              : 
   12920           32 :           if (GET_MODE (x) != Pmode)
   12921            1 :             x = gen_rtx_ZERO_EXTEND (Pmode, x);
   12922              : 
   12923           26 :           set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
   12924              :         }
   12925              :       break;
   12926              : 
   12927        10817 :     case TLS_MODEL_INITIAL_EXEC:
   12928        10817 :       if (TARGET_64BIT)
   12929              :         {
   12930              :           /* Generate DImode references to avoid %fs:(%reg32)
   12931              :              problems and linker IE->LE relaxation bug.  */
   12932              :           tp_mode = DImode;
   12933              :           pic = NULL;
   12934              :           type = UNSPEC_GOTNTPOFF;
   12935              :         }
   12936          764 :       else if (flag_pic)
   12937              :         {
   12938          763 :           pic = pic_offset_table_rtx;
   12939          763 :           type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
   12940              :         }
   12941            1 :       else if (!TARGET_ANY_GNU_TLS)
   12942              :         {
   12943            0 :           pic = gen_reg_rtx (Pmode);
   12944            0 :           emit_insn (gen_set_got (pic));
   12945            0 :           type = UNSPEC_GOTTPOFF;
   12946              :         }
   12947              :       else
   12948              :         {
   12949              :           pic = NULL;
   12950              :           type = UNSPEC_INDNTPOFF;
   12951              :         }
   12952              : 
   12953        10817 :       off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
   12954        10817 :       off = gen_rtx_CONST (tp_mode, off);
   12955        10817 :       if (pic)
   12956          763 :         off = gen_rtx_PLUS (tp_mode, pic, off);
   12957        10817 :       off = gen_const_mem (tp_mode, off);
   12958        10817 :       set_mem_alias_set (off, GOT_ALIAS_SET);
   12959              : 
   12960        10817 :       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
   12961              :         {
   12962        10817 :           base = get_thread_pointer (tp_mode,
   12963        10817 :                                      for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
   12964        10817 :           off = force_reg (tp_mode, off);
   12965        10817 :           dest = gen_rtx_PLUS (tp_mode, base, off);
   12966        11585 :           if (tp_mode != Pmode)
   12967            4 :             dest = convert_to_mode (Pmode, dest, 1);
   12968              :         }
   12969              :       else
   12970              :         {
   12971            0 :           base = get_thread_pointer (Pmode, true);
   12972            0 :           dest = gen_reg_rtx (Pmode);
   12973            0 :           emit_insn (gen_sub3_insn (dest, base, off));
   12974              :         }
   12975              :       break;
   12976              : 
   12977        13645 :     case TLS_MODEL_LOCAL_EXEC:
   12978        28061 :       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
   12979              :                             (TARGET_64BIT || TARGET_ANY_GNU_TLS)
   12980              :                             ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
   12981        14416 :       off = gen_rtx_CONST (Pmode, off);
   12982              : 
   12983        13645 :       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
   12984              :         {
   12985        14416 :           base = get_thread_pointer (Pmode,
   12986        13645 :                                      for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
   12987        14416 :           return gen_rtx_PLUS (Pmode, base, off);
   12988              :         }
   12989              :       else
   12990              :         {
   12991            0 :           base = get_thread_pointer (Pmode, true);
   12992            0 :           dest = gen_reg_rtx (Pmode);
   12993            0 :           emit_insn (gen_sub3_insn (dest, base, off));
   12994              :         }
   12995            0 :       break;
   12996              : 
   12997            0 :     default:
   12998            0 :       gcc_unreachable ();
   12999              :     }
   13000              : 
   13001              :   return dest;
   13002              : }
   13003              : 
   13004              : /* Return true if the address of MEM contains a TLS unspec that requires an
   13005              :    insn using integer registers.  It's used to prevent KMOV/VMOV in TLS code
   13006              :    sequences which require integer MOV instructions, refer to PR103275.  */
   13007              : bool
   13008     15131374 : ix86_gpr_tls_address_pattern_p (rtx mem)
   13009              : {
   13010     15131374 :   gcc_assert (MEM_P (mem));
   13011              : 
   13012     15131374 :   rtx addr = XEXP (mem, 0);  /* Only the address of MEM is scanned.  */
   13013     15131374 :   subrtx_var_iterator::array_type array;
   13014     52610383 :   FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL)
   13015              :     {
   13016     37486594 :       rtx op = *iter;
   13017     37486594 :       if (GET_CODE (op) == UNSPEC)
   13018       201262 :         switch (XINT (op, 1))
   13019              :           {
   13020              :           case UNSPEC_GOTNTPOFF:
   13021         7585 :             return true;
   13022            0 :           case UNSPEC_TPOFF:
   13023            0 :             if (!TARGET_64BIT)  /* UNSPEC_TPOFF counts only for !TARGET_64BIT.  */
   13024              :               return true;
   13025              :             break;
   13026              :           default:
   13027              :             break;
   13028              :           }
   13029              :     }
   13030              : 
   13031     15123789 :   return false;
   13032     15131374 : }
   13033              : 
   13034              : /* Return true if OP contains a MEM whose address has an UNSPEC_TP term.  */
   13035              : bool
   13036    232068861 : ix86_tls_address_pattern_p (rtx op)
   13037              : {
   13038    232068861 :   subrtx_var_iterator::array_type array;
   13039   1381116263 :   FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
   13040              :     {
   13041   1149065518 :       rtx op = *iter;
   13042   1149065518 :       if (MEM_P (op))
   13043              :         {
   13044    104887277 :           rtx *x = &XEXP (op, 0);
   13045    165938522 :           while (GET_CODE (*x) == PLUS)
   13046              :             {
   13047              :               int i;
   13048    183171874 :               for (i = 0; i < 2; i++)  /* Check both operands of the PLUS.  */
   13049              :                 {
   13050    122120629 :                   rtx u = XEXP (*x, i);
   13051    122120629 :                   if (GET_CODE (u) == ZERO_EXTEND)  /* Look through ZERO_EXTEND.  */
   13052       111164 :                     u = XEXP (u, 0);
   13053    122120629 :                   if (GET_CODE (u) == UNSPEC
   13054        18148 :                       && XINT (u, 1) == UNSPEC_TP)
   13055        18116 :                     return true;
   13056              :                 }
   13057     61051245 :               x = &XEXP (*x, 0);  /* Continue with operand 0 of the PLUS.  */
   13058              :             }
   13059              : 
   13060    104869161 :           iter.skip_subrtxes ();
   13061              :         }
   13062              :     }
   13063              : 
   13064    232050745 :   return false;
   13065    232068861 : }
   13066              : 
   13067              : /* Rewrite the UNSPEC_TP-based MEM in *LOC to use the default TLS address space.  */
   13068              : static void
   13069        18116 : ix86_rewrite_tls_address_1 (rtx *loc)
   13070              : {
   13071        18116 :   subrtx_ptr_iterator::array_type array;
   13072        53756 :   FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
   13073              :     {
   13074        53756 :       rtx *loc = *iter;
   13075        53756 :       if (MEM_P (*loc))
   13076              :         {
   13077        18303 :           rtx addr = XEXP (*loc, 0);
   13078        18303 :           rtx *x = &addr;
   13079        23147 :           while (GET_CODE (*x) == PLUS)
   13080              :             {
   13081              :               int i;
   13082        32671 :               for (i = 0; i < 2; i++)  /* Check both operands of the PLUS.  */
   13083              :                 {
   13084        27827 :                   rtx u = XEXP (*x, i);
   13085        27827 :                   if (GET_CODE (u) == ZERO_EXTEND)  /* Look through ZERO_EXTEND.  */
   13086           19 :                     u = XEXP (u, 0);
   13087        27827 :                   if (GET_CODE (u) == UNSPEC
   13088        18116 :                       && XINT (u, 1) == UNSPEC_TP)
   13089              :                     {
   13090              :                       /* NB: Since address override only applies to the
   13091              :                          (reg32) part in fs:(reg32), return if address
   13092              :                          override is used.  */
   13093        19747 :                       if (Pmode != word_mode
   13094        18116 :                           && REG_P (XEXP (*x, 1 - i)))
   13095        18116 :                         return;
   13096              : 
   13097        18114 :                       addr_space_t as = DEFAULT_TLS_SEG_REG;
   13098              : 
   13099        18114 :                       *x = XEXP (*x, 1 - i);  /* Drop the UNSPEC_TP operand of the PLUS.  */
   13100              : 
   13101        18114 :                       *loc = replace_equiv_address_nv (*loc, addr, true);
   13102        18114 :                       set_mem_addr_space (*loc, as);
   13103        18114 :                       return;  /* Only the first UNSPEC_TP found is rewritten.  */
   13104              :                     }
   13105              :                 }
   13106         4844 :               x = &XEXP (*x, 0);  /* Continue with operand 0 of the PLUS.  */
   13107              :             }
   13108              : 
   13109          187 :           iter.skip_subrtxes ();
   13110              :         }
   13111              :     }
   13112        18116 : }
   13113              : 
   13114              : /* Return a copy of instruction PATTERN, rewritten so that its TLS address
   13115              :    refers to a default TLS address space.  PATTERN itself is not modified.  */
   13116              : rtx
   13117        18116 : ix86_rewrite_tls_address (rtx pattern)
   13118              : {
   13119        18116 :   pattern = copy_insn (pattern);
   13120        18116 :   ix86_rewrite_tls_address_1 (&pattern);
   13121        18116 :   return pattern;
   13122              : }
   13123              : 
   13124              : /* Try machine-dependent ways of modifying an illegitimate address
   13125              :    to be legitimate.  If we find one, return the new, valid address.
   13126              :    This function is used in only one place: `memory_address' in explow.cc.
   13127              : 
   13128              :    OLDX (the second argument, unnamed and unused here) is the address as it was
   13129              :    before break_out_memory_refs was called; it can help decide what to do.
   13130              : 
   13131              :    It is always safe for this function to do nothing.  It exists to recognize
   13132              :    opportunities to optimize the output.
   13133              : 
   13134              :    For the 80386, we handle X+REG by loading X into a register R and
   13135              :    using R+REG.  R will go in a general reg and indexing will be used.
   13136              :    However, if REG is a broken-out memory address or multiplication,
   13137              :    nothing needs to be done because REG can certainly go in a general reg.
   13138              : 
   13139              :    When -fpic is used, special handling is needed for symbolic references.
   13140              :    See comments by legitimize_pic_address in i386.cc for details.  */
   13141              : 
   13142              : static rtx
   13143       664871 : ix86_legitimize_address (rtx x, rtx, machine_mode mode)
   13144              : {
   13145       664871 :   bool changed = false;
   13146       664871 :   unsigned log;
   13147              : 
   13148       664871 :   log = SYMBOL_REF_P (x) ? SYMBOL_REF_TLS_MODEL (x) : 0;  /* LOG holds the TLS model here.  */
   13149       151784 :   if (log)
   13150        20803 :     return legitimize_tls_address (x, (enum tls_model) log, false);
   13151       644068 :   if (GET_CODE (x) == CONST
   13152          508 :       && GET_CODE (XEXP (x, 0)) == PLUS
   13153          508 :       && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
   13154       644576 :       && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
   13155              :     {
   13156            4 :       rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
   13157              :                                       (enum tls_model) log, false);
   13158            5 :       return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));  /* Re-add the offset.  */
   13159              :     }
   13160              : 
   13161       644064 :   if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
   13162              :     {
   13163              : #if TARGET_PECOFF
   13164              :       rtx tmp = legitimize_pe_coff_symbol (x, true);
   13165              :       if (tmp)
   13166              :         return tmp;
   13167              : #endif
   13168              :     }
   13169              : 
   13170       644064 :   if (flag_pic && SYMBOLIC_CONST (x))
   13171       131364 :     return legitimize_pic_address (x, 0);
   13172              : 
   13173              : #if TARGET_MACHO
   13174              :   if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
   13175              :     return machopic_indirect_data_reference (x, 0);
   13176              : #endif
   13177              : 
   13178              :   /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
   13179       512700 :   if (GET_CODE (x) == ASHIFT
   13180            0 :       && CONST_INT_P (XEXP (x, 1))
   13181            0 :       && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
   13182              :     {
   13183            0 :       changed = true;
   13184            0 :       log = INTVAL (XEXP (x, 1));
   13185            0 :       x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
   13186              :                         GEN_INT (1 << log));
   13187              :     }
   13188              : 
   13189       512700 :   if (GET_CODE (x) == PLUS)
   13190              :     {
   13191              :       /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
   13192              : 
   13193       172312 :       if (GET_CODE (XEXP (x, 0)) == ASHIFT
   13194          622 :           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
   13195          622 :           && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
   13196              :         {
   13197          622 :           changed = true;
   13198          622 :           log = INTVAL (XEXP (XEXP (x, 0), 1));
   13199         1822 :           XEXP (x, 0) = gen_rtx_MULT (Pmode,
   13200              :                                       force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
   13201              :                                       GEN_INT (1 << log));
   13202              :         }
   13203              : 
   13204       172312 :       if (GET_CODE (XEXP (x, 1)) == ASHIFT
   13205            0 :           && CONST_INT_P (XEXP (XEXP (x, 1), 1))
   13206            0 :           && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
   13207              :         {
   13208            0 :           changed = true;
   13209            0 :           log = INTVAL (XEXP (XEXP (x, 1), 1));
   13210            0 :           XEXP (x, 1) = gen_rtx_MULT (Pmode,
   13211              :                                       force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
   13212              :                                       GEN_INT (1 << log));
   13213              :         }
   13214              : 
   13215              :       /* Put multiply first if it isn't already.  */
   13216       172312 :       if (GET_CODE (XEXP (x, 1)) == MULT)
   13217              :         {
   13218            0 :           std::swap (XEXP (x, 0), XEXP (x, 1));
   13219            0 :           changed = true;
   13220              :         }
   13221              : 
   13222              :       /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
   13223              :          into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
   13224              :          created by virtual register instantiation, register elimination, and
   13225              :          similar optimizations.  */
   13226       172312 :       if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
   13227              :         {
   13228         9473 :           changed = true;
   13229        15081 :           x = gen_rtx_PLUS (Pmode,
   13230              :                             gen_rtx_PLUS (Pmode, XEXP (x, 0),
   13231              :                                           XEXP (XEXP (x, 1), 0)),
   13232              :                             XEXP (XEXP (x, 1), 1));
   13233              :         }
   13234              : 
   13235              :       /* Canonicalize
   13236              :          (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
   13237              :          into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
   13238       162839 :       else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
   13239       100995 :                && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
   13240        50960 :                && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
   13241            0 :                && CONSTANT_P (XEXP (x, 1)))
   13242              :         {
   13243            0 :           rtx constant;
   13244            0 :           rtx other = NULL_RTX;
   13245              : 
   13246            0 :           if (CONST_INT_P (XEXP (x, 1)))
   13247              :             {
   13248            0 :               constant = XEXP (x, 1);
   13249            0 :               other = XEXP (XEXP (XEXP (x, 0), 1), 1);
   13250              :             }
   13251            0 :           else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
   13252              :             {
   13253              :               constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
   13254              :               other = XEXP (x, 1);
   13255              :             }
   13256              :           else
   13257              :             constant = 0;  /* Neither is a CONST_INT: leave X alone.  */
   13258              : 
   13259            0 :           if (constant)
   13260              :             {
   13261            0 :               changed = true;
   13262            0 :               x = gen_rtx_PLUS (Pmode,
   13263              :                                 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
   13264              :                                               XEXP (XEXP (XEXP (x, 0), 1), 0)),
   13265              :                                 plus_constant (Pmode, other,
   13266              :                                                INTVAL (constant)));
   13267              :             }
   13268              :         }
   13269              : 
   13270       172312 :       if (changed && ix86_legitimate_address_p (mode, x, false))
   13271         9509 :         return x;
   13272              : 
   13273       162803 :       if (GET_CODE (XEXP (x, 0)) == MULT)
   13274              :         {
   13275        19765 :           changed = true;
   13276        19765 :           XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
   13277              :         }
   13278              : 
   13279       162803 :       if (GET_CODE (XEXP (x, 1)) == MULT)
   13280              :         {
   13281            0 :           changed = true;
   13282            0 :           XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
   13283              :         }
   13284              : 
   13285       162803 :       if (changed
   13286        19773 :           && REG_P (XEXP (x, 1))
   13287        16178 :           && REG_P (XEXP (x, 0)))
   13288              :         return x;
   13289              : 
   13290       146625 :       if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
   13291              :         {
   13292         1858 :           changed = true;
   13293         1858 :           x = legitimize_pic_address (x, 0);
   13294              :         }
   13295              : 
   13296       146625 :       if (changed && ix86_legitimate_address_p (mode, x, false))
   13297         3868 :         return x;
   13298              : 
   13299       142757 :       if (REG_P (XEXP (x, 0)))  /* Operand 0 is a REG: compute operand 1 into TEMP.  */
   13300              :         {
   13301        44058 :           rtx temp = gen_reg_rtx (Pmode);
   13302        41301 :           rtx val  = force_operand (XEXP (x, 1), temp);
   13303        41301 :           if (val != temp)
   13304              :             {
   13305        31960 :               val = convert_to_mode (Pmode, val, 1);
   13306        31667 :               emit_move_insn (temp, val);
   13307              :             }
   13308              : 
   13309        41301 :           XEXP (x, 1) = temp;
   13310        41301 :           return x;
   13311              :         }
   13312              : 
   13313       101456 :       else if (REG_P (XEXP (x, 1)))  /* Mirror case: compute operand 0 into TEMP.  */
   13314              :         {
   13315         3291 :           rtx temp = gen_reg_rtx (Pmode);
   13316         2657 :           rtx val  = force_operand (XEXP (x, 0), temp);
   13317         2657 :           if (val != temp)
   13318              :             {
   13319            0 :               val = convert_to_mode (Pmode, val, 1);
   13320            0 :               emit_move_insn (temp, val);
   13321              :             }
   13322              : 
   13323         2657 :           XEXP (x, 0) = temp;
   13324         2657 :           return x;
   13325              :         }
   13326              :     }
   13327              : 
   13328              :   return x;
   13329              : }
   13330              : 
   13331              : /* Print an integer constant expression in assembler syntax.  Addition
   13332              :    and subtraction are the only arithmetic that may appear in these
   13333              :    expressions.  FILE is the stdio stream to write to, X is the rtx, and
   13334              :    CODE is the operand print code from the output string.  */
   13335              : 
   13336              : static void
   13337      3681555 : output_pic_addr_const (FILE *file, rtx x, int code)
   13338              : {
   13339      3911497 :   char buf[256];
   13340              : 
   13341      3911497 :   switch (GET_CODE (x))
   13342              :     {
   13343            0 :     case PC:
   13344            0 :       gcc_assert (flag_pic);
   13345            0 :       putc ('.', file);
   13346            0 :       break;
   13347              : 
   13348       861658 :     case SYMBOL_REF:
   13349       861658 :       if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
   13350       861658 :         output_addr_const (file, x);
   13351              :       else
   13352              :         {
   13353              :           const char *name = XSTR (x, 0);
   13354              : 
   13355              :           /* Mark the decl as referenced so that cgraph will
   13356              :              output the function.  */
   13357              :           if (SYMBOL_REF_DECL (x))
   13358              :             mark_decl_referenced (SYMBOL_REF_DECL (x));
   13359              : 
   13360              : #if TARGET_MACHO
   13361              :           if (MACHOPIC_INDIRECT
   13362              :               && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
   13363              :             name = machopic_indirection_name (x, /*stub_p=*/true);
   13364              : #endif
   13365              :           assemble_name (file, name);
   13366              :         }
   13367       861658 :       if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
   13368       861658 :           && code == 'P' && ix86_call_use_plt_p (x))
   13369       391109 :         fputs ("@PLT", file);
   13370              :       break;
   13371              : 
   13372         2706 :     case LABEL_REF:
   13373         2706 :       x = XEXP (x, 0);
   13374              :       /* FALLTHRU */
   13375         2706 :     case CODE_LABEL:
   13376         2706 :       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
   13377         2706 :       assemble_name (asm_out_file, buf);  /* NOTE(review): uses asm_out_file, not FILE; confirm intended.  */
   13378         2706 :       break;
   13379              : 
   13380      2616021 :     CASE_CONST_SCALAR_INT:
   13381      2616021 :       output_addr_const (file, x);
   13382      2616021 :       break;
   13383              : 
   13384       210822 :     case CONST:
   13385              :       /* This used to output parentheses around the expression,
   13386              :          but that does not work on the 386 (either ATT or BSD assembler).  */
   13387       210822 :       output_pic_addr_const (file, XEXP (x, 0), code);
   13388       210822 :       break;
   13389              : 
   13390            0 :     case CONST_DOUBLE:
   13391              :       /* We can't handle floating point constants;
   13392              :          TARGET_PRINT_OPERAND must handle them.  */
   13393            0 :       output_operand_lossage ("floating constant misused");
   13394            0 :       break;
   13395              : 
   13396        19120 :     case PLUS:
   13397              :       /* Some assemblers need integer constants to appear first.  */
   13398        19120 :       if (CONST_INT_P (XEXP (x, 0)))
   13399              :         {
   13400            0 :           output_pic_addr_const (file, XEXP (x, 0), code);
   13401            0 :           putc ('+', file);
   13402            0 :           output_pic_addr_const (file, XEXP (x, 1), code);
   13403              :         }
   13404              :       else
   13405              :         {
   13406        19120 :           gcc_assert (CONST_INT_P (XEXP (x, 1)));
   13407        19120 :           output_pic_addr_const (file, XEXP (x, 1), code);  /* Constant operand is printed first.  */
   13408        19120 :           putc ('+', file);
   13409        19120 :           output_pic_addr_const (file, XEXP (x, 0), code);
   13410              :         }
   13411              :       break;
   13412              : 
   13413            0 :     case MINUS:
   13414            0 :       if (!TARGET_MACHO)
   13415            0 :         putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
   13416            0 :       output_pic_addr_const (file, XEXP (x, 0), code);
   13417            0 :       putc ('-', file);
   13418            0 :       output_pic_addr_const (file, XEXP (x, 1), code);
   13419            0 :       if (!TARGET_MACHO)
   13420            0 :         putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
   13421            0 :       break;
   13422              : 
   13423       201170 :     case UNSPEC:
   13424       201170 :       gcc_assert (XVECLEN (x, 0) == 1);
   13425       201170 :       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);  /* Operand first, then its suffix.  */
   13426       201170 :       switch (XINT (x, 1))
   13427              :         {
   13428        43320 :         case UNSPEC_GOT:
   13429        43320 :           fputs ("@GOT", file);
   13430        43320 :           break;
   13431        77894 :         case UNSPEC_GOTOFF:
   13432        77894 :           fputs ("@GOTOFF", file);
   13433        77894 :           break;
   13434           36 :         case UNSPEC_PLTOFF:
   13435           36 :           fputs ("@PLTOFF", file);
   13436           36 :           break;
   13437            0 :         case UNSPEC_PCREL:
   13438            0 :           fputs (ASSEMBLER_DIALECT == ASM_ATT ?
   13439              :                  "(%rip)" : "[rip]", file);
   13440            0 :           break;
   13441        75732 :         case UNSPEC_GOTPCREL:
   13442        75732 :           fputs (ASSEMBLER_DIALECT == ASM_ATT ?
   13443              :                  "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
   13444        75732 :           break;
   13445            0 :         case UNSPEC_GOTTPOFF:
   13446              :           /* FIXME: This might be @TPOFF in Sun ld too.  */
   13447            0 :           fputs ("@gottpoff", file);
   13448            0 :           break;
   13449            0 :         case UNSPEC_TPOFF:
   13450            0 :           fputs ("@tpoff", file);
   13451            0 :           break;
   13452         1459 :         case UNSPEC_NTPOFF:
   13453         1459 :           if (TARGET_64BIT)  /* 64-bit prints UNSPEC_NTPOFF as @tpoff.  */
   13454         1459 :             fputs ("@tpoff", file);
   13455              :           else
   13456            0 :             fputs ("@ntpoff", file);
   13457              :           break;
   13458          315 :         case UNSPEC_DTPOFF:
   13459          315 :           fputs ("@dtpoff", file);
   13460          315 :           break;
   13461         2414 :         case UNSPEC_GOTNTPOFF:
   13462         2414 :           if (TARGET_64BIT)  /* 64-bit prints a RIP-relative @gottpoff.  */
   13463         2150 :             fputs (ASSEMBLER_DIALECT == ASM_ATT ?
   13464              :                    "@gottpoff(%rip)": "@gottpoff[rip]", file);
   13465              :           else
   13466          264 :             fputs ("@gotntpoff", file);
   13467              :           break;
   13468            0 :         case UNSPEC_INDNTPOFF:
   13469            0 :           fputs ("@indntpoff", file);
   13470            0 :           break;
   13471            0 :         case UNSPEC_SECREL32:
   13472            0 :           fputs ("@secrel32", file);
   13473            0 :           break;
   13474              : #if TARGET_MACHO
   13475              :         case UNSPEC_MACHOPIC_OFFSET:
   13476              :           putc ('-', file);
   13477              :           machopic_output_function_base_name (file);
   13478              :           break;
   13479              : #endif
   13480            0 :         default:
   13481            0 :           output_operand_lossage ("invalid UNSPEC as operand");
   13482            0 :           break;
   13483              :         }
   13484              :        break;
   13485              : 
   13486            0 :     default:
   13487            0 :       output_operand_lossage ("invalid expression as operand");
   13488              :     }
   13489      3681555 : }
   13490              : 
   13491              : /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   13492              :    Emit a DTP-relative relocation for X to FILE; SIZE must be 4 or 8.  */
   13493              : 
   13494              : static void ATTRIBUTE_UNUSED
   13495          694 : i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
   13496              : {
   13497          694 :   fputs (ASM_LONG, file);
   13498          694 :   output_addr_const (file, x);
   13499              : #if TARGET_WIN32_TLS
   13500              :   fputs ("@secrel32", file);
   13501              : #else
   13502          694 :   fputs ("@dtpoff", file);
   13503              : #endif
   13504          694 :   switch (size)
   13505              :     {
   13506              :     case 4:
   13507              :       break;
   13508          555 :     case 8:
   13509          555 :       fputs (", 0", file);  /* Append a second, zero operand for SIZE 8.  */
   13510          555 :       break;
   13511            0 :     default:
   13512            0 :       gcc_unreachable ();
   13513              :    }
   13514          694 : }
   13515              : 
   13516              : /* Return true if X is a representation of the PIC register.  This copes
   13517              :    with calls from ix86_find_base_term, where the register might have
   13518              :    been replaced by a cselib value.  */
   13519              : 
   13520              : static bool
   13521     26858218 : ix86_pic_register_p (rtx x)
   13522              : {
   13523     26858218 :   if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
   13524       753736 :     return (pic_offset_table_rtx
   13525       753736 :             && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
   13526     26104482 :   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
   13527              :     return true;
   13528     26099904 :   else if (!REG_P (x))
   13529              :     return false;
   13530     25493696 :   else if (pic_offset_table_rtx)
   13531              :     {
   13532     25474487 :       if (REGNO (x) == REGNO (pic_offset_table_rtx))
   13533              :         return true;
   13534       403994 :       if (HARD_REGISTER_P (x)  /* Hard reg whose ORIGINAL_REGNO is the PIC pseudo.  */
   13535       382104 :           && !HARD_REGISTER_P (pic_offset_table_rtx)
   13536       786098 :           && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
   13537              :         return true;
   13538              :       return false;
   13539              :     }
   13540              :   else
   13541        19209 :     return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;  /* No pic_offset_table_rtx: use the fixed regno.  */
   13542              : }
   13543              : 
   13544              : /* Helper function for ix86_delegitimize_address.
   13545              :    Attempt to delegitimize TLS local-exec accesses.  */
   13546              : 
   13547              : static rtx
   13548   3509788824 : ix86_delegitimize_tls_address (rtx orig_x)
   13549              : {
   13550   3509788824 :   rtx x = orig_x, unspec;
   13551   3509788824 :   struct ix86_address addr;
   13552              : 
                          /* Every early exit below returns ORIG_X itself, so the caller
                             gets its input back whenever the pattern does not match.  */
   13553   3509788824 :   if (!TARGET_TLS_DIRECT_SEG_REFS)
   13554              :     return orig_x;
                          /* For a MEM, examine its address operand.  */
   13555   3509788824 :   if (MEM_P (x))
   13556     42597995 :     x = XEXP (x, 0);
                          /* Only a Pmode PLUS is a candidate.  */
   13557   5044059545 :   if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
   13558              :     return orig_x;
                          /* The address must decompose, use DEFAULT_TLS_SEG_REG as its
                             segment and carry a CONST displacement.  */
   13559   1687472408 :   if (ix86_decompose_address (x, &addr) == 0
   13560   1951430359 :       || addr.seg != DEFAULT_TLS_SEG_REG
   13561       268427 :       || addr.disp == NULL_RTX
   13562   1687688800 :       || GET_CODE (addr.disp) != CONST)
   13563              :     return orig_x;
   13564       111900 :   unspec = XEXP (addr.disp, 0);
                          /* Look through (plus UNSPEC const_int) inside the CONST.  */
   13565       111900 :   if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
   13566        65400 :     unspec = XEXP (unspec, 0);
   13567       111900 :   if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
   13568              :     return orig_x;
                          /* Rebuild the address around the symbol held in the UNSPEC.  */
   13569       111837 :   x = XVECEXP (unspec, 0, 0);
   13570       111837 :   gcc_assert (SYMBOL_REF_P (x));
                          /* The UNSPEC was reached through a PLUS: put its constant
                             operand back.  */
   13571       111837 :   if (unspec != XEXP (addr.disp, 0))
   13572        86432 :     x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
   13573       111837 :   if (addr.index)
   13574              :     {
   13575          185 :       rtx idx = addr.index;
                              /* Wrap the index in a MULT unless the scale is 1.  */
   13576          185 :       if (addr.scale != 1)
   13577          185 :         idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
   13578          185 :       x = gen_rtx_PLUS (Pmode, idx, x);
   13579              :     }
   13580       111837 :   if (addr.base)
   13581            2 :     x = gen_rtx_PLUS (Pmode, addr.base, x);
                          /* ORIG_X was a MEM: pass the rebuilt address through
                             replace_equiv_address_nv instead of returning it bare.  */
   13582       111837 :   if (MEM_P (orig_x))
   13583          198 :     x = replace_equiv_address_nv (orig_x, x);
   13584              :   return x;
   13585              : }
   13586              : 
   13587              : /* In the name of slightly smaller debug output, and to cater to
   13588              :    general assembler lossage, recognize PIC+GOTOFF and turn it back
   13589              :    into a direct symbol reference.
   13590              : 
   13591              :    On Darwin, this is necessary to avoid a crash, because Darwin
   13592              :    has a different PIC label for each routine but the DWARF debugging
   13593              :    information is not associated with any particular routine, so it's
   13594              :    necessary to remove references to the PIC label from RTL stored by
   13595              :    the DWARF output code.
   13596              : 
   13597              :    This helper is used in the normal ix86_delegitimize_address
   13598              :    entrypoint (e.g. used in the target delegitimization hook) and
   13599              :    in ix86_find_base_term.  As compile time memory optimization, we
   13600              :    avoid allocating rtxes that will not change anything on the outcome
   13601              :    of the callers (find_base_value and find_base_term).  */
   13602              : 
   13603              : static inline rtx
   13604   3534622840 : ix86_delegitimize_address_1 (rtx x, bool base_term_p)
   13605              : {
                          /* X is the rtx to delegitimize.  BASE_TERM_P is true when called
                             from ix86_find_base_term and false when called from
                             ix86_delegitimize_address; it only suppresses rtx allocations
                             and selects the _GLOBAL_OFFSET_TABLE_ form further down.  */
   13606   3534622840 :   rtx orig_x = delegitimize_mem_from_attrs (x);
   13607              :   /* addend is NULL or some rtx if x is something+GOTOFF where
   13608              :      something doesn't include the PIC register.  */
   13609   3534622840 :   rtx addend = NULL_RTX;
   13610              :   /* reg_addend is NULL or a multiple of some register.  */
   13611   3534622840 :   rtx reg_addend = NULL_RTX;
   13612              :   /* const_addend is NULL or a const_int.  */
   13613   3534622840 :   rtx const_addend = NULL_RTX;
   13614              :   /* This is the result, or NULL.  */
   13615   3534622840 :   rtx result = NULL_RTX;
   13616              : 
   13617   3534622840 :   x = orig_x;
   13618              : 
                          /* For a MEM, examine its address operand.  */
   13619   3534622840 :   if (MEM_P (x))
   13620     61798783 :     x = XEXP (x, 0);
   13621              : 
   13622   3534622840 :   if (TARGET_64BIT)
   13623              :     {
                              /* (const (plus (unspec [SYM] UNSPEC_PCREL) (const_int)))
                                 in Pmode becomes (plus (const_int) SYM).  */
   13624    249973723 :       if (GET_CODE (x) == CONST
   13625      8614279 :           && GET_CODE (XEXP (x, 0)) == PLUS
   13626      6673786 :           && GET_MODE (XEXP (x, 0)) == Pmode
   13627      6673737 :           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
   13628      6673737 :           && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
   13629    249977868 :           && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
   13630              :         {
   13631              :           /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
   13632              :              base.  A CONST can't be arg_pointer_rtx based.  */
   13633            0 :           if (base_term_p && MEM_P (orig_x))
   13634              :             return orig_x;
   13635            0 :           rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
   13636            0 :           x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
   13637            0 :           if (MEM_P (orig_x))
   13638            0 :             x = replace_equiv_address_nv (orig_x, x);
   13639            0 :           return x;
   13640              :         }
   13641              : 
                              /* (const (unspec [SYM] ...)): UNSPEC_PCREL is always unwrapped
                                 to SYM, UNSPEC_GOTPCREL only when ORIG_X is a MEM.  */
   13642    249973723 :       if (GET_CODE (x) == CONST
   13643      8614279 :           && GET_CODE (XEXP (x, 0)) == UNSPEC
   13644      1940542 :           && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
   13645       655038 :               || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
   13646      1285504 :           && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
   13647              :         {
   13648       294674 :           x = XVECEXP (XEXP (x, 0), 0, 0);
                                  /* A MEM whose mode differs from the symbol's: take the
                                     low part in the MEM's mode, or give up if none.  */
   13649       294674 :           if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
   13650              :             {
   13651            9 :               x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
   13652            9 :               if (x == NULL_RTX)
   13653              :                 return orig_x;
   13654              :             }
   13655       294674 :           return x;
   13656              :         }
   13657              : 
                              /* Outside the medium and large PIC code models only the TLS
                                 form is left to try.  */
   13658    249679049 :       if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
   13659    249677388 :         return ix86_delegitimize_tls_address (orig_x);
   13660              : 
   13661              :       /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
   13662              :          and -mcmodel=medium -fpic.  */
   13663              :     }
   13664              : 
                          /* The shared code only handles (plus SOMETHING (const ...)).  */
   13665   3284650778 :   if (GET_CODE (x) != PLUS
   13666   1558809835 :       || GET_CODE (XEXP (x, 1)) != CONST)
   13667   3258344203 :     return ix86_delegitimize_tls_address (orig_x);
   13668              : 
                          /* Classify the first PLUS operand into nothing, reg_addend or
                             addend, depending on where the PIC register appears.  */
   13669     26306575 :   if (ix86_pic_register_p (XEXP (x, 0)))
   13670              :     /* %ebx + GOT/GOTOFF */
   13671              :     ;
   13672      1280086 :   else if (GET_CODE (XEXP (x, 0)) == PLUS)
   13673              :     {
   13674              :       /* %ebx + %reg * scale + GOT/GOTOFF */
   13675       473596 :       reg_addend = XEXP (x, 0);
   13676       473596 :       if (ix86_pic_register_p (XEXP (reg_addend, 0)))
   13677       395549 :         reg_addend = XEXP (reg_addend, 1);
   13678        78047 :       else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
   13679        46661 :         reg_addend = XEXP (reg_addend, 0);
   13680              :       else
   13681              :         {
                                  /* Neither operand is the PIC register: treat the whole
                                     inner PLUS as a PIC-free addend.  */
   13682        31386 :           reg_addend = NULL_RTX;
   13683        31386 :           addend = XEXP (x, 0);
   13684              :         }
   13685              :     }
   13686              :   else
   13687              :     addend = XEXP (x, 0);
   13688              : 
                          /* Step inside the CONST and split off an optional constant
                             offset.  */
   13689     26306575 :   x = XEXP (XEXP (x, 1), 0);
   13690     26306575 :   if (GET_CODE (x) == PLUS
   13691      1447840 :       && CONST_INT_P (XEXP (x, 1)))
   13692              :     {
   13693      1447840 :       const_addend = XEXP (x, 1);
   13694      1447840 :       x = XEXP (x, 0);
   13695              :     }
   13696              : 
                          /* Accepted UNSPECs: UNSPEC_GOT for a MEM without addend,
                             UNSPEC_GOTOFF for a non-MEM, and UNSPEC_PLTOFF for a non-MEM
                             without addend under CM_LARGE_PIC.  */
   13697     26306575 :   if (GET_CODE (x) == UNSPEC
   13698     25631934 :       && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
   13699      6725820 :           || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
   13700      1092596 :           || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
   13701            4 :               && !MEM_P (orig_x) && !addend)))
   13702     24539342 :     result = XVECEXP (x, 0, 0);
   13703              : 
                          /* 32-bit Mach-O: darwin_local_data_pic references on a non-MEM
                             are unwrapped the same way.  */
   13704     24539342 :   if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
   13705              :       && !MEM_P (orig_x))
   13706              :     result = XVECEXP (x, 0, 0);
   13707              : 
                          /* Nothing matched: fall back to the TLS helper.  */
   13708     24539342 :   if (! result)
   13709      1767233 :     return ix86_delegitimize_tls_address (orig_x);
   13710              : 
   13711              :   /* For (PLUS something CONST_INT) both find_base_{value,term} just
   13712              :      recurse on the first operand.  */
   13713     24539342 :   if (const_addend && !base_term_p)
   13714       355425 :     result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
   13715     24539342 :   if (reg_addend)
   13716       859412 :     result = gen_rtx_PLUS (Pmode, reg_addend, result);
   13717     24539342 :   if (addend)
   13718              :     {
   13719              :       /* If the rest of original X doesn't involve the PIC register, add
   13720              :          addend and subtract pic_offset_table_rtx.  This can happen e.g.
   13721              :          for code like:
   13722              :          leal (%ebx, %ecx, 4), %ecx
   13723              :          ...
   13724              :          movl foo@GOTOFF(%ecx), %edx
   13725              :          in which case we return (%ecx - %ebx) + foo
   13726              :          or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
   13727              :          and reload has completed.  Don't do the latter for debug,
   13728              :          as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly.  */
   13729       135856 :       if (pic_offset_table_rtx
   13730       135856 :           && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
   13731         2370 :         result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
   13732              :                                                      pic_offset_table_rtx),
   13733              :                                result);
   13734       135066 :       else if (base_term_p
   13735       128758 :                && pic_offset_table_rtx
   13736              :                && !TARGET_MACHO
   13737              :                && !TARGET_VXWORKS_VAROFF)
   13738              :         {
   13739       257516 :           rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
   13740       257516 :           tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
   13741       257516 :           result = gen_rtx_PLUS (Pmode, tmp, result);
   13742       128758 :         }
   13743              :       else
   13744              :         return orig_x;
   13745              :     }
                          /* A MEM in a mode other than Pmode gets the low part of the
                             result in its own mode; if there is none, return ORIG_X.  */
   13746     49065983 :   if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
   13747              :     {
   13748            0 :       result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
   13749            0 :       if (result == NULL_RTX)
   13750              :         return orig_x;
   13751              :     }
   13752              :   return result;
   13753              : }
   13754              : 
   13755              : /* The normal entry point for the helper above, with BASE_TERM_P false.  */
   13756              : 
   13757              : static rtx
   13758    320677177 : ix86_delegitimize_address (rtx x)
   13759              : {
                          /* Pass false for base_term_p: this is not the
                             ix86_find_base_term caller.  */
   13760    320677177 :   return ix86_delegitimize_address_1 (x, false);
   13761              : }
   13762              : 
   13763              : /* If X is a machine specific address (i.e. a symbol or label being
   13764              :    referenced as a displacement from the GOT implemented using an
   13765              :    UNSPEC), then return the base term.  Otherwise return X.  */
   13766              : 
   13767              : rtx
   13768   6683261833 : ix86_find_base_term (rtx x)
   13769              : {
   13770   6683261833 :   rtx term;
   13771              : 
   13772   6683261833 :   if (TARGET_64BIT)
   13773              :     {
                              /* Only a CONST is inspected; anything else is returned
                                 unchanged.  */
   13774   3469316170 :       if (GET_CODE (x) != CONST)
   13775              :         return x;
   13776     38640994 :       term = XEXP (x, 0);
                              /* Skip an optional constant offset.  */
   13777     38640994 :       if (GET_CODE (term) == PLUS
   13778     38626109 :           && CONST_INT_P (XEXP (term, 1)))
   13779     38626109 :         term = XEXP (term, 0);
                              /* Only UNSPEC_GOTPCREL and UNSPEC_PCREL are unwrapped.  */
   13780     38640994 :       if (GET_CODE (term) != UNSPEC
   13781        40591 :           || (XINT (term, 1) != UNSPEC_GOTPCREL
   13782        40591 :               && XINT (term, 1) != UNSPEC_PCREL))
   13783              :         return x;
   13784              : 
                              /* The base term is the first element of the UNSPEC vector.  */
   13785            0 :       return XVECEXP (term, 0, 0);
   13786              :     }
   13787              : 
                          /* Not TARGET_64BIT: use the shared delegitimization helper with
                             base_term_p set.  */
   13788   3213945663 :   return ix86_delegitimize_address_1 (x, true);
   13789              : }
   13790              : 
   13791              : /* Return true if X shouldn't be emitted into the debug info.
   13792              :    Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
   13793              :    symbol easily into the .debug_info section, so we must not
   13794              :    delegitimize, but instead assemble as @gotoff.
   13795              :    Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
   13796              :    assembles that as _GLOBAL_OFFSET_TABLE_-. expression.  */
   13797              : 
   13798              : static bool
   13799      1743576 : ix86_const_not_ok_for_debug_p (rtx x)
   13800              : {
                          /* Any UNSPEC other than UNSPEC_GOTOFF is rejected.  */
   13801      1743576 :   if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
   13802              :     return true;
   13803              : 
                          /* A SYMBOL_REF whose name equals GOT_SYMBOL_NAME is rejected.  */
   13804      1743556 :   if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
   13805            0 :     return true;
   13806              : 
   13807              :   return false;
   13808              : }
   13809              : 
   13810              : static void
   13811      7106781 : put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
   13812              :                     bool fp, FILE *file)
   13813              : {
                          /* Write to FILE the condition suffix for comparison CODE
                             evaluated in flags mode MODE.  REVERSE asks for the reversed
                             condition.  FP selects an alternative spelling in the GTU,
                             LTU, GEU, UNORDERED and ORDERED cases.  Combinations of CODE
                             and MODE not handled below hit gcc_unreachable or an
                             assertion.  */
   13814      7106781 :   const char *suffix;
   13815              : 
                          /* CCFPmode: translate CODE with ix86_fp_compare_code_to_integer
                             and continue as if the mode were CCmode.  */
   13816      7106781 :   if (mode == CCFPmode)
   13817              :     {
   13818       565676 :       code = ix86_fp_compare_code_to_integer (code);
   13819       565676 :       mode = CCmode;
   13820              :     }
   13821      7106781 :   if (reverse)
   13822       209421 :     code = reverse_condition (code);
   13823              : 
   13824      7106781 :   switch (code)
   13825              :     {
   13826      2746891 :     case EQ:
   13827      2746891 :       gcc_assert (mode != CCGZmode);
                              /* Modes CCA, CCC, CCO, CCP and CCS select "a", "c", "o", "p"
                                 and "s"; every other mode uses "e".  */
   13828      2746891 :       switch (mode)
   13829              :         {
   13830              :         case E_CCAmode:
   13831              :           suffix = "a";
   13832              :           break;
   13833              :         case E_CCCmode:
   13834        26378 :           suffix = "c";
   13835              :           break;
   13836              :         case E_CCOmode:
   13837      7106781 :           suffix = "o";
   13838              :           break;
   13839              :         case E_CCPmode:
   13840       234093 :           suffix = "p";
   13841              :           break;
   13842              :         case E_CCSmode:
   13843       121626 :           suffix = "s";
   13844              :           break;
   13845      2727163 :         default:
   13846      2727163 :           suffix = "e";
   13847      2727163 :           break;
   13848              :         }
   13849              :       break;
   13850      2315036 :     case NE:
   13851      2315036 :       gcc_assert (mode != CCGZmode);
                              /* Same table as EQ with an "n" in front of each suffix.  */
   13852      2315036 :       switch (mode)
   13853              :         {
   13854              :         case E_CCAmode:
   13855              :           suffix = "na";
   13856              :           break;
   13857              :         case E_CCCmode:
   13858        12016 :           suffix = "nc";
   13859              :           break;
   13860        10766 :         case E_CCOmode:
   13861        10766 :           suffix = "no";
   13862        10766 :           break;
   13863              :         case E_CCPmode:
   13864         4443 :           suffix = "np";
   13865              :           break;
   13866              :         case E_CCSmode:
   13867        50707 :           suffix = "ns";
   13868              :           break;
   13869      2302384 :         default:
   13870      2302384 :           suffix = "ne";
   13871      2302384 :           break;
   13872              :         }
   13873              :       break;
   13874       256217 :     case GT:
   13875       256217 :       gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
   13876              :       suffix = "g";
   13877              :       break;
   13878       172715 :     case GTU:
   13879              :       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
   13880              :          Those same assemblers have the same but opposite lossage on cmov.  */
   13881       172715 :       if (mode == CCmode)
   13882       172777 :         suffix = fp ? "nbe" : "a";
   13883              :       else
   13884            0 :         gcc_unreachable ();
   13885              :       break;
   13886       236850 :     case LT:
                              /* CCNO and CCGOC use "s"; CC, CCGC and CCGZ use "l".  */
   13887       236850 :       switch (mode)
   13888              :         {
   13889              :         case E_CCNOmode:
   13890              :         case E_CCGOCmode:
   13891              :           suffix = "s";
   13892              :           break;
   13893              : 
   13894              :         case E_CCmode:
   13895              :         case E_CCGCmode:
   13896              :         case E_CCGZmode:
   13897      7106781 :           suffix = "l";
   13898              :           break;
   13899              : 
   13900            0 :         default:
   13901            0 :           gcc_unreachable ();
   13902              :         }
   13903              :       break;
   13904       442094 :     case LTU:
                              /* In CCCmode the suffix is "c" unless FP asks for "b".  */
   13905       442094 :       if (mode == CCmode || mode == CCGZmode)
   13906              :         suffix = "b";
   13907        25047 :       else if (mode == CCCmode)
   13908        26378 :         suffix = fp ? "b" : "c";
   13909              :       else
   13910            0 :         gcc_unreachable ();
   13911              :       break;
   13912       145286 :     case GE:
                              /* CCNO and CCGOC use "ns"; CC, CCGC and CCGZ use "ge".  */
   13913       145286 :       switch (mode)
   13914              :         {
   13915              :         case E_CCNOmode:
   13916              :         case E_CCGOCmode:
   13917              :           suffix = "ns";
   13918              :           break;
   13919              : 
   13920              :         case E_CCmode:
   13921              :         case E_CCGCmode:
   13922              :         case E_CCGZmode:
   13923      7106781 :           suffix = "ge";
   13924              :           break;
   13925              : 
   13926            0 :         default:
   13927            0 :           gcc_unreachable ();
   13928              :         }
   13929              :       break;
   13930       188608 :     case GEU:
                              /* In CCCmode the suffix is "nc" unless FP asks for "nb".  */
   13931       188608 :       if (mode == CCmode || mode == CCGZmode)
   13932              :         suffix = "nb";
   13933        10150 :       else if (mode == CCCmode)
   13934        12016 :         suffix = fp ? "nb" : "nc";
   13935              :       else
   13936            0 :         gcc_unreachable ();
   13937              :       break;
   13938       247320 :     case LE:
   13939       247320 :       gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
   13940              :       suffix = "le";
   13941              :       break;
   13942       117226 :     case LEU:
   13943       117226 :       if (mode == CCmode)
   13944              :         suffix = "be";
   13945              :       else
   13946            0 :         gcc_unreachable ();
   13947              :       break;
   13948       234093 :     case UNORDERED:
                              /* The mode is not consulted for UNORDERED and ORDERED.  */
   13949       234100 :       suffix = fp ? "u" : "p";
   13950              :       break;
   13951         4445 :     case ORDERED:
   13952         4450 :       suffix = fp ? "nu" : "np";
   13953              :       break;
   13954            0 :     default:
   13955            0 :       gcc_unreachable ();
   13956              :     }
   13957      7106781 :   fputs (suffix, file);
   13958      7106781 : }
   13959              : 
   13960              : /* Print the name of register X to FILE based on its machine mode and number.
   13961              :    If CODE is 'w', pretend the mode is HImode.
   13962              :    If CODE is 'b', pretend the mode is QImode.
   13963              :    If CODE is 'k', pretend the mode is SImode.
   13964              :    If CODE is 'q', pretend the mode is DImode.
   13965              :    If CODE is 'x', pretend the mode is V4SFmode.
   13966              :    If CODE is 't', pretend the mode is V8SFmode.
   13967              :    If CODE is 'g', pretend the mode is V16SFmode.
   13968              :    If CODE is 'h', pretend the reg is the 'high' byte register.
   13969              :    If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   13970              :    If CODE is 'd', duplicate the operand for AVX instruction.
   13971              :    If CODE is 'V', print naked full integer register name without %.
   13972              :  */
   13973              : 
   13974              : void
   13975    123330134 : print_reg (rtx x, int code, FILE *file)
   13976              : {
   13977    123330134 :   const char *reg;
   13978    123330134 :   int msize;
   13979    123330134 :   unsigned int regno;
   13980    123330134 :   bool duplicated;
   13981              : 
                          /* In the AT&T dialect every name gets a '%' prefix, except for
                             the naked 'V' form.  */
   13982    123330134 :   if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
   13983    123327681 :     putc ('%', file);
   13984              : 
                          /* pc_rtx is printed as "rip" and asserted to be 64-bit only.  */
   13985    123330134 :   if (x == pc_rtx)
   13986              :     {
   13987      5732733 :       gcc_assert (TARGET_64BIT);
   13988      5732733 :       fputs ("rip", file);
   13989      5732733 :       return;
   13990              :     }
   13991              : 
   13992    117597401 :   if (code == 'y' && STACK_TOP_P (x))
   13993              :     {
   13994       289965 :       fputs ("st(0)", file);
   13995       289965 :       return;
   13996              :     }
   13997              : 
                          /* Map the modifier letter to an operand size in bytes; size 0
                             selects the 'high' byte register table below.  Any other CODE
                             uses the size of X's own mode.  */
   13998    117307436 :   if (code == 'w')
   13999              :     msize = 2;
   14000              :   else if (code == 'b')
   14001              :     msize = 1;
   14002              :   else if (code == 'k')
   14003              :     msize = 4;
   14004              :   else if (code == 'q')
   14005              :     msize = 8;
   14006              :   else if (code == 'h')
   14007              :     msize = 0;
   14008              :   else if (code == 'x')
   14009              :     msize = 16;
   14010              :   else if (code == 't')
   14011              :     msize = 32;
   14012              :   else if (code == 'g')
   14013              :     msize = 64;
   14014              :   else
   14015    200503384 :     msize = GET_MODE_SIZE (GET_MODE (x));
   14016              : 
   14017    117307436 :   regno = REGNO (x);
   14018              : 
                          /* These registers are reported through output_operand_lossage
                             and nothing further is printed for them.  */
   14019    117307436 :   if (regno == ARG_POINTER_REGNUM
   14020    117307436 :       || regno == FRAME_POINTER_REGNUM
   14021    117307436 :       || regno == FPSR_REG)
   14022              :     {
   14023            0 :       output_operand_lossage
   14024            0 :         ("invalid use of register '%s'", reg_names[regno]);
   14025            0 :       return;
   14026              :     }
   14027    117307436 :   else if (regno == FLAGS_REG)
   14028              :     {
   14029            1 :       output_operand_lossage ("invalid use of asm flag output");
   14030            1 :       return;
   14031              :     }
   14032              : 
                          /* 'V' forces the word_mode size for general registers and is an
                             error on any other register.  */
   14033    117307435 :   if (code == 'V')
   14034              :     {
   14035            1 :       if (GENERAL_REGNO_P (regno))
   14036            2 :         msize = GET_MODE_SIZE (word_mode);
   14037              :       else
   14038            0 :         error ("%<V%> modifier on non-integer register");
   14039              :     }
   14040              : 
   14041    117307435 :   duplicated = code == 'd' && TARGET_AVX;
   14042              : 
                          /* Choose the name table entry for the size and emit any prefix
                             letter; the cases fall through from larger to smaller
                             sizes.  */
   14043    117307435 :   switch (msize)
   14044              :     {
   14045     77818215 :     case 16:
   14046     77818215 :     case 12:
   14047     77818215 :     case 8:
   14048    145570853 :       if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
   14049            5 :         warning (0, "unsupported size for integer register");
   14050              :       /* FALLTHRU */
   14051    113877268 :     case 4:
                              /* Legacy integer registers get an 'r' prefix for sizes above 4
                                 on 64-bit targets, otherwise an 'e' prefix.  */
   14052    113877268 :       if (LEGACY_INT_REGNO_P (regno))
   14053    123269204 :         putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
   14054              :       /* FALLTHRU */
   14055    114757936 :     case 2:
   14056     22193275 :     normal:
   14057    114757936 :       reg = hi_reg_name[regno];
   14058    114757936 :       break;
   14059      2283053 :     case 1:
                              /* Registers beyond the qi_reg_name table use the hi_reg_name
                                 path instead.  */
   14060      2283053 :       if (regno >= ARRAY_SIZE (qi_reg_name))
   14061       273789 :         goto normal;
   14062      2009264 :       if (!ANY_QI_REGNO_P (regno))
   14063            0 :         error ("unsupported size for integer register");
   14064      2009264 :       reg = qi_reg_name[regno];
   14065      2009264 :       break;
   14066        27105 :     case 0:
   14067        27105 :       if (regno >= ARRAY_SIZE (qi_high_reg_name))
   14068            0 :         goto normal;
   14069        27105 :       reg = qi_high_reg_name[regno];
   14070        27105 :       break;
   14071       513130 :     case 32:
   14072       513130 :     case 64:
   14073       513130 :       if (SSE_REGNO_P (regno))
   14074              :         {
   14075       513130 :           gcc_assert (!duplicated);
                                  /* Emit 'y' (size 32) or 'z' (size 64), then print the
                                     hi_reg_name entry from its second character on.  */
   14076       718784 :           putc (msize == 32 ? 'y' : 'z', file);
   14077       513130 :           reg = hi_reg_name[regno] + 1;
   14078       513130 :           break;
   14079              :         }
   14080            0 :       goto normal;
   14081            0 :     default:
   14082            0 :       gcc_unreachable ();
   14083              :     }
   14084              : 
   14085    117307435 :   fputs (reg, file);
   14086              : 
   14087              :   /* Irritatingly, AMD extended registers use
   14088              :      different naming convention: "r%d[bwd]"  */
   14089    117307435 :   if (REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
   14090              :     {
   14091     10398674 :       gcc_assert (TARGET_64BIT);
   14092     10398674 :       switch (msize)
   14093              :         {
   14094            0 :           case 0:
   14095            0 :             error ("extended registers have no high halves");
   14096            0 :             break;
   14097       181509 :           case 1:
   14098       181509 :             putc ('b', file);
   14099       181509 :             break;
   14100        27729 :           case 2:
   14101        27729 :             putc ('w', file);
   14102        27729 :             break;
   14103      2535580 :           case 4:
   14104      2535580 :             putc ('d', file);
   14105      2535580 :             break;
   14106              :           case 8:
   14107              :             /* no suffix */
   14108              :             break;
   14109            0 :           default:
   14110            0 :             error ("unsupported operand size for extended register");
   14111            0 :             break;
   14112              :         }
                              /* Return here, so the 'd' duplication below is never applied
                                 to these registers.  */
   14113     10398674 :       return;
   14114              :     }
   14115              : 
                          /* 'd' with TARGET_AVX: print the same name a second time after a
                             comma, with a '%' prefix in the AT&T dialect.  */
   14116    106908761 :   if (duplicated)
   14117              :     {
   14118        16919 :       if (ASSEMBLER_DIALECT == ASM_ATT)
   14119        16898 :         fprintf (file, ", %%%s", reg);
   14120              :       else
   14121           21 :         fprintf (file, ", %s", reg);
   14122              :     }
   14123              : }
   14124              : 
   14125              : /* Meaning of CODE:
   14126              :    L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   14127              :    C -- print opcode suffix for set/cmov insn.
   14128              :    c -- like C, but print reversed condition
   14129              :    F,f -- likewise, but for floating-point.
   14130              :    O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
   14131              :         otherwise nothing
   14132              :    R -- print embedded rounding and sae.
   14133              :    r -- print only sae.
   14134              :    z -- print the opcode suffix for the size of the current operand.
   14135              :    Z -- likewise, with special suffixes for x87 instructions.
   14136              :    * -- print a star (in certain assembler syntax)
   14137              :    A -- print an absolute memory reference.
   14138              :    E -- print address with DImode register names if TARGET_64BIT.
   14139              :    w -- print the operand as if it's a "word" (HImode) even if it isn't.
   14140              :    s -- print a shift double count, followed by the assemblers argument
   14141              :         delimiter.
   14142              :    b -- print the QImode name of the register for the indicated operand.
   14143              :         %b0 would print %al if operands[0] is reg 0.
   14144              :    w --  likewise, print the HImode name of the register.
   14145              :    k --  likewise, print the SImode name of the register.
   14146              :    q --  likewise, print the DImode name of the register.
   14147              :    x --  likewise, print the V4SFmode name of the register.
   14148              :    t --  likewise, print the V8SFmode name of the register.
   14149              :    g --  likewise, print the V16SFmode name of the register.
   14150              :    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   14151              :    y -- print "st(0)" instead of "st" as a register.
   14152              :    d -- print duplicated register operand for AVX instruction.
   14153              :    D -- print condition for SSE cmp instruction.
   14154              :    P -- if PIC, print an @PLT suffix.  For -fno-plt, load function
   14155              :         address from GOT.
   14156              :    p -- print raw symbol name.
   14157              :    X -- don't print any sort of PIC '@' suffix for a symbol.
   14158              :    & -- print some in-use local-dynamic symbol name.
   14159              :    H -- print a memory address offset by 8; used for sse high-parts
   14160              :    Y -- print condition for XOP pcom* instruction.
   14161              :    V -- print naked full integer register name without %.
   14162              :    v -- print segment override prefix
   14163              :    + -- print a branch hint as 'cs' or 'ds' prefix
   14164              :    ; -- print a semicolon (after prefixes due to bug in older gas).
   14165              :    ~ -- print "i" if TARGET_AVX2, "f" otherwise.
   14166              :    ^ -- print addr32 prefix if Pmode != word_mode
   14167              :    M -- print addr32 prefix for TARGET_X32 with VSIB address.
   14168              :    ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
   14169              :    N -- print maskz if it's constant 0 operand.
   14170              :    G -- print embedded flag for ccmp/ctest.
   14171              :  */
   14172              : 
   14173              : void
   14174    176569534 : ix86_print_operand (FILE *file, rtx x, int code)
   14175              : {
   14176    176759063 :   if (code)
   14177              :     {
   14178     61916564 :       switch (code)
   14179              :         {
   14180       189525 :         case 'A':
   14181       189525 :           switch (ASSEMBLER_DIALECT)
   14182              :             {
   14183       189525 :             case ASM_ATT:
   14184       189525 :               putc ('*', file);
   14185       189525 :               break;
   14186              : 
   14187            0 :             case ASM_INTEL:
   14188              :               /* Intel syntax. For absolute addresses, registers should not
   14189              :                  be surrounded by brackets.  */
   14190            0 :               if (!REG_P (x))
   14191              :                 {
   14192            0 :                   putc ('[', file);
   14193            0 :                   ix86_print_operand (file, x, 0);
   14194            0 :                   putc (']', file);
   14195            0 :                   return;
   14196              :                 }
   14197              :               break;
   14198              : 
   14199            0 :             default:
   14200            0 :               gcc_unreachable ();
   14201              :             }
   14202              : 
   14203       189525 :           ix86_print_operand (file, x, 0);
   14204       189525 :           return;
   14205              : 
   14206      3529794 :         case 'E':
   14207              :           /* Wrap address in an UNSPEC to declare special handling.  */
   14208      3529794 :           if (TARGET_64BIT)
   14209      3045556 :             x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
   14210              : 
   14211      3529794 :           output_address (VOIDmode, x);
   14212      3529794 :           return;
   14213              : 
   14214            0 :         case 'L':
   14215            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14216            0 :             putc ('l', file);
   14217            0 :           return;
   14218              : 
   14219            0 :         case 'W':
   14220            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14221            0 :             putc ('w', file);
   14222            0 :           return;
   14223              : 
   14224            0 :         case 'B':
   14225            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14226            0 :             putc ('b', file);
   14227            0 :           return;
   14228              : 
   14229            0 :         case 'Q':
   14230            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14231            0 :             putc ('l', file);
   14232            0 :           return;
   14233              : 
   14234            0 :         case 'S':
   14235            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14236            0 :             putc ('s', file);
   14237            0 :           return;
   14238              : 
   14239            0 :         case 'T':
   14240            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14241            0 :             putc ('t', file);
   14242            0 :           return;
   14243              : 
   14244              :         case 'O':
   14245              : #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
   14246              :           if (ASSEMBLER_DIALECT != ASM_ATT)
   14247              :             return;
   14248              : 
   14249              :           switch (GET_MODE_SIZE (GET_MODE (x)))
   14250              :             {
   14251              :             case 2:
   14252              :               putc ('w', file);
   14253              :               break;
   14254              : 
   14255              :             case 4:
   14256              :               putc ('l', file);
   14257              :               break;
   14258              : 
   14259              :             case 8:
   14260              :               putc ('q', file);
   14261              :               break;
   14262              : 
   14263              :             default:
   14264              :               output_operand_lossage ("invalid operand size for operand "
   14265              :                                       "code 'O'");
   14266              :               return;
   14267              :             }
   14268              : 
   14269              :           putc ('.', file);
   14270              : #endif
   14271              :           return;
   14272              : 
   14273        38044 :         case 'z':
   14274        38044 :           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
   14275              :             {
   14276              :               /* Opcodes don't get size suffixes if using Intel opcodes.  */
   14277        38042 :               if (ASSEMBLER_DIALECT == ASM_INTEL)
   14278              :                 return;
   14279              : 
   14280        76084 :               switch (GET_MODE_SIZE (GET_MODE (x)))
   14281              :                 {
   14282            6 :                 case 1:
   14283            6 :                   putc ('b', file);
   14284            6 :                   return;
   14285              : 
   14286            6 :                 case 2:
   14287            6 :                   putc ('w', file);
   14288            6 :                   return;
   14289              : 
   14290        37549 :                 case 4:
   14291        37549 :                   putc ('l', file);
   14292        37549 :                   return;
   14293              : 
   14294          481 :                 case 8:
   14295          481 :                   putc ('q', file);
   14296          481 :                   return;
   14297              : 
   14298            0 :                 default:
   14299            0 :                   output_operand_lossage ("invalid operand size for operand "
   14300              :                                           "code 'z'");
   14301            0 :                   return;
   14302              :                 }
   14303              :             }
   14304              : 
   14305            2 :           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
   14306              :             {
   14307            1 :               if (this_is_asm_operands)
   14308            1 :                 warning_for_asm (this_is_asm_operands,
   14309              :                                  "non-integer operand used with operand code %<z%>");
   14310              :               else
   14311            0 :                 warning (0, "non-integer operand used with operand code %<z%>");
   14312              :             }
   14313              :           /* FALLTHRU */
   14314              : 
   14315       378797 :         case 'Z':
   14316              :           /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
   14317       378797 :           if (ASSEMBLER_DIALECT == ASM_INTEL)
   14318              :             return;
   14319              : 
   14320       378797 :           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
   14321              :             {
   14322        29330 :               switch (GET_MODE_SIZE (GET_MODE (x)))
   14323              :                 {
   14324         3525 :                 case 2:
   14325              : #ifdef HAVE_AS_IX86_FILDS
   14326         3525 :                   putc ('s', file);
   14327              : #endif
   14328         3525 :                   return;
   14329              : 
   14330         3944 :                 case 4:
   14331         3944 :                   putc ('l', file);
   14332         3944 :                   return;
   14333              : 
   14334         7196 :                 case 8:
   14335              : #ifdef HAVE_AS_IX86_FILDQ
   14336         7196 :                   putc ('q', file);
   14337              : #else
   14338              :                   fputs ("ll", file);
   14339              : #endif
   14340         7196 :                   return;
   14341              : 
   14342              :                 default:
   14343              :                   break;
   14344              :                 }
   14345              :             }
   14346       364132 :           else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
   14347              :             {
   14348              :               /* 387 opcodes don't get size suffixes
   14349              :                  if the operands are registers.  */
   14350       364130 :               if (STACK_REG_P (x))
   14351              :                 return;
   14352              : 
   14353       683708 :               switch (GET_MODE_SIZE (GET_MODE (x)))
   14354              :                 {
   14355        23309 :                 case 4:
   14356        23309 :                   putc ('s', file);
   14357        23309 :                   return;
   14358              : 
   14359        32699 :                 case 8:
   14360        32699 :                   putc ('l', file);
   14361        32699 :                   return;
   14362              : 
   14363       285844 :                 case 12:
   14364       285844 :                 case 16:
   14365       285844 :                   putc ('t', file);
   14366       285844 :                   return;
   14367              : 
   14368              :                 default:
   14369              :                   break;
   14370              :                 }
   14371              :             }
   14372              :           else
   14373              :             {
   14374            2 :               output_operand_lossage ("invalid operand type used with "
   14375              :                                       "operand code '%c'", code);
   14376            2 :               return;
   14377              :             }
   14378              : 
   14379            2 :           output_operand_lossage ("invalid operand size for operand code '%c'",
   14380              :                                   code);
   14381            2 :           return;
   14382              : 
   14383              :         case 'd':
   14384              :         case 'b':
   14385              :         case 'w':
   14386              :         case 'k':
   14387              :         case 'q':
   14388              :         case 'h':
   14389              :         case 't':
   14390              :         case 'g':
   14391              :         case 'y':
   14392              :         case 'x':
   14393              :         case 'X':
   14394              :         case 'P':
   14395              :         case 'p':
   14396              :         case 'V':
   14397              :           break;
   14398              : 
   14399            0 :         case 's':
   14400            0 :           if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
   14401              :             {
   14402            0 :               ix86_print_operand (file, x, 0);
   14403            0 :               fputs (", ", file);
   14404              :             }
   14405            0 :           return;
   14406              : 
   14407          494 :         case 'Y':
   14408          494 :           switch (GET_CODE (x))
   14409              :             {
   14410          182 :             case NE:
   14411          182 :               fputs ("neq", file);
   14412          182 :               break;
   14413           32 :             case EQ:
   14414           32 :               fputs ("eq", file);
   14415           32 :               break;
   14416           64 :             case GE:
   14417           64 :             case GEU:
   14418           64 :               fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
   14419           64 :               break;
   14420           40 :             case GT:
   14421           40 :             case GTU:
   14422           40 :               fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
   14423           40 :               break;
   14424           64 :             case LE:
   14425           64 :             case LEU:
   14426           64 :               fputs ("le", file);
   14427           64 :               break;
   14428          112 :             case LT:
   14429          112 :             case LTU:
   14430          112 :               fputs ("lt", file);
   14431          112 :               break;
   14432            0 :             case UNORDERED:
   14433            0 :               fputs ("unord", file);
   14434            0 :               break;
   14435            0 :             case ORDERED:
   14436            0 :               fputs ("ord", file);
   14437            0 :               break;
   14438            0 :             case UNEQ:
   14439            0 :               fputs ("ueq", file);
   14440            0 :               break;
   14441            0 :             case UNGE:
   14442            0 :               fputs ("nlt", file);
   14443            0 :               break;
   14444            0 :             case UNGT:
   14445            0 :               fputs ("nle", file);
   14446            0 :               break;
   14447            0 :             case UNLE:
   14448            0 :               fputs ("ule", file);
   14449            0 :               break;
   14450            0 :             case UNLT:
   14451            0 :               fputs ("ult", file);
   14452            0 :               break;
   14453            0 :             case LTGT:
   14454            0 :               fputs ("une", file);
   14455            0 :               break;
   14456            0 :             default:
   14457            0 :               output_operand_lossage ("operand is not a condition code, "
   14458              :                                       "invalid operand code 'Y'");
   14459            0 :               return;
   14460              :             }
   14461          494 :           return;
   14462              : 
   14463         9339 :         case 'D':
   14464              :           /* Little bit of braindamage here.  The SSE compare instructions
   14465              :              use completely different names for the comparisons than the
   14466              :              fp conditional moves.  */
   14467         9339 :           switch (GET_CODE (x))
   14468              :             {
   14469            3 :             case UNEQ:
   14470            3 :               if (TARGET_AVX)
   14471              :                 {
   14472            3 :                   fputs ("eq_us", file);
   14473            3 :                   break;
   14474              :                 }
   14475              :              /* FALLTHRU */
   14476         4635 :             case EQ:
   14477         4635 :               fputs ("eq", file);
   14478         4635 :               break;
   14479            0 :             case UNLT:
   14480            0 :               if (TARGET_AVX)
   14481              :                 {
   14482            0 :                   fputs ("nge", file);
   14483            0 :                   break;
   14484              :                 }
   14485              :              /* FALLTHRU */
   14486         1637 :             case LT:
   14487         1637 :               fputs ("lt", file);
   14488         1637 :               break;
   14489            0 :             case UNLE:
   14490            0 :               if (TARGET_AVX)
   14491              :                 {
   14492            0 :                   fputs ("ngt", file);
   14493            0 :                   break;
   14494              :                 }
   14495              :              /* FALLTHRU */
   14496          795 :             case LE:
   14497          795 :               fputs ("le", file);
   14498          795 :               break;
   14499           95 :             case UNORDERED:
   14500           95 :               fputs ("unord", file);
   14501           95 :               break;
   14502           24 :             case LTGT:
   14503           24 :               if (TARGET_AVX)
   14504              :                 {
   14505           24 :                   fputs ("neq_oq", file);
   14506           24 :                   break;
   14507              :                 }
   14508              :              /* FALLTHRU */
   14509          898 :             case NE:
   14510          898 :               fputs ("neq", file);
   14511          898 :               break;
   14512            0 :             case GE:
   14513            0 :               if (TARGET_AVX)
   14514              :                 {
   14515            0 :                   fputs ("ge", file);
   14516            0 :                   break;
   14517              :                 }
   14518              :              /* FALLTHRU */
   14519          402 :             case UNGE:
   14520          402 :               fputs ("nlt", file);
   14521          402 :               break;
   14522            0 :             case GT:
   14523            0 :               if (TARGET_AVX)
   14524              :                 {
   14525            0 :                   fputs ("gt", file);
   14526            0 :                   break;
   14527              :                 }
   14528              :              /* FALLTHRU */
   14529          767 :             case UNGT:
   14530          767 :               fputs ("nle", file);
   14531          767 :               break;
   14532           83 :             case ORDERED:
   14533           83 :               fputs ("ord", file);
   14534           83 :               break;
   14535            0 :             default:
   14536            0 :               output_operand_lossage ("operand is not a condition code, "
   14537              :                                       "invalid operand code 'D'");
   14538            0 :               return;
   14539              :             }
   14540         9339 :           return;
   14541              : 
   14542      7106781 :         case 'F':
   14543      7106781 :         case 'f':
   14544              : #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
   14545              :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14546              :             putc ('.', file);
   14547              :           gcc_fallthrough ();
   14548              : #endif
   14549              : 
   14550      7106781 :         case 'C':
   14551      7106781 :         case 'c':
   14552      7106781 :           if (!COMPARISON_P (x))
   14553              :             {
   14554            0 :               output_operand_lossage ("operand is not a condition code, "
   14555              :                                       "invalid operand code '%c'", code);
   14556            0 :               return;
   14557              :             }
   14558      7106781 :           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
   14559      7106781 :                               code == 'c' || code == 'f',
   14560      7106781 :                               code == 'F' || code == 'f',
   14561              :                               file);
   14562      7106781 :           return;
   14563              : 
   14564           21 :         case 'G':
   14565           21 :           {
   14566           21 :             int dfv = INTVAL (x);
   14567           21 :             const char *dfv_suffix = ix86_ccmp_dfv_mapping[dfv];
   14568           21 :             fputs (dfv_suffix, file);
   14569              :           }
   14570           21 :           return;
   14571              : 
   14572         1286 :         case 'H':
   14573         1286 :           if (!offsettable_memref_p (x))
   14574              :             {
   14575            1 :               output_operand_lossage ("operand is not an offsettable memory "
   14576              :                                       "reference, invalid operand code 'H'");
   14577            1 :               return;
   14578              :             }
   14579              :           /* It doesn't actually matter what mode we use here, as we're
   14580              :              only going to use this for printing.  */
   14581         1285 :           x = adjust_address_nv (x, DImode, 8);
   14582              :           /* Output 'qword ptr' for intel assembler dialect.  */
   14583         1285 :           if (ASSEMBLER_DIALECT == ASM_INTEL)
   14584            0 :             code = 'q';
   14585              :           break;
   14586              : 
   14587        75584 :         case 'K':
   14588        75584 :           if (!CONST_INT_P (x))
   14589              :             {
   14590            1 :               output_operand_lossage ("operand is not an integer, invalid "
   14591              :                                       "operand code 'K'");
   14592            1 :               return;
   14593              :             }
   14594              : 
   14595        75583 :           if (INTVAL (x) & IX86_HLE_ACQUIRE)
   14596              : #ifdef HAVE_AS_IX86_HLE
   14597           22 :             fputs ("xacquire ", file);
   14598              : #else
   14599              :             fputs ("\n" ASM_BYTE "0xf2\n\t", file);
   14600              : #endif
   14601        75561 :           else if (INTVAL (x) & IX86_HLE_RELEASE)
   14602              : #ifdef HAVE_AS_IX86_HLE
   14603           24 :             fputs ("xrelease ", file);
   14604              : #else
   14605              :             fputs ("\n" ASM_BYTE "0xf3\n\t", file);
   14606              : #endif
   14607              :           /* We do not want to print value of the operand.  */
   14608        75583 :           return;
   14609              : 
   14610        43238 :         case 'N':
   14611        43238 :           if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
   14612        15513 :             fputs ("{z}", file);
   14613        43238 :           return;
   14614              : 
   14615         4117 :         case 'r':
   14616         4117 :           if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
   14617              :             {
   14618            2 :               output_operand_lossage ("operand is not a specific integer, "
   14619              :                                       "invalid operand code 'r'");
   14620            2 :               return;
   14621              :             }
   14622              : 
   14623         4115 :           if (ASSEMBLER_DIALECT == ASM_INTEL)
   14624            1 :             fputs (", ", file);
   14625              : 
   14626         4115 :           fputs ("{sae}", file);
   14627              : 
   14628         4115 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14629         4114 :             fputs (", ", file);
   14630              : 
   14631         4115 :           return;
   14632              : 
   14633         6097 :         case 'R':
   14634         6097 :           if (!CONST_INT_P (x))
   14635              :             {
   14636            1 :               output_operand_lossage ("operand is not an integer, invalid "
   14637              :                                       "operand code 'R'");
   14638            1 :               return;
   14639              :             }
   14640              : 
   14641         6096 :           if (ASSEMBLER_DIALECT == ASM_INTEL)
   14642            6 :             fputs (", ", file);
   14643              : 
   14644         6096 :           switch (INTVAL (x))
   14645              :             {
   14646         5281 :             case ROUND_NEAREST_INT | ROUND_SAE:
   14647         5281 :               fputs ("{rn-sae}", file);
   14648         5281 :               break;
   14649          637 :             case ROUND_NEG_INF | ROUND_SAE:
   14650          637 :               fputs ("{rd-sae}", file);
   14651          637 :               break;
   14652           56 :             case ROUND_POS_INF | ROUND_SAE:
   14653           56 :               fputs ("{ru-sae}", file);
   14654           56 :               break;
   14655          121 :             case ROUND_ZERO | ROUND_SAE:
   14656          121 :               fputs ("{rz-sae}", file);
   14657          121 :               break;
   14658            1 :             default:
   14659            1 :               output_operand_lossage ("operand is not a specific integer, "
   14660              :                                       "invalid operand code 'R'");
   14661              :             }
   14662              : 
   14663         6096 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14664         6090 :             fputs (", ", file);
   14665              : 
   14666         6096 :           return;
   14667              : 
   14668        10281 :         case 'v':
   14669        10281 :           if (MEM_P (x))
   14670              :             {
   14671        10400 :               switch (MEM_ADDR_SPACE (x))
   14672              :                 {
   14673              :                 case ADDR_SPACE_GENERIC:
   14674              :                   break;
   14675            0 :                 case ADDR_SPACE_SEG_FS:
   14676            0 :                   fputs ("fs ", file);
   14677            0 :                   break;
   14678            0 :                 case ADDR_SPACE_SEG_GS:
   14679            0 :                   fputs ("gs ", file);
   14680            0 :                   break;
   14681            0 :                 default:
   14682            0 :                   gcc_unreachable ();
   14683              :                 }
   14684              :             }
   14685              :           else
   14686            0 :             output_operand_lossage ("operand is not a memory reference, "
   14687              :                                     "invalid operand code 'v'");
   14688        10281 :           return;
   14689              : 
   14690            0 :         case '*':
   14691            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14692            0 :             putc ('*', file);
   14693            0 :           return;
   14694              : 
   14695          202 :         case '&':
   14696          202 :           {
   14697          202 :             const char *name = get_some_local_dynamic_name ();
   14698          202 :             if (name == NULL)
   14699            1 :               output_operand_lossage ("'%%&' used without any "
   14700              :                                       "local dynamic TLS references");
   14701              :             else
   14702          201 :               assemble_name (file, name);
   14703          202 :             return;
   14704              :           }
   14705              : 
   14706      6457293 :         case '+':
   14707      6457293 :           {
   14708      6457293 :             rtx x;
   14709              : 
   14710      6457293 :             if (!optimize
   14711      5031381 :                 || optimize_function_for_size_p (cfun)
   14712     11300134 :                 || (!TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN
   14713      4842841 :                     && !TARGET_BRANCH_PREDICTION_HINTS_TAKEN))
   14714      6457293 :               return;
   14715              : 
   14716            0 :             x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
   14717            0 :             if (x)
   14718              :               {
   14719            0 :                 int pred_val = profile_probability::from_reg_br_prob_note
   14720            0 :                                  (XINT (x, 0)).to_reg_br_prob_base ();
   14721              : 
   14722            0 :                 bool taken = pred_val > REG_BR_PROB_BASE / 2;
   14723              :                 /* We use 3e (DS) prefix for taken branches and
   14724              :                    2e (CS) prefix for not taken branches.  */
   14725            0 :                 if (taken && TARGET_BRANCH_PREDICTION_HINTS_TAKEN)
   14726            0 :                   fputs ("ds ; ", file);
   14727            0 :                 else if (!taken && TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN)
   14728            0 :                   fputs ("cs ; ", file);
   14729              :               }
   14730            0 :             return;
   14731              :           }
   14732              : 
   14733              :         case ';':
   14734              : #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
   14735              :           putc (';', file);
   14736              : #endif
   14737              :           return;
   14738              : 
   14739         3267 :         case '~':
   14740         3267 :           putc (TARGET_AVX2 ? 'i' : 'f', file);
   14741         3267 :           return;
   14742              : 
   14743         1675 :         case 'M':
   14744         1675 :           if (TARGET_X32)
   14745              :             {
   14746              :               /* NB: 32-bit indices in a VSIB address are sign-extended
   14747              :                  to 64 bits.  In x32, the 32-bit address 0xf7fa3010 would be
   14748              :                  sign-extended to 0xfffffffff7fa3010, which is an invalid
   14749              :                  address.  Add the addr32 prefix if there is neither a base
   14750              :                  register nor a symbol.  */
   14751           40 :               bool ok;
   14752           40 :               struct ix86_address parts;
   14753           40 :               ok = ix86_decompose_address (x, &parts);
   14754           40 :               gcc_assert (ok && parts.index == NULL_RTX);
   14755           40 :               if (parts.base == NULL_RTX
   14756           40 :                   && (parts.disp == NULL_RTX
   14757           34 :                       || !symbolic_operand (parts.disp,
   14758           34 :                                             GET_MODE (parts.disp))))
   14759           34 :                 fputs ("addr32 ", file);
   14760              :             }
   14761         1675 :           return;
   14762              : 
   14763        22156 :         case '^':
   14764        25346 :           if (Pmode != word_mode)
   14765            0 :             fputs ("addr32 ", file);
   14766        22156 :           return;
   14767              : 
   14768     14773479 :         case '!':
   14769     14773479 :           if (ix86_notrack_prefixed_insn_p (current_output_insn))
   14770         4579 :             fputs ("notrack ", file);
   14771     14773479 :           return;
   14772              : 
   14773            1 :         default:
   14774            1 :           output_operand_lossage ("invalid operand code '%c'", code);
   14775              :         }
   14776              :     }
   14777              : 
   14778    143707757 :   if (REG_P (x))
   14779     85613092 :     print_reg (x, code, file);
   14780              : 
   14781     58094665 :   else if (MEM_P (x))
   14782              :     {
   14783     33274130 :       rtx addr = XEXP (x, 0);
   14784              : 
   14785              :       /* No `byte ptr' prefix for call instructions ... */
   14786     33274130 :       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
   14787              :         {
   14788          321 :           machine_mode mode = GET_MODE (x);
   14789          321 :           const char *size;
   14790              : 
   14791              :           /* Check for explicit size override codes.  */
   14792          321 :           if (code == 'b')
   14793              :             size = "BYTE";
   14794              :           else if (code == 'w')
   14795              :             size = "WORD";
   14796              :           else if (code == 'k')
   14797              :             size = "DWORD";
   14798              :           else if (code == 'q')
   14799              :             size = "QWORD";
   14800              :           else if (code == 'x')
   14801              :             size = "XMMWORD";
   14802              :           else if (code == 't')
   14803              :             size = "YMMWORD";
   14804              :           else if (code == 'g')
   14805              :             size = "ZMMWORD";
   14806          235 :           else if (mode == BLKmode)
   14807              :             /* ... or BLKmode operands, when not overridden.  */
   14808              :             size = NULL;
   14809              :           else
   14810          466 :             switch (GET_MODE_SIZE (mode))
   14811              :               {
   14812              :               case 1: size = "BYTE"; break;
   14813              :               case 2: size = "WORD"; break;
   14814              :               case 4: size = "DWORD"; break;
   14815              :               case 8: size = "QWORD"; break;
   14816              :               case 12: size = "TBYTE"; break;
   14817            7 :               case 16:
   14818            7 :                 if (mode == XFmode)
   14819              :                   size = "TBYTE";
   14820              :                 else
   14821              :                   size = "XMMWORD";
   14822              :                 break;
   14823              :               case 32: size = "YMMWORD"; break;
   14824              :               case 64: size = "ZMMWORD"; break;
   14825            0 :               default:
   14826            0 :                 gcc_unreachable ();
   14827              :               }
   14828              :           if (size)
   14829              :             {
   14830          319 :               fputs (size, file);
   14831          319 :               fputs (" PTR ", file);
   14832              :             }
   14833              :         }
   14834              : 
   14835     33274130 :       if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
   14836            0 :         output_operand_lossage ("invalid constraints for operand");
   14837              :       else
   14838     33274130 :         ix86_print_operand_address_as
   14839     33910120 :           (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
   14840              :     }
   14841              : 
   14842     24820535 :   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode)
   14843              :     {
   14844          767 :       long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
   14845          767 :                                REAL_MODE_FORMAT (HFmode));
   14846          767 :       if (ASSEMBLER_DIALECT == ASM_ATT)
   14847          767 :         putc ('$', file);
   14848          767 :       fprintf (file, "0x%04x", (unsigned int) l);
   14849          767 :     }
   14850              : 
   14851     24819768 :   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
   14852              :     {
   14853        20697 :       long l;
   14854              : 
   14855        20697 :       REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
   14856              : 
   14857        20697 :       if (ASSEMBLER_DIALECT == ASM_ATT)
   14858        20697 :         putc ('$', file);
   14859              :       /* Sign extend 32bit SFmode immediate to 8 bytes.  */
   14860        20697 :       if (code == 'q')
   14861          327 :         fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
   14862              :                  (unsigned long long) (int) l);
   14863              :       else
   14864        20370 :         fprintf (file, "0x%08x", (unsigned int) l);
   14865              :     }
   14866              : 
   14867     24799071 :   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
   14868              :     {
   14869         3281 :       long l[2];
   14870              : 
   14871         3281 :       REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
   14872              : 
   14873         3281 :       if (ASSEMBLER_DIALECT == ASM_ATT)
   14874         3281 :         putc ('$', file);
   14875         3281 :       fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
   14876         3281 :     }
   14877              : 
   14878              :   /* These float cases don't actually occur as immediate operands.  */
   14879     24795790 :   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
   14880              :     {
   14881            0 :       char dstr[30];
   14882              : 
   14883            0 :       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
   14884            0 :       fputs (dstr, file);
   14885            0 :     }
   14886              : 
   14887              :   /* Print bcst_mem_operand.  */
   14888     24795790 :   else if (GET_CODE (x) == VEC_DUPLICATE)
   14889              :     {
   14890          314 :       machine_mode vmode = GET_MODE (x);
   14891              :       /* Must be bcst_memory_operand.  */
   14892          314 :       gcc_assert (bcst_mem_operand (x, vmode));
   14893              : 
   14894          314 :       rtx mem = XEXP (x,0);
   14895          314 :       ix86_print_operand (file, mem, 0);
   14896              : 
   14897          314 :       switch (vmode)
   14898              :         {
   14899           28 :         case E_V2DImode:
   14900           28 :         case E_V2DFmode:
   14901           28 :           fputs ("{1to2}", file);
   14902           28 :           break;
   14903           74 :         case E_V4SImode:
   14904           74 :         case E_V4SFmode:
   14905           74 :         case E_V4DImode:
   14906           74 :         case E_V4DFmode:
   14907           74 :           fputs ("{1to4}", file);
   14908           74 :           break;
   14909           94 :         case E_V8SImode:
   14910           94 :         case E_V8SFmode:
   14911           94 :         case E_V8DFmode:
   14912           94 :         case E_V8DImode:
   14913           94 :         case E_V8HFmode:
   14914           94 :           fputs ("{1to8}", file);
   14915           94 :           break;
   14916          110 :         case E_V16SFmode:
   14917          110 :         case E_V16SImode:
   14918          110 :         case E_V16HFmode:
   14919          110 :           fputs ("{1to16}", file);
   14920          110 :           break;
   14921            8 :         case E_V32HFmode:
   14922            8 :           fputs ("{1to32}", file);
   14923            8 :           break;
   14924            0 :         default:
   14925            0 :           gcc_unreachable ();
   14926              :         }
   14927              :     }
   14928              : 
   14929              :   else
   14930              :     {
   14931              :       /* We have patterns that allow zero sets of memory, for instance.
   14932              :          In 64-bit mode, we should probably support all 8-byte vectors,
   14933              :          since we can in fact encode that into an immediate.  */
   14934     24795476 :       if (CONST_VECTOR_P (x))
   14935              :         {
   14936         3258 :           if (x != CONST0_RTX (GET_MODE (x)))
   14937            2 :             output_operand_lossage ("invalid vector immediate");
   14938         3258 :           x = const0_rtx;
   14939              :         }
   14940              : 
   14941     24795476 :       if (code == 'P')
   14942              :         {
   14943      5912944 :           if (ix86_force_load_from_GOT_p (x, true))
   14944              :             {
   14945              :               /* For inline assembly statement, load function address
   14946              :                  from GOT with 'P' operand modifier to avoid PLT.  */
   14947            4 :               x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
   14948              :                                   (TARGET_64BIT
   14949              :                                    ? UNSPEC_GOTPCREL
   14950              :                                    : UNSPEC_GOT));
   14951            4 :               x = gen_rtx_CONST (Pmode, x);
   14952            4 :               x = gen_const_mem (Pmode, x);
   14953            4 :               ix86_print_operand (file, x, 'A');
   14954            4 :               return;
   14955              :             }
   14956              :         }
   14957     18882532 :       else if (code != 'p')
   14958              :         {
   14959     18882423 :           if (CONST_INT_P (x))
   14960              :             {
   14961     15599638 :               if (ASSEMBLER_DIALECT == ASM_ATT)
   14962     15599410 :                 putc ('$', file);
   14963              :             }
   14964      3282785 :           else if (GET_CODE (x) == CONST || SYMBOL_REF_P (x)
   14965         9409 :                    || LABEL_REF_P (x))
   14966              :             {
   14967      3282783 :               if (ASSEMBLER_DIALECT == ASM_ATT)
   14968      3282759 :                 putc ('$', file);
   14969              :               else
   14970           24 :                 fputs ("OFFSET FLAT:", file);
   14971              :             }
   14972              :         }
   14973     24795472 :       if (CONST_INT_P (x))
   14974     15599724 :         fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
   14975      9195748 :       else if (flag_pic || MACHOPIC_INDIRECT)
   14976       522767 :         output_pic_addr_const (file, x, code);
   14977              :       else
   14978      8672981 :         output_addr_const (file, x);
   14979              :     }
   14980              : }
   14981              : /* Return true iff CODE is one of the punctuation characters '*', '+',
                      :    '&', ';', '~', '^' or '!'.  The characters ';', '~', '^' and '!'
                      :    have matching cases in the operand-code switch of
                      :    ix86_print_operand.
                      :    NOTE(review): presumably this backs the print-operand
                      :    punctuation-validity target hook -- confirm against the hook
                      :    definitions elsewhere in this file.  */
   14982              : static bool
   14983     21336742 : ix86_print_operand_punct_valid_p (unsigned char code)
   14984              : {
   14985     21336742 :   return (code == '*' || code == '+' || code == '&' || code == ';'
   14986     14795635 :           || code == '~' || code == '^' || code == '!');
   14987              : }
   14988              : 
   14989              : /* Print a memory operand whose address is ADDR to FILE.
                      : 
                      :    AS is the address space of the access.  If it is generic, the
                      :    segment found while decomposing ADDR (parts.seg) is used instead;
                      :    otherwise the decomposed address must not carry a segment itself.
                      : 
                      :    RAW, when true, suppresses both the segment-override prefix and the
                      :    switch to RIP-relative addressing that is otherwise applied to
                      :    addresses without base and index in 64-bit mode.
                      : 
                      :    An address with neither base nor index is printed as a bare
                      :    displacement.  Otherwise it is printed as DISP(BASE,INDEX,SCALE)
                      :    in AT&T syntax, or as DISP[BASE+OFFSET+INDEX*SCALE] in Intel
                      :    syntax.  ADDR must be decomposable by ix86_decompose_address;
                      :    this is checked with gcc_assert.  */
   14990              : 
   14991              : static void
   14992     36806190 : ix86_print_operand_address_as (FILE *file, rtx addr,
   14993              :                                addr_space_t as, bool raw)
   14994              : {
   14995     36806190 :   struct ix86_address parts;
   14996     36806190 :   rtx base, index, disp;
   14997     36806190 :   int scale;
   14998     36806190 :   int ok;
   14999     36806190 :   bool vsib = false;
   15000     36806190 :   int code = 0;
   15001              :   /* Decompose ADDR.  An UNSPEC_VSIBADDR wrapper supplies the index and
                      :      scale as its vector elements 1 and 2; an UNSPEC_LEA_ADDR wrapper
                      :      (asserted to be 64-bit only) selects operand code 'q'.  */
   15002     36806190 :   if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
   15003              :     {
   15004         1675 :       ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
   15005         1675 :       gcc_assert (parts.index == NULL_RTX);
   15006         1675 :       parts.index = XVECEXP (addr, 0, 1);
   15007         1675 :       parts.scale = INTVAL (XVECEXP (addr, 0, 2));
   15008         1675 :       addr = XVECEXP (addr, 0, 0);
   15009         1675 :       vsib = true;
   15010              :     }
   15011     36804515 :   else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
   15012              :     {
   15013      3045556 :       gcc_assert (TARGET_64BIT);
   15014      3045556 :       ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
   15015      3045556 :       code = 'q';
   15016              :     }
   15017              :   else
   15018     33758959 :     ok = ix86_decompose_address (addr, &parts);
   15019              :   /* NOTE(review): the VSIB branch above reads and updates PARTS before
                      :      OK is checked here.  */
   15020     36806190 :   gcc_assert (ok);
   15021              : 
   15022     36806190 :   base = parts.base;
   15023     36806190 :   index = parts.index;
   15024     36806190 :   disp = parts.disp;
   15025     36806190 :   scale = parts.scale;
   15026              :   /* A generic AS defers to the segment found in the address; an explicit
                      :      AS requires that the address carries none.  */
   15027     36806190 :   if (ADDR_SPACE_GENERIC_P (as))
   15028     36524289 :     as = parts.seg;
   15029              :   else
   15030       281901 :     gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));
   15031              :   /* Segment override, preceded by '%' in AT&T syntax; skipped for RAW.  */
   15032     36806190 :   if (!ADDR_SPACE_GENERIC_P (as) && !raw)
   15033              :     {
   15034       281916 :       if (ASSEMBLER_DIALECT == ASM_ATT)
   15035       281914 :         putc ('%', file);
   15036              : 
   15037       281916 :       switch (as)
   15038              :         {
   15039       182225 :         case ADDR_SPACE_SEG_FS:
   15040       182225 :           fputs ("fs:", file);
   15041       182225 :           break;
   15042        99691 :         case ADDR_SPACE_SEG_GS:
   15043        99691 :           fputs ("gs:", file);
   15044        99691 :           break;
   15045            0 :         default:
   15046            0 :           gcc_unreachable ();
   15047              :         }
   15048              :     }
   15049              : 
   15050              :   /* Use one byte shorter RIP relative addressing for 64bit mode.  */
   15051     36806190 :   if (TARGET_64BIT && !base && !index && !raw)
   15052              :     {
   15053      5994273 :       rtx symbol = disp;
   15054              : 
   15055      5994273 :       if (GET_CODE (disp) == CONST
   15056      2173578 :           && GET_CODE (XEXP (disp, 0)) == PLUS
   15057      2088445 :           && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
   15058      2088445 :         symbol = XEXP (XEXP (disp, 0), 0);
   15059              : 
   15060      5994273 :       if (LABEL_REF_P (symbol)
   15061      5994273 :           || (SYMBOL_REF_P (symbol)
   15062      5732831 :               && SYMBOL_REF_TLS_MODEL (symbol) == 0))
   15063      5732733 :         base = pc_rtx;
   15064              :     }
   15065              : 
   15066     36806190 :   if (!base && !index)
   15067              :     {
   15068              :       /* Displacement only requires special attention.  */
   15069       601147 :       if (CONST_INT_P (disp))
   15070              :         {
   15071       269387 :           if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
   15072            0 :             fputs ("ds:", file);
   15073       269387 :           fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
   15074              :         }
   15075              :       /* Load the external function address via the GOT slot to avoid PLT.  */
   15076       331760 :       else if (GET_CODE (disp) == CONST
   15077       113436 :                && GET_CODE (XEXP (disp, 0)) == UNSPEC
   15078        85372 :                && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
   15079         9640 :                    || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
   15080       407492 :                && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
   15081           24 :         output_pic_addr_const (file, disp, 0);  /* Regardless of flag_pic.  */
   15082       331736 :       else if (flag_pic)
   15083       114778 :         output_pic_addr_const (file, disp, 0);
   15084              :       else
   15085       216958 :         output_addr_const (file, disp);
   15086              :     }
   15087              :   else
   15088              :     {
   15089              :       /* Print SImode register names to force addr32 prefix.  */
   15090     36205043 :       if (SImode_address_operand (addr, VOIDmode))
   15091              :         {
   15092           37 :           if (flag_checking)
   15093              :             {
   15094           37 :               gcc_assert (TARGET_64BIT);
   15095           37 :               switch (GET_CODE (addr))
   15096              :                 {
   15097            0 :                 case SUBREG:
   15098            0 :                   gcc_assert (GET_MODE (addr) == SImode);
   15099            0 :                   gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
   15100              :                   break;
   15101           37 :                 case ZERO_EXTEND:
   15102           37 :                 case AND:
   15103           37 :                   gcc_assert (GET_MODE (addr) == DImode);
   15104              :                   break;
   15105            0 :                 default:
   15106            0 :                   gcc_unreachable ();
   15107              :                 }
   15108              :             }
   15109           37 :           gcc_assert (!code);
   15110              :           code = 'k';
   15111              :         }
   15112     36205006 :       else if (code == 0
   15113     33161087 :                && TARGET_X32
   15114          482 :                && disp
   15115          410 :                && CONST_INT_P (disp)
   15116          311 :                && INTVAL (disp) < -16*1024*1024)
   15117              :         {
   15118              :           /* X32 runs in 64-bit mode, where displacement, DISP, in
   15119              :              address DISP(%r64), is encoded as 32-bit immediate sign-
   15120              :              extended from 32-bit to 64-bit.  For -0x40000300(%r64),
   15121              :              address is %r64 + 0xffffffffbffffd00.  When %r64 <
   15122              :              0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
   15123              :              which is invalid for x32.  The correct address is %r64
   15124              :              - 0x40000300 == 0xf7ffdd64.  To properly encode
   15125              :              -0x40000300(%r64) for x32, we zero-extend negative
   15126              :              displacement by forcing addr32 prefix which truncates
   15127              :              0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
   15128              :              zero-extend all negative displacements, including -1(%rsp).
   15129              :              However, for small negative displacements, sign-extension
   15130              :              won't cause overflow.  We only zero-extend negative
   15131              :              displacements if they < -16*1024*1024, which is also used
   15132              :              to check legitimate address displacements for PIC.  */
   15133           38 :           code = 'k';
   15134              :         }
   15135              : 
   15136              :       /* Since the upper 32 bits of RSP are always zero for x32,
   15137              :          we can encode %esp as %rsp to avoid 0x67 prefix if
   15138              :          there is no index register.  */
   15139          976 :       if (TARGET_X32 && Pmode == SImode
   15140     36205447 :           && !index && base && REG_P (base) && REGNO (base) == SP_REG)
   15141              :         code = 'q';
   15142              :       /* AT&T syntax: DISP(BASE,INDEX,SCALE).  A VSIB index is printed with
                      :          operand code 0 and always together with its scale.  */
   15143     36205043 :       if (ASSEMBLER_DIALECT == ASM_ATT)
   15144              :         {
   15145     36204673 :           if (disp)
   15146              :             {
   15147     32151519 :               if (flag_pic)
   15148      2823696 :                 output_pic_addr_const (file, disp, 0);
   15149     29327823 :               else if (LABEL_REF_P (disp))
   15150         5907 :                 output_asm_label (disp);
   15151              :               else
   15152     29321916 :                 output_addr_const (file, disp);
   15153              :             }
   15154              : 
   15155     36204673 :           putc ('(', file);
   15156     36204673 :           if (base)
   15157     35785886 :             print_reg (base, code, file);
   15158     36204673 :           if (index)
   15159              :             {
   15160      1930733 :               putc (',', file);
   15161      3859839 :               print_reg (index, vsib ? 0 : code, file);
   15162      1930733 :               if (scale != 1 || vsib)
   15163      1029121 :                 fprintf (file, ",%d", scale);
   15164              :             }
   15165     36204673 :           putc (')', file);
   15166              :         }
   15167              :       else
   15168              :         {
   15169          370 :           rtx offset = NULL_RTX;
   15170              :           /* Intel syntax.  OFFSET collects the CONST_INT addend of a
                      :              (const (plus X N)) displacement, or a CONST_INT displacement
                      :              itself when !flag_pic; it is printed inside the brackets.
                      :              Whatever else remains of DISP is printed before the '['.  */
   15171          370 :           if (disp)
   15172              :             {
   15173              :               /* Pull out the offset of a symbol; print any symbol itself.  */
   15174          290 :               if (GET_CODE (disp) == CONST
   15175           18 :                   && GET_CODE (XEXP (disp, 0)) == PLUS
   15176           18 :                   && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
   15177              :                 {
   15178           18 :                   offset = XEXP (XEXP (disp, 0), 1);
   15179           18 :                   disp = gen_rtx_CONST (VOIDmode,
   15180              :                                         XEXP (XEXP (disp, 0), 0));
   15181              :                 }
   15182              : 
   15183          290 :               if (flag_pic)
   15184            0 :                 output_pic_addr_const (file, disp, 0);
   15185          290 :               else if (LABEL_REF_P (disp))
   15186            0 :                 output_asm_label (disp);
   15187          290 :               else if (CONST_INT_P (disp))
   15188              :                 offset = disp;
   15189              :               else
   15190          123 :                 output_addr_const (file, disp);
   15191              :             }
   15192              : 
   15193          370 :           putc ('[', file);
   15194          370 :           if (base)
   15195              :             {
   15196          329 :               print_reg (base, code, file);
   15197          329 :               if (offset)
   15198              :                 {
   15199          185 :                   if (INTVAL (offset) >= 0)
   15200           20 :                     putc ('+', file);
   15201          185 :                   fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
   15202              :                 }
   15203              :             }
   15204           41 :           else if (offset)
   15205            0 :             fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
   15206              :           else
   15207           41 :             putc ('0', file);
   15208              : 
   15209          370 :           if (index)
   15210              :             {
   15211           94 :               putc ('+', file);
   15212          140 :               print_reg (index, vsib ? 0 : code, file);
   15213           94 :               if (scale != 1 || vsib)
   15214           92 :                 fprintf (file, "*%d", scale);
   15215              :             }
   15216          370 :           putc (']', file);
   15217              :         }
   15218              :     }
   15219     36806190 : }
   15220              : /* Print address ADDR to FILE in the generic address space, with RAW
                      :    false, by calling ix86_print_operand_address_as.  When printing
                      :    asm operands and ADDR does not satisfy address_operand, report an
                      :    operand lossage instead of printing.  The machine mode argument
                      :    is unused.  */
   15221              : static void
   15222      3532061 : ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
   15223              : {
   15224      3532061 :   if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
   15225            1 :     output_operand_lossage ("invalid constraints for operand");
   15226              :   else
   15227      3532060 :     ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
   15228      3532061 : }
   15229              : 
   15230              : /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.
                      : 
                      :    If X is an UNSPEC whose number is one of the kinds handled below,
                      :    print its first operand to FILE followed by the matching relocation
                      :    suffix (for UNSPEC_MACHOPIC_OFFSET under TARGET_MACHO: '-' and the
                      :    function base name) and return true.  Return false without
                      :    printing anything if X is not an UNSPEC or its number is not
                      :    handled here.  */
   15231              : 
   15232              : static bool
   15233        15451 : i386_asm_output_addr_const_extra (FILE *file, rtx x)
   15234              : {
   15235        15451 :   rtx op;
   15236              : 
   15237        15451 :   if (GET_CODE (x) != UNSPEC)
   15238              :     return false;
   15239              :   /* Element 0 of the UNSPEC vector is the operand that gets decorated.  */
   15240        15451 :   op = XVECEXP (x, 0, 0);
   15241        15451 :   switch (XINT (x, 1))
   15242              :     {
   15243         1357 :     case UNSPEC_GOTOFF:
   15244         1357 :       output_addr_const (file, op);
   15245         1357 :       fputs ("@gotoff", file);
   15246         1357 :       break;
   15247            0 :     case UNSPEC_GOTTPOFF:
   15248            0 :       output_addr_const (file, op);
   15249              :       /* FIXME: This might be @TPOFF in Sun ld.  */
   15250            0 :       fputs ("@gottpoff", file);
   15251            0 :       break;
   15252            0 :     case UNSPEC_TPOFF:
   15253            0 :       output_addr_const (file, op);
   15254            0 :       fputs ("@tpoff", file);
   15255            0 :       break;
   15256        10998 :     case UNSPEC_NTPOFF:
   15257        10998 :       output_addr_const (file, op);
   15258        10998 :       if (TARGET_64BIT)
   15259        10249 :         fputs ("@tpoff", file);
   15260              :       else
   15261          749 :         fputs ("@ntpoff", file);
   15262              :       break;
   15263            0 :     case UNSPEC_DTPOFF:
   15264            0 :       output_addr_const (file, op);
   15265            0 :       fputs ("@dtpoff", file);
   15266            0 :       break;
   15267         3095 :     case UNSPEC_GOTNTPOFF:
   15268         3095 :       output_addr_const (file, op);
   15269         3095 :       if (TARGET_64BIT)
   15270         3095 :         fputs (ASSEMBLER_DIALECT == ASM_ATT ?
   15271              :                "@gottpoff(%rip)" : "@gottpoff[rip]", file);
   15272              :       else
   15273            0 :         fputs ("@gotntpoff", file);
   15274              :       break;
   15275            1 :     case UNSPEC_INDNTPOFF:
   15276            1 :       output_addr_const (file, op);
   15277            1 :       fputs ("@indntpoff", file);
   15278            1 :       break;
   15279            0 :     case UNSPEC_SECREL32:
   15280            0 :       output_addr_const (file, op);
   15281            0 :       fputs ("@secrel32", file);
   15282            0 :       break;
   15283              : #if TARGET_MACHO
   15284              :     case UNSPEC_MACHOPIC_OFFSET:
   15285              :       output_addr_const (file, op);
   15286              :       putc ('-', file);
   15287              :       machopic_output_function_base_name (file);
   15288              :       break;
   15289              : #endif
   15290              : 
   15291              :     default:
   15292              :       return false;
   15293              :     }
   15294              : 
   15295              :   return true;
   15296              : }
   15297              : 
   15298              : 
   15299              : /* Output code to perform a 387 binary operation in INSN, one of PLUS,
   15300              :    MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   15301              :    is the expression of the binary operation.  The output may either be
   15302              :    emitted here, or returned to the caller, like all output_* functions.
   15303              : 
   15304              :    There is no guarantee that the operands are the same mode, as they
   15305              :    might be within FLOAT or FLOAT_EXTEND expressions.  */
   15306              : 
   15307              : #ifndef SYSV386_COMPAT
   15308              : /* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   15309              :    wants to fix the assemblers because that causes incompatibility
   15310              :    with gcc.  No-one wants to fix gcc because that causes
   15311              :    incompatibility with assemblers...  You can use the option of
   15312              :    -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.
                      :    The default of 1 below is only used when SYSV386_COMPAT has not
                      :    already been defined.  */
   15313              : #define SYSV386_COMPAT 1
   15314              : #endif
   15315              : 
   15316              : const char *
   15317       606259 : output_387_binary_op (rtx_insn *insn, rtx *operands)
   15318              : {
   15319       606259 :   static char buf[40];
   15320       606259 :   const char *p;
   15321       606259 :   bool is_sse
   15322       606259 :     = (SSE_REG_P (operands[0])
   15323       661468 :        || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
   15324              : 
   15325        55209 :   if (is_sse)
   15326              :     p = "%v";
   15327        55209 :   else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
   15328        55202 :            || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
   15329              :     p = "fi";
   15330              :   else
   15331       606259 :     p = "f";
   15332              : 
   15333       606259 :   strcpy (buf, p);
   15334              : 
   15335       606259 :   switch (GET_CODE (operands[3]))
   15336              :     {
   15337              :     case PLUS:
   15338              :       p = "add"; break;
   15339              :     case MINUS:
   15340              :       p = "sub"; break;
   15341        94192 :     case MULT:
   15342        94192 :       p = "mul"; break;
   15343        27615 :     case DIV:
   15344        27615 :       p = "div"; break;
   15345            0 :     default:
   15346            0 :       gcc_unreachable ();
   15347              :     }
   15348              : 
   15349       606259 :   strcat (buf, p);
   15350              : 
   15351       606259 :   if (is_sse)
   15352              :    {
   15353       551050 :      p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd";
   15354       551050 :      strcat (buf, p);
   15355              : 
   15356       551050 :      if (TARGET_AVX)
   15357              :        p = "\t{%2, %1, %0|%0, %1, %2}";
   15358              :      else
   15359       534847 :        p = "\t{%2, %0|%0, %2}";
   15360              : 
   15361       551050 :      strcat (buf, p);
   15362       551050 :      return buf;
   15363              :    }
   15364              : 
   15365              :   /* Even if we do not want to check the inputs, this documents input
   15366              :      constraints.  Which helps in understanding the following code.  */
   15367        55209 :   if (flag_checking)
   15368              :     {
   15369        55208 :       if (STACK_REG_P (operands[0])
   15370        55208 :           && ((REG_P (operands[1])
   15371        53633 :                && REGNO (operands[0]) == REGNO (operands[1])
   15372        49645 :                && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
   15373         5563 :               || (REG_P (operands[2])
   15374         5563 :                   && REGNO (operands[0]) == REGNO (operands[2])
   15375         5563 :                   && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
   15376       110416 :           && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
   15377              :         ; /* ok */
   15378              :       else
   15379            0 :         gcc_unreachable ();
   15380              :     }
   15381              : 
   15382        55209 :   switch (GET_CODE (operands[3]))
   15383              :     {
   15384        40398 :     case MULT:
   15385        40398 :     case PLUS:
   15386        40398 :       if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
   15387         1984 :         std::swap (operands[1], operands[2]);
   15388              : 
   15389              :       /* know operands[0] == operands[1].  */
   15390              : 
   15391        40398 :       if (MEM_P (operands[2]))
   15392              :         {
   15393              :           p = "%Z2\t%2";
   15394              :           break;
   15395              :         }
   15396              : 
   15397        36040 :       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
   15398              :         {
   15399        21069 :           if (STACK_TOP_P (operands[0]))
   15400              :             /* How is it that we are storing to a dead operand[2]?
   15401              :                Well, presumably operands[1] is dead too.  We can't
   15402              :                store the result to st(0) as st(0) gets popped on this
   15403              :                instruction.  Instead store to operands[2] (which I
   15404              :                think has to be st(1)).  st(1) will be popped later.
   15405              :                gcc <= 2.8.1 didn't have this check and generated
   15406              :                assembly code that the Unixware assembler rejected.  */
   15407              :             p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
   15408              :           else
   15409              :             p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
   15410              :           break;
   15411              :         }
   15412              : 
   15413        14971 :       if (STACK_TOP_P (operands[0]))
   15414              :         p = "\t{%y2, %0|%0, %y2}";    /* st(0) = st(0) op st(r2) */
   15415              :       else
   15416              :         p = "\t{%2, %0|%0, %2}";      /* st(r1) = st(r1) op st(0) */
   15417              :       break;
   15418              : 
   15419        14811 :     case MINUS:
   15420        14811 :     case DIV:
   15421        14811 :       if (MEM_P (operands[1]))
   15422              :         {
   15423              :           p = "r%Z1\t%1";
   15424              :           break;
   15425              :         }
   15426              : 
   15427        14375 :       if (MEM_P (operands[2]))
   15428              :         {
   15429              :           p = "%Z2\t%2";
   15430              :           break;
   15431              :         }
   15432              : 
   15433        12765 :       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
   15434              :         {
   15435              : #if SYSV386_COMPAT
   15436              :           /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
   15437              :              derived assemblers, confusingly reverse the direction of
   15438              :              the operation for fsub{r} and fdiv{r} when the
   15439              :              destination register is not st(0).  The Intel assembler
   15440              :              doesn't have this brain damage.  Read !SYSV386_COMPAT to
   15441              :              figure out what the hardware really does.  */
   15442         6179 :           if (STACK_TOP_P (operands[0]))
   15443              :             p = "{p\t%0, %2|rp\t%2, %0}";
   15444              :           else
   15445              :             p = "{rp\t%2, %0|p\t%0, %2}";
   15446              : #else
   15447              :           if (STACK_TOP_P (operands[0]))
   15448              :             /* As above for fmul/fadd, we can't store to st(0).  */
   15449              :             p = "rp\t{%0, %2|%2, %0}";        /* st(1) = st(0) op st(1); pop */
   15450              :           else
   15451              :             p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
   15452              : #endif
   15453              :           break;
   15454              :         }
   15455              : 
   15456         6586 :       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
   15457              :         {
   15458              : #if SYSV386_COMPAT
   15459         3074 :           if (STACK_TOP_P (operands[0]))
   15460              :             p = "{rp\t%0, %1|p\t%1, %0}";
   15461              :           else
   15462              :             p = "{p\t%1, %0|rp\t%0, %1}";
   15463              : #else
   15464              :           if (STACK_TOP_P (operands[0]))
   15465              :             p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
   15466              :           else
   15467              :             p = "rp\t{%1, %0|%0, %1}";        /* st(r2) = st(0) op st(r2); pop */
   15468              : #endif
   15469              :           break;
   15470              :         }
   15471              : 
   15472         3512 :       if (STACK_TOP_P (operands[0]))
   15473              :         {
   15474         2670 :           if (STACK_TOP_P (operands[1]))
   15475              :             p = "\t{%y2, %0|%0, %y2}";        /* st(0) = st(0) op st(r2) */
   15476              :           else
   15477              :             p = "r\t{%y1, %0|%0, %y1}";       /* st(0) = st(r1) op st(0) */
   15478              :           break;
   15479              :         }
   15480          842 :       else if (STACK_TOP_P (operands[1]))
   15481              :         {
   15482              : #if SYSV386_COMPAT
   15483              :           p = "{\t%1, %0|r\t%0, %1}";
   15484              : #else
   15485              :           p = "r\t{%1, %0|%0, %1}";   /* st(r2) = st(0) op st(r2) */
   15486              : #endif
   15487              :         }
   15488              :       else
   15489              :         {
   15490              : #if SYSV386_COMPAT
   15491              :           p = "{r\t%2, %0|\t%0, %2}";
   15492              : #else
   15493              :           p = "\t{%2, %0|%0, %2}";    /* st(r1) = st(r1) op st(0) */
   15494              : #endif
   15495              :         }
   15496              :       break;
   15497              : 
   15498            0 :     default:
   15499            0 :       gcc_unreachable ();
   15500              :     }
   15501              : 
   15502        55209 :   strcat (buf, p);
   15503        55209 :   return buf;
   15504              : }
   15505              : 
   15506              : /* Return the direction flag mode (X86_DIRFLAG_ANY or X86_DIRFLAG_RESET) that INSN needs in the optimize_mode_switching pass.  */
   15507              : 
   15508              : static int
   15509         1656 : ix86_dirflag_mode_needed (rtx_insn *insn)
   15510              : {
   15511         1656 :   if (CALL_P (insn))
   15512              :     {
   15513          339 :       if (cfun->machine->func_type == TYPE_NORMAL)
   15514              :         return X86_DIRFLAG_ANY;
   15515              :       else
   15516              :         /* No need to emit CLD in an interrupt handler for TARGET_CLD.  */
   15517          339 :         return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
   15518              :     }
   15519              : 
   15520         1317 :   if (recog_memoized (insn) < 0)
   15521              :     return X86_DIRFLAG_ANY; /* Unrecognized insn: no requirement.  */
   15522              : 
   15523         1315 :   if (get_attr_type (insn) == TYPE_STR)
   15524              :     {
   15525              :       /* Stringops are used.  In a normal function request RESET (i.e. a cld) only for TARGET_CLD; in any other function type always request it.  */
   15526            1 :       if (cfun->machine->func_type == TYPE_NORMAL)
   15527            0 :         return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
   15528              :       else
   15529              :         return X86_DIRFLAG_RESET;
   15530              :     }
   15531              : 
   15532              :   return X86_DIRFLAG_ANY;
   15533              : }
   15534              : 
   15535              : /* Return true if EXP references an SSE register, other than an EXT_REX one, whose mode is wider than 128 bits (a 256bit or 512bit AVX register).  */
   15536              : 
   15537              : static bool
   15538      2216644 : ix86_check_avx_upper_register (const_rtx exp)
   15539              : {
   15540              :   /* construct_container may return a parallel with expr_list
   15541              :      which contains the real reg and mode, so the sub-rtxes of EXP are searched.  */
   15542      2216644 :   subrtx_iterator::array_type array;
   15543      8483328 :   FOR_EACH_SUBRTX (iter, array, exp, NONCONST)
   15544              :     {
   15545      6427854 :       const_rtx x = *iter;
   15546      2586431 :       if (SSE_REG_P (x)
   15547       837024 :           && !EXT_REX_SSE_REG_P (x)
   15548      8088882 :           && GET_MODE_BITSIZE (GET_MODE (x)) > 128)
   15549       161170 :         return true;
   15550              :     }
   15551              : 
   15552      2055474 :   return false;
   15553      2216644 : }
   15554              : 
   15555              : /* Callback for note_stores.  Set the bool that DATA points to to true if the stored-to DEST is a 256bit or 512bit AVX register; otherwise leave it untouched.  */
   15556              : 
   15557              : static void
   15558        51713 : ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
   15559              : {
   15560        51713 :   if (SSE_REG_P (dest)
   15561        12859 :       && !EXT_REX_SSE_REG_P (dest)
   15562        77431 :       && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
   15563              :     {
   15564          760 :       bool *used = (bool *) data;
   15565          760 :       *used = true;
   15566              :     }
   15567        51713 : }
   15568              : 
   15569              : /* Return the AVX_U128 mode (AVX_U128_ANY, AVX_U128_CLEAN or AVX_U128_DIRTY) that INSN needs in the optimize_mode_switching pass.  */
   15570              : 
   15571              : static int
   15572      2070240 : ix86_avx_u128_mode_needed (rtx_insn *insn)
   15573              : {
   15574      2070240 :   if (DEBUG_INSN_P (insn))
   15575              :     return AVX_U128_ANY;
   15576              : 
   15577      2070240 :   if (CALL_P (insn))
   15578              :     {
   15579        49537 :       rtx link;
   15580              : 
   15581              :       /* Needed mode is set to AVX_U128_CLEAN if there are
   15582              :          no 256bit or 512bit modes used in function arguments.  */
   15583        49537 :       for (link = CALL_INSN_FUNCTION_USAGE (insn);
   15584       134671 :            link;
   15585        85134 :            link = XEXP (link, 1))
   15586              :         {
   15587        86186 :           if (GET_CODE (XEXP (link, 0)) == USE)
   15588              :             {
   15589        84776 :               rtx arg = XEXP (XEXP (link, 0), 0);
   15590              : 
   15591        84776 :               if (ix86_check_avx_upper_register (arg))
   15592              :                 return AVX_U128_DIRTY;
   15593              :             }
   15594              :         }
   15595              : 
   15596              :       /* Needed mode is set to AVX_U128_CLEAN if there are no 256bit
   15597              :          nor 512bit registers used in the function return register.  */
   15598        48485 :       bool avx_upper_reg_found = false;
   15599        48485 :       note_stores (insn, ix86_check_avx_upper_stores,
   15600              :                    &avx_upper_reg_found);
   15601        48485 :       if (avx_upper_reg_found)
   15602              :         return AVX_U128_DIRTY;
   15603              : 
   15604              :       /* If the function is known to preserve some SSE registers,
   15605              :          RA and previous passes can legitimately rely on that for
   15606              :          modes wider than 256 bits.  It's only safe to issue a
   15607              :          vzeroupper if all SSE registers are clobbered.  */
   15608        48301 :       const function_abi &abi = insn_callee_abi (insn);
   15609        48301 :       if (vzeroupper_pattern (PATTERN (insn), VOIDmode)
   15610              :           /* It should be safe to issue a vzeroupper before a sibling call.
   15611              :              Also, there is no mode_exit for a sibling call, so a vzeroupper
   15612              :              could otherwise be missing there.  */
   15613        48301 :           || !(SIBLING_CALL_P (insn)
   15614        47015 :                || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
   15615        47015 :                                          abi.mode_clobbers (V4DImode))))
   15616         8438 :         return AVX_U128_ANY;
   15617              : 
   15618        39863 :       return AVX_U128_CLEAN;
   15619              :     }
   15620              : 
   15621      2020703 :   rtx set = single_set (insn);
   15622      2020703 :   if (set)
   15623              :     {
   15624      1947296 :       rtx dest = SET_DEST (set);
   15625      1947296 :       rtx src = SET_SRC (set);
   15626      1466836 :       if (SSE_REG_P (dest)
   15627       554277 :           && !EXT_REX_SSE_REG_P (dest)
   15628      3043698 :           && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
   15629              :         {
   15630              :           /* This is a YMM/ZMM load.  Return AVX_U128_DIRTY if the
   15631              :              source isn't zero.  */
   15632       169200 :           if (standard_sse_constant_p (src, GET_MODE (dest)) != 1)
   15633              :             return AVX_U128_DIRTY;
   15634              :           else
   15635              :             return AVX_U128_ANY;
   15636              :         }
   15637              :       else
   15638              :         {
   15639      1778096 :           if (ix86_check_avx_upper_register (src))
   15640              :             return AVX_U128_DIRTY;
   15641              :         }
   15642              : 
   15643              :       /* This isn't a YMM/ZMM load/store.  */
   15644              :       return AVX_U128_ANY;
   15645              :     }
   15646              : 
   15647              :   /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
   15648              :      Hardware changes state only when a 256bit register is written to,
   15649              :      but we need to prevent the compiler from moving the optimal insertion
   15650              :      point above an eventual read from a 256bit or 512bit register.  */
   15651        73407 :   if (ix86_check_avx_upper_register (PATTERN (insn)))
   15652              :     return AVX_U128_DIRTY;
   15653              : 
   15654              :   return AVX_U128_ANY;
   15655              : }
   15656              : 
   15657              : /* Return the i387 control word mode that ENTITY must be switched
   15658              :    into prior to the execution of INSN.  */
   15659              : 
   15660              : static int
   15661       417088 : ix86_i387_mode_needed (int entity, rtx_insn *insn)
   15662              : {
   15663       417088 :   enum attr_i387_cw mode;
   15664              : 
   15665              :   /* The mode UNINITIALIZED is used to store the control word after a
   15666              :      function call or ASM pattern.  The mode ANY specifies that the function
   15667              :      has no requirements on the control word and makes no changes in the
   15668              :      bits we are interested in.  */
   15669              : 
   15670       417088 :   if (CALL_P (insn)
   15671       417088 :       || (NONJUMP_INSN_P (insn)
   15672       341280 :           && (asm_noperands (PATTERN (insn)) >= 0
   15673       341227 :               || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
   15674        14637 :     return I387_CW_UNINITIALIZED;
   15675              : 
   15676       402451 :   if (recog_memoized (insn) < 0)
   15677              :     return I387_CW_ANY;
   15678              : 
   15679       401509 :   mode = get_attr_i387_cw (insn);
   15680              : 
   15681       401509 :   switch (entity) /* Report MODE only to the entity that tracks it.  */
   15682              :     {
   15683            0 :     case I387_ROUNDEVEN:
   15684            0 :       if (mode == I387_CW_ROUNDEVEN)
   15685              :         return mode;
   15686              :       break;
   15687              : 
   15688       396860 :     case I387_TRUNC:
   15689       396860 :       if (mode == I387_CW_TRUNC)
   15690              :         return mode;
   15691              :       break;
   15692              : 
   15693         3618 :     case I387_FLOOR:
   15694         3618 :       if (mode == I387_CW_FLOOR)
   15695              :         return mode;
   15696              :       break;
   15697              : 
   15698         1031 :     case I387_CEIL:
   15699         1031 :       if (mode == I387_CW_CEIL)
   15700              :         return mode;
   15701              :       break;
   15702              : 
   15703            0 :     default:
   15704            0 :       gcc_unreachable ();
   15705              :     }
   15706              : 
   15707              :   return I387_CW_ANY;
   15708              : }
   15709              : 
   15710              : /* Return the mode that ENTITY must be switched into prior to the
   15711              :    execution of INSN, by dispatching to the helper for ENTITY.  */
   15712              : 
   15713              : static int
   15714      2488984 : ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
   15715              : {
   15716      2488984 :   switch (entity)
   15717              :     {
   15718         1656 :     case X86_DIRFLAG:
   15719         1656 :       return ix86_dirflag_mode_needed (insn);
   15720      2070240 :     case AVX_U128:
   15721      2070240 :       return ix86_avx_u128_mode_needed (insn);
   15722       417088 :     case I387_ROUNDEVEN:
   15723       417088 :     case I387_TRUNC:
   15724       417088 :     case I387_FLOOR:
   15725       417088 :     case I387_CEIL:
   15726       417088 :       return ix86_i387_mode_needed (entity, insn);
   15727            0 :     default:
   15728            0 :       gcc_unreachable ();
   15729              :     }
   15730              :   return 0;
   15731              : }
   15732              : 
   15733              : /* Calculate mode of upper 128bit AVX registers after INSN; MODE is the mode before INSN.  */
   15734              : 
   15735              : static int
   15736      2070240 : ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
   15737              : {
   15738      2070240 :   rtx pat = PATTERN (insn);
   15739              : 
   15740      2070240 :   if (vzeroupper_pattern (pat, VOIDmode)
   15741      2070240 :       || vzeroall_pattern (pat, VOIDmode))
   15742          243 :     return AVX_U128_CLEAN;
   15743              : 
   15744              :   /* We know that state is clean after CALL insn if there are no
   15745              :      256bit or 512bit registers used in the function return register.  */
   15746      2069997 :   if (CALL_P (insn))
   15747              :     {
   15748        49491 :       bool avx_upper_reg_found = false;
   15749        49491 :       note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);
   15750              : 
   15751        49491 :       if (avx_upper_reg_found)
   15752              :         return AVX_U128_DIRTY;
   15753              : 
   15754              :       /* If the function doesn't clobber any SSE registers, or only clobbers
   15755              :          their 128-bit part, then vzeroupper isn't issued before the function
   15756              :          exit, so the status is not CLEAN but ANY after the function.  */
   15757        48915 :       const function_abi &abi = insn_callee_abi (insn);
   15758        48915 :       if (!(SIBLING_CALL_P (insn)
   15759        47634 :             || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
   15760        47634 :                                       abi.mode_clobbers (V4DImode))))
   15761         8734 :         return AVX_U128_ANY;
   15762              : 
   15763        40181 :       return  AVX_U128_CLEAN;
   15764              :     }
   15765              : 
   15766              :   /* Otherwise, return current mode.  Remember that if insn
   15767              :      references AVX 256bit or 512bit registers, the mode was already
   15768              :      changed to DIRTY from MODE_NEEDED.  */
   15769              :   return mode;
   15770              : }
   15771              : 
   15772              : /* Return the mode that ENTITY is in after INSN, given that it was in MODE before.  Only AVX_U128 is recomputed; the other entities keep MODE.  */
   15773              : 
   15774              : static int
   15775      2488139 : ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
   15776              : {
   15777      2488139 :   switch (entity)
   15778              :     {
   15779              :     case X86_DIRFLAG:
   15780              :       return mode;
   15781      2070240 :     case AVX_U128:
   15782      2070240 :       return ix86_avx_u128_mode_after (mode, insn);
   15783              :     case I387_ROUNDEVEN:
   15784              :     case I387_TRUNC:
   15785              :     case I387_FLOOR:
   15786              :     case I387_CEIL:
   15787              :       return mode;
   15788            0 :     default:
   15789            0 :       gcc_unreachable ();
   15790              :     }
   15791              : }
   15792              : 
   15793              : static int
   15794          120 : ix86_dirflag_mode_entry (void)
   15795              : {
   15796              :   /* Return the direction flag mode assumed at function entry.  For TARGET_CLD
   15797              :      or in the interrupt handler we can't assume direction flag state there.  */
   15798          120 :   if (TARGET_CLD
   15799          118 :       || cfun->machine->func_type != TYPE_NORMAL)
   15800          120 :     return X86_DIRFLAG_ANY;
   15801              : 
   15802              :   return X86_DIRFLAG_RESET;
   15803              : }
   15804              : 
   15805              : static int
   15806       123851 : ix86_avx_u128_mode_entry (void)
   15807              : {
   15808       123851 :   tree arg;
   15809              : 
   15810              :   /* Return the AVX_U128 mode assumed at function entry.  It is AVX_U128_DIRTY if
   15811              :      there are 256bit or 512bit modes used in function arguments, else CLEAN.  */
   15812       311790 :   for (arg = DECL_ARGUMENTS (current_function_decl); arg;
   15813       187939 :        arg = TREE_CHAIN (arg))
   15814              :     {
   15815       222016 :       rtx incoming = DECL_INCOMING_RTL (arg);
   15816              : 
   15817       222016 :       if (incoming && ix86_check_avx_upper_register (incoming))
   15818              :         return AVX_U128_DIRTY;
   15819              :     }
   15820              : 
   15821              :   return AVX_U128_CLEAN;
   15822              : }
   15823              : 
   15824              : /* Return a mode that ENTITY is assumed to be switched to at function
   15825              :    entry.  The i387 control word entities are assumed to be I387_CW_ANY.  */
   15826              : 
   15827              : static int
   15828        76314 : ix86_mode_entry (int entity)
   15829              : {
   15830        76314 :   switch (entity)
   15831              :     {
   15832          120 :     case X86_DIRFLAG:
   15833          120 :       return ix86_dirflag_mode_entry ();
   15834        75059 :     case AVX_U128:
   15835        75059 :       return ix86_avx_u128_mode_entry ();
   15836              :     case I387_ROUNDEVEN:
   15837              :     case I387_TRUNC:
   15838              :     case I387_FLOOR:
   15839              :     case I387_CEIL:
   15840              :       return I387_CW_ANY;
   15841            0 :     default:
   15842            0 :       gcc_unreachable ();
   15843              :     }
   15844              : }
   15845              : 
   15846              : static int
   15847        73810 : ix86_avx_u128_mode_exit (void)
   15848              : {
   15849        73810 :   rtx reg = crtl->return_rtx;
   15850              : 
   15851              :   /* Return the AVX_U128 mode assumed at function exit.  It is AVX_U128_DIRTY if
   15852              :      there are 256bit or 512bit modes used in the function return register.  */
   15853        73810 :   if (reg && ix86_check_avx_upper_register (reg))
   15854              :     return AVX_U128_DIRTY;
   15855              : 
   15856              :   /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
   15857              :      modes used in function arguments, otherwise return AVX_U128_CLEAN.
   15858              :    */
   15859        48792 :   return ix86_avx_u128_mode_entry ();
   15860              : }
   15861              : 
   15862              : /* Return a mode that ENTITY is assumed to be switched to at function
   15863              :    exit.  Only AVX_U128 is computed; the other entities are ANY.  */
   15864              : 
   15865              : static int
   15866        74920 : ix86_mode_exit (int entity)
   15867              : {
   15868        74920 :   switch (entity)
   15869              :     {
   15870              :     case X86_DIRFLAG:
   15871              :       return X86_DIRFLAG_ANY;
   15872        73810 :     case AVX_U128:
   15873        73810 :       return ix86_avx_u128_mode_exit ();
   15874         1076 :     case I387_ROUNDEVEN:
   15875         1076 :     case I387_TRUNC:
   15876         1076 :     case I387_FLOOR:
   15877         1076 :     case I387_CEIL:
   15878         1076 :       return I387_CW_ANY;
   15879            0 :     default:
   15880            0 :       gcc_unreachable ();
   15881              :     }
   15882              : }
   15883              : 
   15884              : static int
   15885      2173190 : ix86_mode_priority (int, int n)
   15886              : {
   15887      2173190 :   return n; /* Return N unchanged; the first argument is ignored.  */
   15888              : }
   15889              : 
   15890              : /* Emit code to initialize the control word copy used by trunc?f?i and
   15891              :    rounding patterns for MODE.  The current control word is stored in the
   15892              :    SLOT_CW_STORED stack slot; a copy with bits 0x0c00 adjusted for MODE is stored in the stack slot selected by MODE.  */
   15893              : 
   15894              : static void
   15895         3296 : emit_i387_cw_initialization (int mode)
   15896              : {
   15897         3296 :   rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
   15898         3296 :   rtx new_mode;
   15899              : 
   15900         3296 :   enum ix86_stack_slot slot;
   15901              : 
   15902         3296 :   rtx reg = gen_reg_rtx (HImode);
   15903              : 
   15904         3296 :   emit_insn (gen_x86_fnstcw_1 (stored_mode));
   15905         3296 :   emit_move_insn (reg, copy_rtx (stored_mode));
   15906              : 
   15907         3296 :   switch (mode)
   15908              :     {
   15909            0 :     case I387_CW_ROUNDEVEN:
   15910              :       /* round to nearest: clear both 0x0c00 bits */
   15911            0 :       emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
   15912            0 :       slot = SLOT_CW_ROUNDEVEN;
   15913            0 :       break;
   15914              : 
   15915         3100 :     case I387_CW_TRUNC:
   15916              :       /* round toward zero (truncate): set both 0x0c00 bits */
   15917         3100 :       emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
   15918         3100 :       slot = SLOT_CW_TRUNC;
   15919         3100 :       break;
   15920              : 
   15921          137 :     case I387_CW_FLOOR:
   15922              :       /* round down toward -oo: clear 0x0c00, then set 0x0400 */
   15923          137 :       emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
   15924          137 :       emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
   15925          137 :       slot = SLOT_CW_FLOOR;
   15926          137 :       break;
   15927              : 
   15928           59 :     case I387_CW_CEIL:
   15929              :       /* round up toward +oo: clear 0x0c00, then set 0x0800 */
   15930           59 :       emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
   15931           59 :       emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
   15932           59 :       slot = SLOT_CW_CEIL;
   15933           59 :       break;
   15934              : 
   15935            0 :     default:
   15936            0 :       gcc_unreachable ();
   15937              :     }
   15938              : 
   15939         3296 :   gcc_assert (slot < MAX_386_STACK_LOCALS);
   15940              : 
   15941         3296 :   new_mode = assign_386_stack_local (HImode, slot);
   15942         3296 :   emit_move_insn (new_mode, reg);
   15943         3296 : }
   15944              : 
   15945              : /* Generate the insns, if any are needed, to set ENTITY to MODE: cld for X86_DIRFLAG_RESET, ix86_expand_avx_vzeroupper for AVX_U128_CLEAN, and a control word initialization for i387 modes other than ANY and UNINITIALIZED.  */
   15946              : 
   15947              : static void
   15948        51598 : ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
   15949              :                     HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
   15950              : {
   15951        51598 :   switch (entity)
   15952              :     {
   15953          265 :     case X86_DIRFLAG:
   15954          265 :       if (mode == X86_DIRFLAG_RESET)
   15955          265 :         emit_insn (gen_cld ());
   15956              :       break;
   15957        43157 :     case AVX_U128:
   15958        43157 :       if (mode == AVX_U128_CLEAN)
   15959        21818 :         ix86_expand_avx_vzeroupper ();
   15960              :       break;
   15961         8176 :     case I387_ROUNDEVEN:
   15962         8176 :     case I387_TRUNC:
   15963         8176 :     case I387_FLOOR:
   15964         8176 :     case I387_CEIL:
   15965         8176 :       if (mode != I387_CW_ANY
   15966         8176 :           && mode != I387_CW_UNINITIALIZED)
   15967         3296 :         emit_i387_cw_initialization (mode);
   15968              :       break;
   15969            0 :     default:
   15970            0 :       gcc_unreachable ();
   15971              :     }
   15972        51598 : }
   15973              : 
   15974              : /* Output code for INSN to convert a float to a signed int.  OPERANDS
   15975              :    are the insn operands.  The output may be [HSD]Imode and the input
   15976              :    operand may be [SDX]Fmode.  If FISTTP is true, a fisttp template is returned instead of emitting fist/fistp.  */
   15977              : 
   15978              : const char *
   15979         7437 : output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
   15980              : {
   15981         7437 :   bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
   15982         7437 :   bool dimode_p = GET_MODE (operands[0]) == DImode;
   15983         7437 :   int round_mode = get_attr_i387_cw (insn);
   15984              : 
   15985         7437 :   static char buf[40];
   15986         7437 :   const char *p;
   15987              : 
   15988              :   /* Jump through a hoop or two for DImode, since the hardware has no
   15989              :      non-popping instruction.  We used to do this a different way, but
   15990              :      that was somewhat fragile and broke with post-reload splitters.  */
   15991         7437 :   if ((dimode_p || fisttp) && !stack_top_dies)
   15992           25 :     output_asm_insn ("fld\t%y1", operands);
   15993              : 
   15994         7437 :   gcc_assert (STACK_TOP_P (operands[1]));
   15995         7437 :   gcc_assert (MEM_P (operands[0]));
   15996         7437 :   gcc_assert (GET_MODE (operands[1]) != TFmode);
   15997              : 
   15998         7437 :   if (fisttp)
   15999              :     return "fisttp%Z0\t%0";
   16000              : 
   16001         7436 :   strcpy (buf, "fist");
   16002              : 
   16003         7436 :   if (round_mode != I387_CW_ANY)
   16004         7392 :     output_asm_insn ("fldcw\t%3", operands); /* NOTE(review): presumably %3 is the control word for the rounding mode -- confirm against the insn pattern.  */
   16005              : 
   16006         7436 :   p = "p%Z0\t%0";
   16007         7436 :   strcat (buf, p + !(stack_top_dies || dimode_p)); /* Skip the leading 'p' (giving fist, not fistp) unless st(0) dies or the output is DImode.  */
   16008              : 
   16009         7436 :   output_asm_insn (buf, operands);
   16010              : 
   16011         7436 :   if (round_mode != I387_CW_ANY)
   16012         7392 :     output_asm_insn ("fldcw\t%2", operands); /* NOTE(review): presumably %2 restores the saved control word -- confirm.  */
   16013              : 
   16014              :   return "";
   16015              : }
   16016              : 
   16017              : /* Output code for x87 ffreep insn.  The OPNO argument, which may only
   16018              :    have the values zero or one, indicates the ffreep insn's operand
   16019              :    from the OPERANDS array.  */
   16020              : 
   16021              : static const char *
   16022       275148 : output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
   16023              : {
   16024            0 :   if (TARGET_USE_FFREEP)
   16025              : #ifdef HAVE_AS_IX86_FFREEP
   16026            0 :     return opno ? "ffreep\t%y1" : "ffreep\t%y0";
   16027              : #else
   16028              :     {
   16029              :       static char retval[32];
   16030              :       int regno = REGNO (operands[opno]);
   16031              : 
   16032              :       gcc_assert (STACK_REGNO_P (regno));
   16033              : 
   16034              :       regno -= FIRST_STACK_REG;
   16035              : 
   16036              :       snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
   16037              :       return retval;
   16038              :     }
   16039              : #endif
   16040              : 
   16041            0 :   return opno ? "fstp\t%y1" : "fstp\t%y0";
   16042              : }
   16043              : 
   16044              : 
   16045              : /* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   16046              :    should be used.  UNORDERED_P is true when fucom should be used.  */
   16047              : 
   16048              : const char *
   16049       107693 : output_fp_compare (rtx_insn *insn, rtx *operands,
   16050              :                    bool eflags_p, bool unordered_p)
   16051              : {
   16052       107693 :   rtx *xops = eflags_p ? &operands[0] : &operands[1];
   16053       107693 :   bool stack_top_dies;
   16054              : 
   16055       107693 :   static char buf[40];
   16056       107693 :   const char *p;
   16057              : 
   16058       107693 :   gcc_assert (STACK_TOP_P (xops[0]));
   16059              : 
   16060       107693 :   stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
   16061              : 
   16062       107693 :   if (eflags_p)
   16063              :     {
   16064       107693 :       p = unordered_p ? "fucomi" : "fcomi";
   16065       107693 :       strcpy (buf, p);
   16066              : 
   16067       107693 :       p = "p\t{%y1, %0|%0, %y1}";
   16068       107693 :       strcat (buf, p + !stack_top_dies);
   16069              : 
   16070       107693 :       return buf;
   16071              :     }
   16072              : 
   16073            0 :   if (STACK_REG_P (xops[1])
   16074            0 :       && stack_top_dies
   16075            0 :       && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
   16076              :     {
   16077            0 :       gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
   16078              : 
   16079              :       /* If both the top of the 387 stack die, and the other operand
   16080              :          is also a stack register that dies, then this must be a
   16081              :          `fcompp' float compare.  */
   16082            0 :       p = unordered_p ? "fucompp" : "fcompp";
   16083            0 :       strcpy (buf, p);
   16084              :     }
   16085            0 :   else if (const0_operand (xops[1], VOIDmode))
   16086              :     {
   16087            0 :       gcc_assert (!unordered_p);
   16088            0 :       strcpy (buf, "ftst");
   16089              :     }
   16090              :   else
   16091              :     {
   16092            0 :       if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
   16093              :         {
   16094            0 :           gcc_assert (!unordered_p);
   16095              :           p = "ficom";
   16096              :         }
   16097              :       else
   16098            0 :         p = unordered_p ? "fucom" : "fcom";
   16099              : 
   16100            0 :       strcpy (buf, p);
   16101              : 
   16102            0 :       p = "p%Z2\t%y2";
   16103            0 :       strcat (buf, p + !stack_top_dies);
   16104              :     }
   16105              : 
   16106            0 :   output_asm_insn (buf, operands);
   16107            0 :   return "fnstsw\t%0";
   16108              : }
   16109              : 
   16110              : void
   16111       130755 : ix86_output_addr_vec_elt (FILE *file, int value)
   16112              : {
   16113       130755 :   const char *directive = ASM_LONG;
   16114              : 
   16115              : #ifdef ASM_QUAD
   16116       130755 :   if (TARGET_LP64)
   16117       119030 :     directive = ASM_QUAD;
   16118              : #else
   16119              :   gcc_assert (!TARGET_64BIT);
   16120              : #endif
   16121              : 
   16122       130755 :   fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
   16123       130755 : }
   16124              : 
   16125              : void
   16126        27384 : ix86_output_addr_diff_elt (FILE *file, int value, int rel)
   16127              : {
   16128        27384 :   const char *directive = ASM_LONG;
   16129              : 
   16130              : #ifdef ASM_QUAD
   16131        41076 :   if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
   16132              :     directive = ASM_QUAD;
   16133              : #else
   16134              :   gcc_assert (!TARGET_64BIT);
   16135              : #endif
   16136              :   /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
   16137        27384 :   if (TARGET_64BIT || TARGET_VXWORKS_VAROFF)
   16138        13692 :     fprintf (file, "%s%s%d-%s%d\n",
   16139              :              directive, LPREFIX, value, LPREFIX, rel);
   16140              : #if TARGET_MACHO
   16141              :   else if (TARGET_MACHO)
   16142              :     {
   16143              :       fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
   16144              :       machopic_output_function_base_name (file);
   16145              :       putc ('\n', file);
   16146              :     }
   16147              : #endif
   16148        13692 :   else if (HAVE_AS_GOTOFF_IN_DATA)
   16149        13692 :     fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
   16150              :   else
   16151              :     asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
   16152              :                  GOT_SYMBOL_NAME, LPREFIX, value);
   16153        27384 : }
   16154              : 
   16155              : #define LEA_MAX_STALL (3)
   16156              : #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
   16157              : 
   16158              : /* Increase given DISTANCE in half-cycles according to
   16159              :    dependencies between PREV and NEXT instructions.
   16160              :    Add 1 half-cycle if there is no dependency and
   16161              :    go to next cycle if there is some dependency.  */
   16162              : 
   16163              : static unsigned int
   16164         2077 : increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
   16165              : {
   16166         2077 :   df_ref def, use;
   16167              : 
   16168         2077 :   if (!prev || !next)
   16169          727 :     return distance + (distance & 1) + 2;
   16170              : 
   16171         1350 :   if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
   16172          226 :     return distance + 1;
   16173              : 
   16174         1887 :   FOR_EACH_INSN_USE (use, next)
   16175         2410 :     FOR_EACH_INSN_DEF (def, prev)
   16176         1647 :       if (!DF_REF_IS_ARTIFICIAL (def)
   16177         1647 :           && DF_REF_REGNO (use) == DF_REF_REGNO (def))
   16178          699 :         return distance + (distance & 1) + 2;
   16179              : 
   16180          425 :   return distance + 1;
   16181              : }
   16182              : 
   16183              : /* Function checks if instruction INSN defines register number
   16184              :    REGNO1 or REGNO2.  */
   16185              : 
   16186              : bool
   16187         2029 : insn_defines_reg (unsigned int regno1, unsigned int regno2,
   16188              :                   rtx_insn *insn)
   16189              : {
   16190         2029 :   df_ref def;
   16191              : 
   16192         3661 :   FOR_EACH_INSN_DEF (def, insn)
   16193         2030 :     if (DF_REF_REG_DEF_P (def)
   16194         2030 :         && !DF_REF_IS_ARTIFICIAL (def)
   16195         2030 :         && (regno1 == DF_REF_REGNO (def)
   16196         1648 :             || regno2 == DF_REF_REGNO (def)))
   16197              :       return true;
   16198              : 
   16199              :   return false;
   16200              : }
   16201              : 
   16202              : /* Function checks if instruction INSN uses register number
   16203              :    REGNO as a part of address expression.  */
   16204              : 
   16205              : static bool
   16206         1162 : insn_uses_reg_mem (unsigned int regno, rtx insn)
   16207              : {
   16208         1162 :   df_ref use;
   16209              : 
   16210         2444 :   FOR_EACH_INSN_USE (use, insn)
   16211         1365 :     if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
   16212              :       return true;
   16213              : 
   16214              :   return false;
   16215              : }
   16216              : 
   16217              : /* Search backward for non-agu definition of register number REGNO1
   16218              :    or register number REGNO2 in basic block starting from instruction
   16219              :    START up to head of basic block or instruction INSN.
   16220              : 
   16221              :    Function puts true value into *FOUND var if definition was found
   16222              :    and false otherwise.
   16223              : 
   16224              :    Distance in half-cycles between START and found instruction or head
   16225              :    of BB is added to DISTANCE and returned.  */
   16226              : 
   16227              : static int
   16228          611 : distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
   16229              :                                rtx_insn *insn, int distance,
   16230              :                                rtx_insn *start, bool *found)
   16231              : {
   16232          611 :   basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
   16233          611 :   rtx_insn *prev = start;
   16234          611 :   rtx_insn *next = NULL;
   16235              : 
   16236          611 :   *found = false;
   16237              : 
   16238          611 :   while (prev
   16239         1818 :          && prev != insn
   16240         1818 :          && distance < LEA_SEARCH_THRESHOLD)
   16241              :     {
   16242         1626 :       if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
   16243              :         {
   16244          915 :           distance = increase_distance (prev, next, distance);
   16245          915 :           if (insn_defines_reg (regno1, regno2, prev))
   16246              :             {
   16247          239 :               if (recog_memoized (prev) < 0
   16248          239 :                   || get_attr_type (prev) != TYPE_LEA)
   16249              :                 {
   16250          197 :                   *found = true;
   16251          197 :                   return distance;
   16252              :                 }
   16253              :             }
   16254              : 
   16255              :           next = prev;
   16256              :         }
   16257         1429 :       if (prev == BB_HEAD (bb))
   16258              :         break;
   16259              : 
   16260         1207 :       prev = PREV_INSN (prev);
   16261              :     }
   16262              : 
   16263              :   return distance;
   16264              : }
   16265              : 
   16266              : /* Search backward for non-agu definition of register number REGNO1
   16267              :    or register number REGNO2 in INSN's basic block until
   16268              :    1. Pass LEA_SEARCH_THRESHOLD instructions, or
   16269              :    2. Reach neighbor BBs boundary, or
   16270              :    3. Reach agu definition.
   16271              :    Returns the distance between the non-agu definition point and INSN.
   16272              :    If no definition point, returns -1.  */
   16273              : 
   16274              : static int
   16275          417 : distance_non_agu_define (unsigned int regno1, unsigned int regno2,
   16276              :                          rtx_insn *insn)
   16277              : {
   16278          417 :   basic_block bb = BLOCK_FOR_INSN (insn);
   16279          417 :   int distance = 0;
   16280          417 :   bool found = false;
   16281              : 
   16282          417 :   if (insn != BB_HEAD (bb))
   16283          417 :     distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
   16284              :                                               distance, PREV_INSN (insn),
   16285              :                                               &found);
   16286              : 
   16287          417 :   if (!found && distance < LEA_SEARCH_THRESHOLD)
   16288              :     {
   16289          166 :       edge e;
   16290          166 :       edge_iterator ei;
   16291          166 :       bool simple_loop = false;
   16292              : 
   16293          334 :       FOR_EACH_EDGE (e, ei, bb->preds)
   16294          205 :         if (e->src == bb)
   16295              :           {
   16296              :             simple_loop = true;
   16297              :             break;
   16298              :           }
   16299              : 
   16300          166 :       if (simple_loop)
   16301           37 :         distance = distance_non_agu_define_in_bb (regno1, regno2,
   16302              :                                                   insn, distance,
   16303           37 :                                                   BB_END (bb), &found);
   16304              :       else
   16305              :         {
   16306          129 :           int shortest_dist = -1;
   16307          129 :           bool found_in_bb = false;
   16308              : 
   16309          286 :           FOR_EACH_EDGE (e, ei, bb->preds)
   16310              :             {
   16311          157 :               int bb_dist
   16312          314 :                 = distance_non_agu_define_in_bb (regno1, regno2,
   16313              :                                                  insn, distance,
   16314          157 :                                                  BB_END (e->src),
   16315              :                                                  &found_in_bb);
   16316          157 :               if (found_in_bb)
   16317              :                 {
   16318           24 :                   if (shortest_dist < 0)
   16319              :                     shortest_dist = bb_dist;
   16320            0 :                   else if (bb_dist > 0)
   16321            0 :                     shortest_dist = MIN (bb_dist, shortest_dist);
   16322              : 
   16323           24 :                   found = true;
   16324              :                 }
   16325              :             }
   16326              : 
   16327          129 :           distance = shortest_dist;
   16328              :         }
   16329              :     }
   16330              : 
   16331          417 :   if (!found)
   16332              :     return -1;
   16333              : 
   16334          197 :   return distance >> 1;
   16335              : }
   16336              : 
   16337              : /* Return the distance in half-cycles between INSN and the next
   16338              :    insn that uses register number REGNO in memory address added
   16339              :    to DISTANCE.  Return -1 if REGNO0 is set.
   16340              : 
   16341              :    Put true value into *FOUND if register usage was found and
   16342              :    false otherwise.
   16343              :    Put true value into *REDEFINED if register redefinition was
   16344              :    found and false otherwise.  */
   16345              : 
   16346              : static int
   16347          747 : distance_agu_use_in_bb (unsigned int regno,
   16348              :                         rtx_insn *insn, int distance, rtx_insn *start,
   16349              :                         bool *found, bool *redefined)
   16350              : {
   16351          747 :   basic_block bb = NULL;
   16352          747 :   rtx_insn *next = start;
   16353          747 :   rtx_insn *prev = NULL;
   16354              : 
   16355          747 :   *found = false;
   16356          747 :   *redefined = false;
   16357              : 
   16358          747 :   if (start != NULL_RTX)
   16359              :     {
   16360          730 :       bb = BLOCK_FOR_INSN (start);
   16361          730 :       if (start != BB_HEAD (bb))
   16362              :         /* If insn and start belong to the same bb, set prev to insn,
   16363              :            so the call to increase_distance will increase the distance
   16364              :            between insns by 1.  */
   16365          400 :         prev = insn;
   16366              :     }
   16367              : 
   16368         2525 :   while (next
   16369         2525 :          && next != insn
   16370         2525 :          && distance < LEA_SEARCH_THRESHOLD)
   16371              :     {
   16372         2339 :       if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
   16373              :         {
   16374         1162 :           distance = increase_distance(prev, next, distance);
   16375         1162 :           if (insn_uses_reg_mem (regno, next))
   16376              :             {
   16377              :               /* Return DISTANCE if OP0 is used in memory
   16378              :                  address in NEXT.  */
   16379           83 :               *found = true;
   16380           83 :               return distance;
   16381              :             }
   16382              : 
   16383         1079 :           if (insn_defines_reg (regno, INVALID_REGNUM, next))
   16384              :             {
   16385              :               /* Return -1 if OP0 is set in NEXT.  */
   16386          154 :               *redefined = true;
   16387          154 :               return -1;
   16388              :             }
   16389              : 
   16390              :           prev = next;
   16391              :         }
   16392              : 
   16393         2102 :       if (next == BB_END (bb))
   16394              :         break;
   16395              : 
   16396         1778 :       next = NEXT_INSN (next);
   16397              :     }
   16398              : 
   16399              :   return distance;
   16400              : }
   16401              : 
   16402              : /* Return the distance between INSN and the next insn that uses
   16403              :    register number REGNO0 in memory address.  Return -1 if no such
   16404              :    a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
   16405              : 
   16406              : static int
   16407          417 : distance_agu_use (unsigned int regno0, rtx_insn *insn)
   16408              : {
   16409          417 :   basic_block bb = BLOCK_FOR_INSN (insn);
   16410          417 :   int distance = 0;
   16411          417 :   bool found = false;
   16412          417 :   bool redefined = false;
   16413              : 
   16414          417 :   if (insn != BB_END (bb))
   16415          400 :     distance = distance_agu_use_in_bb (regno0, insn, distance,
   16416              :                                        NEXT_INSN (insn),
   16417              :                                        &found, &redefined);
   16418              : 
   16419          417 :   if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
   16420              :     {
   16421          242 :       edge e;
   16422          242 :       edge_iterator ei;
   16423          242 :       bool simple_loop = false;
   16424              : 
   16425          527 :       FOR_EACH_EDGE (e, ei, bb->succs)
   16426          347 :         if (e->dest == bb)
   16427              :           {
   16428              :             simple_loop = true;
   16429              :             break;
   16430              :           }
   16431              : 
   16432          242 :       if (simple_loop)
   16433           62 :         distance = distance_agu_use_in_bb (regno0, insn,
   16434              :                                            distance, BB_HEAD (bb),
   16435              :                                            &found, &redefined);
   16436              :       else
   16437              :         {
   16438          180 :           int shortest_dist = -1;
   16439          180 :           bool found_in_bb = false;
   16440          180 :           bool redefined_in_bb = false;
   16441              : 
   16442          465 :           FOR_EACH_EDGE (e, ei, bb->succs)
   16443              :             {
   16444          285 :               int bb_dist
   16445          570 :                 = distance_agu_use_in_bb (regno0, insn,
   16446          285 :                                           distance, BB_HEAD (e->dest),
   16447              :                                           &found_in_bb, &redefined_in_bb);
   16448          285 :               if (found_in_bb)
   16449              :                 {
   16450           17 :                   if (shortest_dist < 0)
   16451              :                     shortest_dist = bb_dist;
   16452            2 :                   else if (bb_dist > 0)
   16453            2 :                     shortest_dist = MIN (bb_dist, shortest_dist);
   16454              : 
   16455           17 :                   found = true;
   16456              :                 }
   16457              :             }
   16458              : 
   16459          180 :           distance = shortest_dist;
   16460              :         }
   16461              :     }
   16462              : 
   16463          417 :   if (!found || redefined)
   16464              :     return -1;
   16465              : 
   16466           81 :   return distance >> 1;
   16467              : }
   16468              : 
   16469              : /* Define this macro to tune LEA priority vs ADD, it take effect when
   16470              :    there is a dilemma of choosing LEA or ADD
   16471              :    Negative value: ADD is more preferred than LEA
   16472              :    Zero: Neutral
   16473              :    Positive value: LEA is more preferred than ADD.  */
   16474              : #define IX86_LEA_PRIORITY 0
   16475              : 
   16476              : /* Return true if usage of lea INSN has performance advantage
   16477              :    over a sequence of instructions.  Instructions sequence has
   16478              :    SPLIT_COST cycles higher latency than lea latency.  */
   16479              : 
   16480              : static bool
   16481         1617 : ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
   16482              :                       unsigned int regno2, int split_cost, bool has_scale)
   16483              : {
   16484         1617 :   int dist_define, dist_use;
   16485              : 
   16486              :   /* For Atom processors newer than Bonnell, if using a 2-source or
   16487              :      3-source LEA for non-destructive destination purposes, or due to
   16488              :      wanting ability to use SCALE, the use of LEA is justified.  */
   16489         1617 :   if (!TARGET_CPU_P (BONNELL))
   16490              :     {
   16491         1200 :       if (has_scale)
   16492              :         return true;
   16493         1181 :       if (split_cost < 1)
   16494              :         return false;
   16495          406 :       if (regno0 == regno1 || regno0 == regno2)
   16496              :         return false;
   16497              :       return true;
   16498              :     }
   16499              : 
   16500              :   /* Remember recog_data content.  */
   16501          417 :   struct recog_data_d recog_data_save = recog_data;
   16502              : 
   16503          417 :   dist_define = distance_non_agu_define (regno1, regno2, insn);
   16504          417 :   dist_use = distance_agu_use (regno0, insn);
   16505              : 
   16506              :   /* distance_non_agu_define can call get_attr_type which can call
   16507              :      recog_memoized, restore recog_data back to previous content.  */
   16508          417 :   recog_data = recog_data_save;
   16509              : 
   16510          417 :   if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
   16511              :     {
   16512              :       /* If there is no non AGU operand definition, no AGU
   16513              :          operand usage and split cost is 0 then both lea
   16514              :          and non lea variants have same priority.  Currently
   16515              :          we prefer lea for 64 bit code and non lea on 32 bit
   16516              :          code.  */
   16517          223 :       if (dist_use < 0 && split_cost == 0)
   16518           98 :         return TARGET_64BIT || IX86_LEA_PRIORITY;
   16519              :       else
   16520              :         return true;
   16521              :     }
   16522              : 
   16523              :   /* With longer definitions distance lea is more preferable.
   16524              :      Here we change it to take into account splitting cost and
   16525              :      lea priority.  */
   16526          194 :   dist_define += split_cost + IX86_LEA_PRIORITY;
   16527              : 
   16528              :   /* If there is no use in memory address then we just check
   16529              :      that split cost exceeds AGU stall.  */
   16530          194 :   if (dist_use < 0)
   16531          190 :     return dist_define > LEA_MAX_STALL;
   16532              : 
   16533              :   /* If this insn has both backward non-agu dependence and forward
   16534              :      agu dependence, the one with short distance takes effect.  */
   16535            4 :   return dist_define >= dist_use;
   16536              : }
   16537              : 
   16538              : /* Return true if we need to split op0 = op1 + op2 into a sequence of
   16539              :    move and add to avoid AGU stalls.  */
   16540              : 
   16541              : bool
   16542      9114182 : ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
   16543              : {
   16544      9114182 :   unsigned int regno0, regno1, regno2;
   16545              : 
   16546              :   /* Check if we need to optimize.  */
   16547      9114182 :   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
   16548      9113373 :     return false;
   16549              : 
   16550          809 :   regno0 = true_regnum (operands[0]);
   16551          809 :   regno1 = true_regnum (operands[1]);
   16552          809 :   regno2 = true_regnum (operands[2]);
   16553              : 
   16554              :   /* We need to split only adds with non destructive
   16555              :      destination operand.  */
   16556          809 :   if (regno0 == regno1 || regno0 == regno2)
   16557              :     return false;
   16558              :   else
   16559          244 :     return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
   16560              : }
   16561              : 
   16562              : /* Return true if we should emit lea instruction instead of mov
   16563              :    instruction.  */
   16564              : 
   16565              : bool
   16566     29504460 : ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
   16567              : {
   16568     29504460 :   unsigned int regno0, regno1;
   16569              : 
   16570              :   /* Check if we need to optimize.  */
   16571     29504460 :   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
   16572     29502172 :     return false;
   16573              : 
   16574              :   /* Use lea for reg to reg moves only.  */
   16575         2288 :   if (!REG_P (operands[0]) || !REG_P (operands[1]))
   16576              :     return false;
   16577              : 
   16578          463 :   regno0 = true_regnum (operands[0]);
   16579          463 :   regno1 = true_regnum (operands[1]);
   16580              : 
   16581          463 :   return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
   16582              : }
   16583              : 
   16584              : /* Return true if we need to split lea into a sequence of
   16585              :    instructions to avoid AGU stalls during peephole2.  */
   16586              : 
   16587              : bool
   16588     11055145 : ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
   16589              : {
   16590     11055145 :   unsigned int regno0, regno1, regno2;
   16591     11055145 :   int split_cost;
   16592     11055145 :   struct ix86_address parts;
   16593     11055145 :   int ok;
   16594              : 
   16595              :   /* The "at least two components" test below might not catch simple
   16596              :      move or zero extension insns if parts.base is non-NULL and parts.disp
   16597              :      is const0_rtx as the only components in the address, e.g. if the
   16598              :      register is %rbp or %r13.  As this test is much cheaper and moves or
   16599              :      zero extensions are the common case, do this check first.  */
   16600     11055145 :   if (REG_P (operands[1])
   16601     11055145 :       || (SImode_address_operand (operands[1], VOIDmode)
   16602       144387 :           && REG_P (XEXP (operands[1], 0))))
   16603      4039459 :     return false;
   16604              : 
   16605      7015686 :   ok = ix86_decompose_address (operands[1], &parts);
   16606      7015686 :   gcc_assert (ok);
   16607              : 
   16608              :   /* There should be at least two components in the address.  */
   16609      7015686 :   if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
   16610      7015686 :       + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
   16611              :     return false;
   16612              : 
   16613              :   /* We should not split into add if a non-legitimate PIC
   16614              :      operand is used as displacement.  */
   16615      2647412 :   if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
   16616              :     return false;
   16617              : 
   16618      2597380 :   regno0 = true_regnum (operands[0]) ;
   16619      2597380 :   regno1 = INVALID_REGNUM;
   16620      2597380 :   regno2 = INVALID_REGNUM;
   16621              : 
   16622      2597380 :   if (parts.base)
   16623      2522032 :     regno1 = true_regnum (parts.base);
   16624      2597380 :   if (parts.index)
   16625       483542 :     regno2 = true_regnum (parts.index);
   16626              : 
   16627              :   /* Use add for a = a + b and a = b + a since it is faster and shorter
   16628              :      than lea for most processors.  For the processors like BONNELL, if
   16629              :      the destination register of LEA holds an actual address which will
   16630              :      be used soon, LEA is better and otherwise ADD is better.  */
   16631      2597380 :   if (!TARGET_CPU_P (BONNELL)
   16632      2597256 :       && parts.scale == 1
   16633      2353397 :       && (!parts.disp || parts.disp == const0_rtx)
   16634       177187 :       && (regno0 == regno1 || regno0 == regno2))
   16635              :     return true;
   16636              : 
   16637              :   /* Split with -Oz if the encoding requires fewer bytes.  */
   16638      2591316 :   if (optimize_size > 1
   16639           27 :       && parts.scale > 1
   16640            4 :       && !parts.base
   16641            4 :       && (!parts.disp || parts.disp == const0_rtx))
   16642              :     return true;
   16643              : 
   16644              :   /* Check whether we need to optimize.  */
   16645      2591312 :   if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
   16646      2590976 :     return false;
   16647              : 
   16648          336 :   split_cost = 0;
   16649              : 
   16650              :   /* Compute how many cycles we will add to execution time
   16651              :      if we split lea into a sequence of instructions.  */
   16652          336 :   if (parts.base || parts.index)
   16653              :     {
   16654              :       /* Have to use mov instruction if non-destructive
   16655              :          destination form is used.  */
   16656          336 :       if (regno1 != regno0 && regno2 != regno0)
   16657          265 :         split_cost += 1;
   16658              : 
   16659              :       /* Have to add index to base if both exist.  */
   16660          336 :       if (parts.base && parts.index)
   16661           53 :         split_cost += 1;
   16662              : 
   16663              :       /* Have to use shift and adds if scale is 2 or greater.  */
   16664          336 :       if (parts.scale > 1)
   16665              :         {
   16666           29 :           if (regno0 != regno1)
   16667           23 :             split_cost += 1;
   16668            6 :           else if (regno2 == regno0)
   16669            0 :             split_cost += 4;
   16670              :           else
   16671            6 :             split_cost += parts.scale;
   16672              :         }
   16673              : 
   16674              :       /* Have to use add instruction with immediate if
   16675              :          disp is nonzero.  */
   16676          336 :       if (parts.disp && parts.disp != const0_rtx)
   16677          276 :         split_cost += 1;
   16678              : 
   16679              :       /* Subtract the price of lea.  */
   16680          336 :       split_cost -= 1;
   16681              :     }
   16682              : 
   16683          336 :   return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
   16684          336 :                                 parts.scale > 1);
   16685              : }
   16686              : 
   16687              : /* Return true if it is ok to optimize an ADD operation to LEA
   16688              :    operation to avoid flag register consumption.  For most processors,
   16689              :    ADD is faster than LEA.  For the processors like BONNELL, if the
   16690              :    destination register of LEA holds an actual address which will be
   16691              :    used soon, LEA is better and otherwise ADD is better.  */
   16692              : 
   16693              : bool
   16694      9172507 : ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
   16695              : {
   16696      9172507 :   unsigned int regno0 = true_regnum (operands[0]);
   16697      9172507 :   unsigned int regno1 = true_regnum (operands[1]);
   16698      9172507 :   unsigned int regno2 = true_regnum (operands[2]);
   16699              : 
   16700              :   /* If a = b + c with a != b and a != c, we must use the lea form.  */
   16701      9172507 :   if (regno0 != regno1 && regno0 != regno2)
   16702              :     return true;
   16703              : 
   16704      7149274 :   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
   16705      7148700 :     return false;
   16706              : 
   16707          574 :   return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
   16708              : }
   16709              : 
   16710              : /* Return true if the destination register of SET_BODY is the shift count
   16711              :    of USE_BODY.  PARALLEL bodies are searched element by element.  */
   16712              : 
   16713              : static bool
   16714           89 : ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
   16715              : {
   16716           89 :   rtx set_dest;
   16717           89 :   rtx shift_rtx;
   16718           89 :   int i;
   16719              : 
   16720              :   /* Retrieve the destination of SET_BODY; it must be a register.  */
   16721           89 :   switch (GET_CODE (set_body))
   16722              :     {
   16723           73 :     case SET:
   16724           73 :       set_dest = SET_DEST (set_body);
   16725           73 :       if (!set_dest || !REG_P (set_dest))
   16726              :         return false;
   16727           72 :       break;
   16728            8 :     case PARALLEL:
   16729           24 :       for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
   16730           16 :         if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
   16731              :                                           use_body))
   16732              :           return true;
   16733              :       /* FALLTHROUGH */
   16734              :     default:
   16735              :       return false;
   16736              :     }
   16737              : 
   16738              :   /* Retrieve the source of USE_BODY, which may be a shift or rotate.  */
   16739           72 :   switch (GET_CODE (use_body))
   16740              :     {
   16741           24 :     case SET:
   16742           24 :       shift_rtx = XEXP (use_body, 1);
   16743           24 :       break;
   16744           24 :     case PARALLEL:
   16745           72 :       for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
   16746           48 :         if (ix86_dep_by_shift_count_body (set_body,
   16747           48 :                                           XVECEXP (use_body, 0, i)))
   16748              :           return true;
   16749              :       /* FALLTHROUGH */
   16750              :     default:
   16751              :       return false;
   16752              :     }
   16753              : 
   16754           24 :   if (shift_rtx
   16755           24 :       && (GET_CODE (shift_rtx) == ASHIFT
   16756           21 :           || GET_CODE (shift_rtx) == LSHIFTRT
   16757            5 :           || GET_CODE (shift_rtx) == ASHIFTRT
   16758            0 :           || GET_CODE (shift_rtx) == ROTATE
   16759            0 :           || GET_CODE (shift_rtx) == ROTATERT))
   16760              :     {
   16761           24 :       rtx shift_count = XEXP (shift_rtx, 1);
   16762              : 
   16763              :       /* Return true if the shift count is the dest of SET_BODY.  */
   16764           24 :       if (REG_P (shift_count))
   16765              :         {
   16766              :           /* This can be invoked by the pre-reload scheduler, before register
   16767              :              allocation, so only use true_regnum once reload has completed.  */
   16768            0 :           if (reload_completed
   16769            0 :               && true_regnum (set_dest) == true_regnum (shift_count))
   16770              :             return true;
   16771            0 :           else if (REGNO(set_dest) == REGNO(shift_count))
   16772              :             return true;
   16773              :         }
   16774              :     }
   16775              : 
   16776              :   return false;
   16777              : }
   16778              : 
   16779              : /* Return true if the destination register set by SET_INSN is the shift
   16780              :    count of USE_INSN; both insns are checked via their PATTERNs.  */
   16781              : 
   16782              : bool
   16783           25 : ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
   16784              : {
   16785           25 :   return ix86_dep_by_shift_count_body (PATTERN (set_insn),
   16786           25 :                                        PATTERN (use_insn));
   16787              : }
   16788              : 
   16789              : /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   16790              :    are ok, keeping in mind the possible movddup alternative.  */
   16791              : 
   16792              : bool
   16793        89884 : ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
   16794              : {
   16795        89884 :   if (MEM_P (operands[0]))  /* MEM dest must equal input 1 + HIGH.  */
   16796         2025 :     return rtx_equal_p (operands[0], operands[1 + high]);
   16797        87859 :   if (MEM_P (operands[1]) && MEM_P (operands[2]))  /* Both inputs in MEM.  */
   16798         1009 :     return false;
   16799              :   return true;
   16800              : }
   16801              : 
   16802              : /* A subroutine of ix86_build_signbit_mask.  Build a CONST_VECTOR of MODE
   16803              :    with VALUE in element 0.  If VECT is true, replicate VALUE into all the
   16804              :    other elements, else zero them.  Integer vector modes require VECT.  */
   16805              : 
   16806              : rtx
   16807        74842 : ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
   16808              : {
   16809        74842 :   int i, n_elt;
   16810        74842 :   rtvec v;
   16811        74842 :   machine_mode scalar_mode;
   16812              : 
   16813        74842 :   switch (mode)
   16814              :     {
   16815         1292 :     case E_V64QImode:
   16816         1292 :     case E_V32QImode:
   16817         1292 :     case E_V16QImode:
   16818         1292 :     case E_V32HImode:
   16819         1292 :     case E_V16HImode:
   16820         1292 :     case E_V8HImode:
   16821         1292 :     case E_V16SImode:
   16822         1292 :     case E_V8SImode:
   16823         1292 :     case E_V4SImode:
   16824         1292 :     case E_V2SImode:
   16825         1292 :     case E_V8DImode:
   16826         1292 :     case E_V4DImode:
   16827         1292 :     case E_V2DImode:
   16828         1292 :       gcc_assert (vect);
   16829              :       /* FALLTHRU */
   16830        74842 :     case E_V2HFmode:
   16831        74842 :     case E_V4HFmode:
   16832        74842 :     case E_V8HFmode:
   16833        74842 :     case E_V16HFmode:
   16834        74842 :     case E_V32HFmode:
   16835        74842 :     case E_V16SFmode:
   16836        74842 :     case E_V8SFmode:
   16837        74842 :     case E_V4SFmode:
   16838        74842 :     case E_V2SFmode:
   16839        74842 :     case E_V8DFmode:
   16840        74842 :     case E_V4DFmode:
   16841        74842 :     case E_V2DFmode:
   16842        74842 :     case E_V32BFmode:
   16843        74842 :     case E_V16BFmode:
   16844        74842 :     case E_V8BFmode:
   16845        74842 :     case E_V4BFmode:
   16846        74842 :     case E_V2BFmode:
   16847        74842 :       n_elt = GET_MODE_NUNITS (mode);
   16848        74842 :       v = rtvec_alloc (n_elt);
   16849        74842 :       scalar_mode = GET_MODE_INNER (mode);
   16850              : 
   16851        74842 :       RTVEC_ELT (v, 0) = value;
   16852              : 
   16853       232146 :       for (i = 1; i < n_elt; ++i)
   16854       157304 :         RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
   16855              : 
   16856        74842 :       return gen_rtx_CONST_VECTOR (mode, v);
   16857              : 
   16858            0 :     default:
   16859            0 :       gcc_unreachable ();
   16860              :     }
   16861              : }
   16862              : 
   16863              : /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   16864              :    and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   16865              :    for an SSE register.  If VECT is true, then replicate the mask for
   16866              :    all elements of the vector register.  If INVERT is true, then create
   16867              :    a mask excluding the sign bit.  The mask is returned in a register.  */
   16868              : 
   16869              : rtx
   16870        76227 : ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
   16871              : {
   16872        76227 :   machine_mode vec_mode, imode;
   16873        76227 :   wide_int w;
   16874        76227 :   rtx mask, v;
   16875              : 
   16876        76227 :   switch (mode)
   16877              :     {
   16878              :     case E_V2HFmode:
   16879              :     case E_V4HFmode:
   16880              :     case E_V8HFmode:
   16881              :     case E_V16HFmode:
   16882              :     case E_V32HFmode:
   16883              :     case E_V32BFmode:
   16884              :     case E_V16BFmode:
   16885              :     case E_V8BFmode:
   16886              :     case E_V4BFmode:
   16887              :     case E_V2BFmode:
   16888              :       vec_mode = mode;
   16889              :       imode = HImode;
   16890              :       break;
   16891              : 
   16892        34172 :     case E_V16SImode:
   16893        34172 :     case E_V16SFmode:
   16894        34172 :     case E_V8SImode:
   16895        34172 :     case E_V4SImode:
   16896        34172 :     case E_V8SFmode:
   16897        34172 :     case E_V4SFmode:
   16898        34172 :     case E_V2SFmode:
   16899        34172 :     case E_V2SImode:
   16900        34172 :       vec_mode = mode;
   16901        34172 :       imode = SImode;
   16902        34172 :       break;
   16903              : 
   16904        39192 :     case E_V8DImode:
   16905        39192 :     case E_V4DImode:
   16906        39192 :     case E_V2DImode:
   16907        39192 :     case E_V8DFmode:
   16908        39192 :     case E_V4DFmode:
   16909        39192 :     case E_V2DFmode:
   16910        39192 :       vec_mode = mode;
   16911        39192 :       imode = DImode;
   16912        39192 :       break;
   16913              : 
   16914         2364 :     case E_TImode:
   16915         2364 :     case E_TFmode:
   16916         2364 :       vec_mode = VOIDmode;
   16917         2364 :       imode = TImode;
   16918         2364 :       break;
   16919              : 
   16920            0 :     default:
   16921            0 :       gcc_unreachable ();
   16922              :     }
   16923              : 
   16924        76227 :   machine_mode inner_mode = GET_MODE_INNER (mode);
   16925       152454 :   w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
   16926       152454 :                            GET_MODE_BITSIZE (inner_mode));
   16927        76227 :   if (invert)
   16928        39888 :     w = wi::bit_not (w);
   16929              : 
   16930              :   /* Force this value into the low part of an fp vector constant.  */
   16931        76227 :   mask = immed_wide_int_const (w, imode);
   16932        76227 :   mask = gen_lowpart (inner_mode, mask);
   16933              : 
   16934        76227 :   if (vec_mode == VOIDmode)
   16935         2364 :     return force_reg (inner_mode, mask);
   16936              : 
   16937        73863 :   v = ix86_build_const_vector (vec_mode, vect, mask);
   16938        73863 :   return force_reg (vec_mode, v);
   16939        76227 : }
   16940              : 
   16941              : /* Return const vector OP in MODE as a HOST_WIDE_INT, element 0 lowest.  */
   16942              : 
   16943              : HOST_WIDE_INT
   16944       134425 : ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
   16945              : {
   16946       285073 :   if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   16947            0 :     gcc_unreachable ();
   16948              : 
   16949       134425 :   int nunits = GET_MODE_NUNITS (mode);
   16950       268850 :   wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
   16951       134425 :   machine_mode innermode = GET_MODE_INNER (mode);
   16952       134425 :   unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);
   16953              : 
   16954       134425 :   switch (mode)
   16955              :     {
   16956              :     case E_V2QImode:
   16957              :     case E_V4QImode:
   16958              :     case E_V2HImode:
   16959              :     case E_V8QImode:
   16960              :     case E_V4HImode:
   16961              :     case E_V2SImode:
   16962       461903 :       for (int i = 0; i < nunits; ++i)
   16963              :         {
   16964       332172 :           int v = INTVAL (XVECEXP (op, 0, i));
   16965       332172 :           wide_int wv = wi::shwi (v, innermode_bits);
   16966       332172 :           val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
   16967       332172 :         }
   16968              :       break;
   16969           88 :     case E_V1SImode:
   16970           88 :     case E_V1DImode:
   16971           88 :       op = CONST_VECTOR_ELT (op, 0);
   16972           88 :       return INTVAL (op);
   16973              :     case E_V2HFmode:
   16974              :     case E_V2BFmode:
   16975              :     case E_V4HFmode:
   16976              :     case E_V4BFmode:
   16977              :     case E_V2SFmode:
   16978        13840 :       for (int i = 0; i < nunits; ++i)
   16979              :         {
   16980         9234 :           rtx x = XVECEXP (op, 0, i);
   16981         9234 :           int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
   16982         9234 :                                   REAL_MODE_FORMAT (innermode));
   16983         9234 :           wide_int wv = wi::shwi (v, innermode_bits);
   16984         9234 :           val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
   16985         9234 :         }
   16986              :       break;
   16987            0 :     default:
   16988            0 :       gcc_unreachable ();
   16989              :     }
   16990              : 
   16991       134337 :   return val.to_shwi ();
   16992       134425 : }
   16993              : /* Return the X86_CC* value for comparison CODE, or -1 if there is none.  */
   16994           32 : int ix86_get_flags_cc (rtx_code code)
   16995              : {
   16996           32 :   switch (code)
   16997              :     {
   16998              :       case NE: return X86_CCNE;
   16999              :       case EQ: return X86_CCE;
   17000              :       case GE: return X86_CCNL;
   17001              :       case GT: return X86_CCNLE;
   17002              :       case LE: return X86_CCLE;
   17003              :       case LT: return X86_CCL;
   17004              :       case GEU: return X86_CCNB;
   17005              :       case GTU: return X86_CCNBE;
   17006              :       case LEU: return X86_CCBE;
   17007              :       case LTU: return X86_CCB;
   17008              :       default: return -1;
   17009              :     }
   17010              : }
   17011              : 
   17012              : /* Return TRUE if the first SET in INSN, whose source must be a COMPARE,
   17013              :    has source and destination with matching CC modes, and the
   17014              :    CC mode is at least as constrained as REQ_MODE.  */
   17015              : 
   17016              : bool
   17017     53324329 : ix86_match_ccmode (rtx insn, machine_mode req_mode)
   17018              : {
   17019     53324329 :   rtx set;
   17020     53324329 :   machine_mode set_mode;
   17021              : 
   17022     53324329 :   set = PATTERN (insn);
   17023     53324329 :   if (GET_CODE (set) == PARALLEL)
   17024       495976 :     set = XVECEXP (set, 0, 0);
   17025     53324329 :   gcc_assert (GET_CODE (set) == SET);
   17026     53324329 :   gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
   17027              : 
   17028     53324329 :   set_mode = GET_MODE (SET_DEST (set));
   17029     53324329 :   switch (set_mode)
   17030              :     {
   17031      1416404 :     case E_CCNOmode:
   17032      1416404 :       if (req_mode != CCNOmode
   17033        97262 :           && (req_mode != CCmode
   17034            0 :               || XEXP (SET_SRC (set), 1) != const0_rtx))
   17035              :         return false;
   17036              :       break;
   17037      5472747 :     case E_CCmode:
   17038      5472747 :       if (req_mode == CCGCmode)
   17039              :         return false;
   17040              :       /* FALLTHRU */
   17041      9075499 :     case E_CCGCmode:
   17042      9075499 :       if (req_mode == CCGOCmode || req_mode == CCNOmode)
   17043              :         return false;
   17044              :       /* FALLTHRU */
   17045     10131699 :     case E_CCGOCmode:
   17046     10131699 :       if (req_mode == CCZmode)
   17047              :         return false;
   17048              :       /* FALLTHRU */
   17049              :     case E_CCZmode:
   17050              :       break;
   17051              :       /* The remaining modes must equal REQ_MODE exactly.  */
   17052            0 :     case E_CCGZmode:
   17053              : 
   17054            0 :     case E_CCAmode:
   17055            0 :     case E_CCCmode:
   17056            0 :     case E_CCOmode:
   17057            0 :     case E_CCPmode:
   17058            0 :     case E_CCSmode:
   17059            0 :       if (set_mode != req_mode)
   17060              :         return false;
   17061              :       break;
   17062              : 
   17063            0 :     default:
   17064            0 :       gcc_unreachable ();
   17065              :     }
   17066              : 
   17067     53218385 :   return GET_MODE (SET_SRC (set)) == set_mode;
   17068              : }
   17069              : /* Return the CC mode to use when comparing OP0 with OP1 using CODE.  */
   17070              : machine_mode
   17071     13531609 : ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
   17072              : {
   17073     13531609 :   machine_mode mode = GET_MODE (op0);
   17074              : 
   17075     13531609 :   if (SCALAR_FLOAT_MODE_P (mode))
   17076              :     {
   17077       143707 :       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
   17078              :       return CCFPmode;
   17079              :     }
   17080              : 
   17081     13387902 :   switch (code)
   17082              :     {
   17083              :       /* Only zero flag is needed.  */
   17084              :     case EQ:                    /* ZF=1 */
   17085              :     case NE:                    /* ZF=0 */
   17086              :       return CCZmode;
   17087              :       /* Codes needing carry flag.  */
   17088       977403 :     case GEU:                   /* CF=0 */
   17089       977403 :     case LTU:                   /* CF=1 */
   17090       977403 :       rtx geu;
   17091              :       /* Detect overflow checks.  They need just the carry flag.  */
   17092       977403 :       if (GET_CODE (op0) == PLUS
   17093       977403 :           && (rtx_equal_p (op1, XEXP (op0, 0))
   17094       125414 :               || rtx_equal_p (op1, XEXP (op0, 1))))
   17095        17411 :         return CCCmode;
   17096              :       /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
   17097              :          Match LTU of op0
   17098              :          (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
   17099              :          and op1
   17100              :          (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
   17101              :          where CC_CCC is either CC or CCC.  */
   17102       959992 :       else if (code == LTU
   17103       366638 :                && GET_CODE (op0) == NEG
   17104           18 :                && GET_CODE (geu = XEXP (op0, 0)) == GEU
   17105            0 :                && REG_P (XEXP (geu, 0))
   17106            0 :                && (GET_MODE (XEXP (geu, 0)) == CCCmode
   17107            0 :                    || GET_MODE (XEXP (geu, 0)) == CCmode)
   17108            0 :                && REGNO (XEXP (geu, 0)) == FLAGS_REG
   17109            0 :                && XEXP (geu, 1) == const0_rtx
   17110            0 :                && GET_CODE (op1) == LTU
   17111            0 :                && REG_P (XEXP (op1, 0))
   17112            0 :                && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
   17113            0 :                && REGNO (XEXP (op1, 0)) == FLAGS_REG
   17114       959992 :                && XEXP (op1, 1) == const0_rtx)
   17115              :         return CCCmode;
   17116              :       /* Similarly for *x86_cmc pattern.
   17117              :          Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
   17118              :          and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)).
   17119              :          It is sufficient to test that the operand modes are CCCmode.  */
   17120       959992 :       else if (code == LTU
   17121       366638 :                && GET_CODE (op0) == NEG
   17122           18 :                && GET_CODE (XEXP (op0, 0)) == LTU
   17123            0 :                && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
   17124            0 :                && GET_CODE (op1) == GEU
   17125            0 :                && GET_MODE (XEXP (op1, 0)) == CCCmode)
   17126              :         return CCCmode;
   17127              :       /* Similarly for the comparison of addcarry/subborrow pattern.  */
   17128       366638 :       else if (code == LTU
   17129       366638 :                && GET_CODE (op0) == ZERO_EXTEND
   17130        15266 :                && GET_CODE (op1) == PLUS
   17131        11016 :                && ix86_carry_flag_operator (XEXP (op1, 0), VOIDmode)
   17132        11016 :                && GET_CODE (XEXP (op1, 1)) == ZERO_EXTEND)
   17133              :         return CCCmode;
   17134              :       else
   17135       948976 :         return CCmode;
   17136              :     case GTU:                   /* CF=0 & ZF=0 */
   17137              :     case LEU:                   /* CF=1 | ZF=1 */
   17138              :       return CCmode;
   17139              :       /* Codes possibly doable only with sign flag when
   17140              :          comparing against zero.  */
   17141       788742 :     case GE:                    /* SF=OF   or   SF=0 */
   17142       788742 :     case LT:                    /* SF<>OF  or   SF=1 */
   17143       788742 :       if (op1 == const0_rtx)
   17144              :         return CCGOCmode;
   17145              :       else
   17146              :         /* For other cases Carry flag is not required.  */
   17147       447606 :         return CCGCmode;
   17148              :       /* Codes doable only with sign flag when comparing
   17149              :          against zero, but we miss jump instruction for it
   17150              :          so we need to use relational tests against overflow
   17151              :          that thus needs to be zero.  */
   17152       902213 :     case GT:                    /* ZF=0 & SF=OF */
   17153       902213 :     case LE:                    /* ZF=1 | SF<>OF */
   17154       902213 :       if (op1 == const0_rtx)
   17155              :         return CCNOmode;
   17156              :       else
   17157       599000 :         return CCGCmode;
   17158              :     default:
   17159              :       /* CCmode should be used in all other cases.  */
   17160              :       return CCmode;
   17161              :     }
   17162              : }
   17163              : 
   17164              : /* Return TRUE if the ptest instruction INSN has source and destination
   17165              :    with the same CC mode, and that mode is CCZmode, CCCmode or CCmode.  */
   17166              : 
   17167              : bool
   17168        91731 : ix86_match_ptest_ccmode (rtx insn)
   17169              : {
   17170        91731 :   rtx set, src;
   17171        91731 :   machine_mode set_mode;
   17172              : 
   17173        91731 :   set = PATTERN (insn);
   17174        91731 :   gcc_assert (GET_CODE (set) == SET);
   17175        91731 :   src = SET_SRC (set);
   17176        91731 :   gcc_assert (GET_CODE (src) == UNSPEC
   17177              :               && XINT (src, 1) == UNSPEC_PTEST);
   17178              : 
   17179        91731 :   set_mode = GET_MODE (src);
   17180        91731 :   if (set_mode != CCZmode
   17181              :       && set_mode != CCCmode
   17182              :       && set_mode != CCmode)
   17183              :     return false;
   17184        91731 :   return GET_MODE (SET_DEST (set)) == set_mode;
   17185              : }
   17186              : 
   17187              : /* Return the fixed condition code registers: FLAGS_REG in *P1, none in *P2.  */
   17188              : 
   17189              : static bool
   17190     18543986 : ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
   17191              : {
   17192     18543986 :   *p1 = FLAGS_REG;
   17193     18543986 :   *p2 = INVALID_REGNUM;
   17194     18543986 :   return true;
   17195              : }
   17196              : 
   17197              : /* If two condition code modes are compatible, return a condition code
   17198              :    mode which is compatible with both.  Otherwise, return
   17199              :    VOIDmode.  */
   17200              : 
   17201              : static machine_mode
   17202        30486 : ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
   17203              : {
   17204        30486 :   if (m1 == m2)
   17205              :     return m1;
   17206              : 
   17207        29741 :   if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
   17208              :     return VOIDmode;
   17209              : 
   17210        29741 :   if ((m1 == CCGCmode && m2 == CCGOCmode)
   17211        29741 :       || (m1 == CCGOCmode && m2 == CCGCmode))
   17212              :     return CCGCmode;
   17213              : 
   17214        29741 :   if ((m1 == CCNOmode && m2 == CCGOCmode)
   17215        29560 :       || (m1 == CCGOCmode && m2 == CCNOmode))
   17216              :     return CCNOmode;
   17217              : 
   17218        29448 :   if (m1 == CCZmode
   17219        15606 :       && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
   17220              :     return m2;
   17221        16972 :   else if (m2 == CCZmode
   17222        13586 :            && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
   17223              :     return m1;
   17224              : 
   17225         6589 :   switch (m1)
   17226              :     {
   17227            0 :     default:
   17228            0 :       gcc_unreachable ();
   17229              : 
   17230         6589 :     case E_CCmode:
   17231         6589 :     case E_CCGCmode:
   17232         6589 :     case E_CCGOCmode:
   17233         6589 :     case E_CCNOmode:
   17234         6589 :     case E_CCAmode:
   17235         6589 :     case E_CCCmode:
   17236         6589 :     case E_CCOmode:
   17237         6589 :     case E_CCPmode:
   17238         6589 :     case E_CCSmode:
   17239         6589 :     case E_CCZmode:
   17240         6589 :       switch (m2)
   17241              :         {
   17242              :         default:
   17243              :           return VOIDmode;
   17244              : 
   17245              :         case E_CCmode:
   17246              :         case E_CCGCmode:
   17247              :         case E_CCGOCmode:
   17248              :         case E_CCNOmode:
   17249              :         case E_CCAmode:
   17250              :         case E_CCCmode:
   17251              :         case E_CCOmode:
   17252              :         case E_CCPmode:
   17253              :         case E_CCSmode:
   17254              :         case E_CCZmode:
   17255              :           return CCmode;
   17256              :         }
   17257              : 
   17258              :     case E_CCFPmode:
   17259              :       /* These are only compatible with themselves, which we already
   17260              :          checked above.  */
   17261              :       return VOIDmode;
   17262              :     }
   17263              : }
   17264              : 
   17265              : /* Return strategy to use for floating-point.  We assume that fcomi is always
   17266              :    preferable where available, since that is also true when looking at size
   17267              :    (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
   17268              : 
   17269              : enum ix86_fpcmp_strategy
   17270      5547592 : ix86_fp_comparison_strategy (enum rtx_code)
   17271              : {
   17272              :   /* Do fcomi/sahf based test when profitable.  */
   17273              : 
   17274      5547592 :   if (TARGET_CMOVE)
   17275              :     return IX86_FPCMP_COMI;
   17276              : 
   17277            0 :   if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
   17278            0 :     return IX86_FPCMP_SAHF;
   17279              : 
   17280              :   return IX86_FPCMP_ARITH;
   17281              : }
   17282              : 
   17283              : /* Convert comparison codes we use to represent FP comparison to integer
   17284              :    code that will result in proper branch.  Return UNKNOWN if no such code
   17285              :    is available.  */
   17286              : 
   17287              : enum rtx_code
   17288       584771 : ix86_fp_compare_code_to_integer (enum rtx_code code)
   17289              : {
   17290       584771 :   switch (code)
   17291              :     {
   17292              :     case GT:
   17293              :       return GTU;
   17294        17891 :     case GE:
   17295        17891 :       return GEU;
   17296              :     case ORDERED:
   17297              :     case UNORDERED:
   17298              :       return code;
   17299       118982 :     case UNEQ:
   17300       118982 :       return EQ;
   17301        17396 :     case UNLT:
   17302        17396 :       return LTU;
   17303        31481 :     case UNLE:
   17304        31481 :       return LEU;
   17305       113696 :     case LTGT:
   17306       113696 :       return NE;
   17307          683 :     case EQ:
   17308          683 :     case NE:
   17309          683 :       if (TARGET_AVX10_2)
   17310              :         return code;
   17311              :       /* FALLTHRU.  */
   17312          225 :     default:
   17313          225 :       return UNKNOWN;
   17314              :     }
   17315              : }
   17316              : 
   17317              : /* Zero extend possibly SImode EXP to Pmode register.  */
   17318              : rtx
   17319        44362 : ix86_zero_extend_to_Pmode (rtx exp)
   17320              : {
   17321        56194 :   return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
   17322              : }
   17323              : 
   17324              : /* Return true if the function is called via PLT.   */
   17325              : 
   17326              : bool
   17327       982043 : ix86_call_use_plt_p (rtx call_op)
   17328              : {
   17329       982043 :   if (SYMBOL_REF_LOCAL_P (call_op))
   17330              :     {
   17331       195837 :       if (SYMBOL_REF_DECL (call_op)
   17332       195837 :           && TREE_CODE (SYMBOL_REF_DECL (call_op)) == FUNCTION_DECL)
   17333              :         {
   17334              :           /* NB: All ifunc functions must be called via PLT.  */
   17335       112609 :           cgraph_node *node
   17336       112609 :             = cgraph_node::get (SYMBOL_REF_DECL (call_op));
   17337       112609 :           if (node && node->ifunc_resolver)
   17338              :             return true;
   17339              :         }
   17340       195817 :       return false;
   17341              :     }
   17342              :   return true;
   17343              : }
   17344              : 
   17345              : /* Implement TARGET_IFUNC_REF_LOCAL_OK.  If this hook returns true,
   17346              :    the PLT entry will be used as the function address for local IFUNC
   17347              :    functions.  When the PIC register is needed for PLT call, indirect
   17348              :    call via the PLT entry will fail since the PIC register may not be
   17349              :    set up properly for indirect call.  In this case, we should return
   17350              :    false.  */
   17351              : 
   17352              : static bool
   17353    765881824 : ix86_ifunc_ref_local_ok (void)
   17354              : {
   17355    765881824 :   return !flag_pic || (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC);
   17356              : }
   17357              : 
   17358              : /* Return true if the function being called was marked with attribute
   17359              :    "noplt" or using -fno-plt and we are compiling for non-PIC.  We need
   17360              :    to handle the non-PIC case in the backend because there is no easy
   17361              :    interface for the front-end to force non-PLT calls to use the GOT.
   17362              :    This is currently used only with 64-bit or 32-bit GOT32X ELF targets
   17363              :    to call the function marked "noplt" indirectly.  */
   17364              : 
   17365              : bool
   17366      5897138 : ix86_nopic_noplt_attribute_p (rtx call_op)
   17367              : {
   17368      5411816 :   if (flag_pic || ix86_cmodel == CM_LARGE
   17369              :       || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
   17370              :       || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
   17371     11308954 :       || SYMBOL_REF_LOCAL_P (call_op))
   17372              :     return false;
   17373              : 
   17374      3785147 :   tree symbol_decl = SYMBOL_REF_DECL (call_op);
   17375              : 
   17376      3785147 :   if (!flag_plt
   17377      3785147 :       || (symbol_decl != NULL_TREE
   17378      3785115 :           && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
   17379           34 :     return true;
   17380              : 
   17381              :   return false;
   17382              : }
   17383              : 
   17384              : /* Helper to output the jmp/call.  */
   17385              : static void
   17386           33 : ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
   17387              : {
   17388           33 :   if (thunk_name != NULL)
   17389              :     {
   17390           22 :       if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
   17391            1 :           && ix86_indirect_branch_cs_prefix)
   17392            1 :         fprintf (asm_out_file, "\tcs\n");
   17393           22 :       fprintf (asm_out_file, "\tjmp\t");
   17394           22 :       assemble_name (asm_out_file, thunk_name);
   17395           22 :       putc ('\n', asm_out_file);
   17396           22 :       if ((ix86_harden_sls & harden_sls_indirect_jmp))
   17397            2 :         fputs ("\tint3\n", asm_out_file);
   17398              :     }
   17399              :   else
   17400           11 :     output_indirect_thunk (regno);
   17401           33 : }
   17402              : 
   17403              : /* Output indirect branch via a call and return thunk.  CALL_OP is a
   17404              :    register which contains the branch target.  XASM is the assembly
   17405              :    template for CALL_OP.  Branch is a tail call if SIBCALL_P is true.
   17406              :    A normal call is converted to:
   17407              : 
   17408              :         call __x86_indirect_thunk_reg
   17409              : 
   17410              :    and a tail call is converted to:
   17411              : 
   17412              :         jmp __x86_indirect_thunk_reg
   17413              :  */
   17414              : 
   17415              : static void
   17416           50 : ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
   17417              : {
   17418           50 :   char thunk_name_buf[32];
   17419           50 :   char *thunk_name;
   17420           50 :   enum indirect_thunk_prefix need_prefix
   17421           50 :     = indirect_thunk_need_prefix (current_output_insn);
   17422           50 :   int regno = REGNO (call_op);
   17423              : 
   17424           50 :   if (cfun->machine->indirect_branch_type
   17425           50 :       != indirect_branch_thunk_inline)
   17426              :     {
   17427           39 :       if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
   17428           16 :         SET_HARD_REG_BIT (indirect_thunks_used, regno);
   17429              : 
   17430           39 :       indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
   17431           39 :       thunk_name = thunk_name_buf;
   17432              :     }
   17433              :   else
   17434              :     thunk_name = NULL;
   17435              : 
   17436           50 :   if (sibcall_p)
   17437           27 :      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
   17438              :   else
   17439              :     {
   17440           23 :       if (thunk_name != NULL)
   17441              :         {
   17442           17 :           if ((REX_INT_REGNO_P (regno) || REX_INT_REGNO_P (regno))
   17443            1 :               && ix86_indirect_branch_cs_prefix)
   17444            1 :             fprintf (asm_out_file, "\tcs\n");
   17445           17 :           fprintf (asm_out_file, "\tcall\t");
   17446           17 :           assemble_name (asm_out_file, thunk_name);
   17447           17 :           putc ('\n', asm_out_file);
   17448           17 :           return;
   17449              :         }
   17450              : 
   17451            6 :       char indirectlabel1[32];
   17452            6 :       char indirectlabel2[32];
   17453              : 
   17454            6 :       ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
   17455              :                                    INDIRECT_LABEL,
   17456              :                                    indirectlabelno++);
   17457            6 :       ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
   17458              :                                    INDIRECT_LABEL,
   17459              :                                    indirectlabelno++);
   17460              : 
   17461              :       /* Jump.  */
   17462            6 :       fputs ("\tjmp\t", asm_out_file);
   17463            6 :       assemble_name_raw (asm_out_file, indirectlabel2);
   17464            6 :       fputc ('\n', asm_out_file);
   17465              : 
   17466            6 :       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
   17467              : 
   17468            6 :      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
   17469              : 
   17470            6 :       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
   17471              : 
   17472              :       /* Call.  */
   17473            6 :       fputs ("\tcall\t", asm_out_file);
   17474            6 :       assemble_name_raw (asm_out_file, indirectlabel1);
   17475            6 :       fputc ('\n', asm_out_file);
   17476              :     }
   17477              : }
   17478              : 
   17479              : /* Output indirect branch via a call and return thunk.  CALL_OP is
   17480              :    the branch target.  XASM is the assembly template for CALL_OP.
   17481              :    Branch is a tail call if SIBCALL_P is true.  A normal call is
   17482              :    converted to:
   17483              : 
   17484              :         jmp L2
   17485              :    L1:
   17486              :         push CALL_OP
   17487              :         jmp __x86_indirect_thunk
   17488              :    L2:
   17489              :         call L1
   17490              : 
   17491              :    and a tail call is converted to:
   17492              : 
   17493              :         push CALL_OP
   17494              :         jmp __x86_indirect_thunk
   17495              :  */
   17496              : 
   17497              : static void
   17498            0 : ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
   17499              :                                       bool sibcall_p)
   17500              : {
   17501            0 :   char thunk_name_buf[32];
   17502            0 :   char *thunk_name;
   17503            0 :   char push_buf[64];
   17504            0 :   enum indirect_thunk_prefix need_prefix
   17505            0 :     = indirect_thunk_need_prefix (current_output_insn);
   17506            0 :   int regno = -1;
   17507              : 
   17508            0 :   if (cfun->machine->indirect_branch_type
   17509            0 :       != indirect_branch_thunk_inline)
   17510              :     {
   17511            0 :       if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
   17512            0 :         indirect_thunk_needed = true;
   17513            0 :       indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
   17514            0 :       thunk_name = thunk_name_buf;
   17515              :     }
   17516              :   else
   17517              :     thunk_name = NULL;
   17518              : 
   17519            0 :   snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
   17520            0 :             TARGET_64BIT ? 'q' : 'l', xasm);
   17521              : 
   17522            0 :   if (sibcall_p)
   17523              :     {
   17524            0 :       output_asm_insn (push_buf, &call_op);
   17525            0 :       ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
   17526              :     }
   17527              :   else
   17528              :     {
   17529            0 :       char indirectlabel1[32];
   17530            0 :       char indirectlabel2[32];
   17531              : 
   17532            0 :       ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
   17533              :                                    INDIRECT_LABEL,
   17534              :                                    indirectlabelno++);
   17535            0 :       ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
   17536              :                                    INDIRECT_LABEL,
   17537              :                                    indirectlabelno++);
   17538              : 
   17539              :       /* Jump.  */
   17540            0 :       fputs ("\tjmp\t", asm_out_file);
   17541            0 :       assemble_name_raw (asm_out_file, indirectlabel2);
   17542            0 :       fputc ('\n', asm_out_file);
   17543              : 
   17544            0 :       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
   17545              : 
   17546              :       /* An external function may be called via GOT, instead of PLT.  */
   17547            0 :       if (MEM_P (call_op))
   17548              :         {
   17549            0 :           struct ix86_address parts;
   17550            0 :           rtx addr = XEXP (call_op, 0);
   17551            0 :           if (ix86_decompose_address (addr, &parts)
   17552            0 :               && parts.base == stack_pointer_rtx)
   17553              :             {
   17554              :               /* Since call will adjust stack by -UNITS_PER_WORD,
   17555              :                  we must convert "disp(stack, index, scale)" to
   17556              :                  "disp+UNITS_PER_WORD(stack, index, scale)".  */
   17557            0 :               if (parts.index)
   17558              :                 {
   17559            0 :                   addr = gen_rtx_MULT (Pmode, parts.index,
   17560              :                                        GEN_INT (parts.scale));
   17561            0 :                   addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
   17562              :                                        addr);
   17563              :                 }
   17564              :               else
   17565              :                 addr = stack_pointer_rtx;
   17566              : 
   17567            0 :               rtx disp;
   17568            0 :               if (parts.disp != NULL_RTX)
   17569            0 :                 disp = plus_constant (Pmode, parts.disp,
   17570            0 :                                       UNITS_PER_WORD);
   17571              :               else
   17572            0 :                 disp = GEN_INT (UNITS_PER_WORD);
   17573              : 
   17574            0 :               addr = gen_rtx_PLUS (Pmode, addr, disp);
   17575            0 :               call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
   17576              :             }
   17577              :         }
   17578              : 
   17579            0 :       output_asm_insn (push_buf, &call_op);
   17580              : 
   17581            0 :       ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
   17582              : 
   17583            0 :       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
   17584              : 
   17585              :       /* Call.  */
   17586            0 :       fputs ("\tcall\t", asm_out_file);
   17587            0 :       assemble_name_raw (asm_out_file, indirectlabel1);
   17588            0 :       fputc ('\n', asm_out_file);
   17589              :     }
   17590            0 : }
   17591              : 
   17592              : /* Output indirect branch via a call and return thunk.  CALL_OP is
   17593              :    the branch target.  XASM is the assembly template for CALL_OP.
   17594              :    Branch is a tail call if SIBCALL_P is true.   */
   17595              : 
   17596              : static void
   17597           50 : ix86_output_indirect_branch (rtx call_op, const char *xasm,
   17598              :                              bool sibcall_p)
   17599              : {
   17600           50 :   if (REG_P (call_op))
   17601           50 :     ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
   17602              :   else
   17603            0 :     ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
   17604           50 : }
   17605              : 
   17606              : /* Output indirect jump.  CALL_OP is the jump target.  */
   17607              : 
   17608              : const char *
   17609         8466 : ix86_output_indirect_jmp (rtx call_op)
   17610              : {
   17611         8466 :   if (cfun->machine->indirect_branch_type != indirect_branch_keep)
   17612              :     {
   17613              :       /* We can't have red-zone since "call" in the indirect thunk
   17614              :          pushes the return address onto stack, destroying red-zone.  */
   17615            4 :       if (ix86_red_zone_used)
   17616            0 :         gcc_unreachable ();
   17617              : 
   17618            4 :       ix86_output_indirect_branch (call_op, "%0", true);
   17619              :     }
   17620              :   else
   17621         8462 :     output_asm_insn ("%!jmp\t%A0", &call_op);
   17622         8466 :   return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3" : "";
   17623              : }
   17624              : 
   17625              : /* Output return instrumentation for current function if needed.  */
   17626              : 
   17627              : static void
   17628      1715110 : output_return_instrumentation (void)
   17629              : {
   17630      1715110 :   if (ix86_instrument_return != instrument_return_none
   17631            6 :       && flag_fentry
   17632      1715116 :       && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
   17633              :     {
   17634            5 :       if (ix86_flag_record_return)
   17635            5 :         fprintf (asm_out_file, "1:\n");
   17636            5 :       switch (ix86_instrument_return)
   17637              :         {
   17638            2 :         case instrument_return_call:
   17639            2 :           fprintf (asm_out_file, "\tcall\t__return__\n");
   17640            2 :           break;
   17641            3 :         case instrument_return_nop5:
   17642              :           /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1)  */
   17643            3 :           fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
   17644            3 :           break;
   17645              :         case instrument_return_none:
   17646              :           break;
   17647              :         }
   17648              : 
   17649            5 :       if (ix86_flag_record_return)
   17650              :         {
   17651            5 :           fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
   17652            5 :           fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
   17653            5 :           fprintf (asm_out_file, "\t.previous\n");
   17654              :         }
   17655              :     }
   17656      1715110 : }
   17657              : 
   17658              : /* Output function return.  CALL_OP is the jump target.  Add a REP
   17659              :    prefix to RET if LONG_P is true and function return is kept.  */
   17660              : 
   17661              : const char *
   17662      1588444 : ix86_output_function_return (bool long_p)
   17663              : {
   17664      1588444 :   output_return_instrumentation ();
   17665              : 
   17666      1588444 :   if (cfun->machine->function_return_type != indirect_branch_keep)
   17667              :     {
   17668           17 :       char thunk_name[32];
   17669           17 :       enum indirect_thunk_prefix need_prefix
   17670           17 :         = indirect_thunk_need_prefix (current_output_insn);
   17671              : 
   17672           17 :       if (cfun->machine->function_return_type
   17673           17 :           != indirect_branch_thunk_inline)
   17674              :         {
   17675           12 :           bool need_thunk = (cfun->machine->function_return_type
   17676              :                              == indirect_branch_thunk);
   17677           12 :           indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
   17678              :                                true);
   17679           12 :           indirect_return_needed |= need_thunk;
   17680           12 :           fprintf (asm_out_file, "\tjmp\t");
   17681           12 :           assemble_name (asm_out_file, thunk_name);
   17682           12 :           putc ('\n', asm_out_file);
   17683              :         }
   17684              :       else
   17685            5 :         output_indirect_thunk (INVALID_REGNUM);
   17686              : 
   17687           17 :       return "";
   17688              :     }
   17689              : 
   17690      3176365 :   output_asm_insn (long_p ? "rep%; ret" : "ret", nullptr);
   17691      1588427 :   return (ix86_harden_sls & harden_sls_return) ? "int3" : "";
   17692              : }
   17693              : 
   17694              : /* Output indirect function return.  RET_OP is the function return
   17695              :    target.  */
   17696              : 
   17697              : const char *
   17698           17 : ix86_output_indirect_function_return (rtx ret_op)
   17699              : {
   17700           17 :   if (cfun->machine->function_return_type != indirect_branch_keep)
   17701              :     {
   17702            0 :       char thunk_name[32];
   17703            0 :       enum indirect_thunk_prefix need_prefix
   17704            0 :         = indirect_thunk_need_prefix (current_output_insn);
   17705            0 :       unsigned int regno = REGNO (ret_op);
   17706            0 :       gcc_assert (regno == CX_REG);
   17707              : 
   17708            0 :       if (cfun->machine->function_return_type
   17709            0 :           != indirect_branch_thunk_inline)
   17710              :         {
   17711            0 :           bool need_thunk = (cfun->machine->function_return_type
   17712              :                              == indirect_branch_thunk);
   17713            0 :           indirect_thunk_name (thunk_name, regno, need_prefix, true);
   17714              : 
   17715            0 :           if (need_thunk)
   17716              :             {
   17717            0 :               indirect_return_via_cx = true;
   17718            0 :               SET_HARD_REG_BIT (indirect_thunks_used, CX_REG);
   17719              :             }
   17720            0 :           fprintf (asm_out_file, "\tjmp\t");
   17721            0 :           assemble_name (asm_out_file, thunk_name);
   17722            0 :           putc ('\n', asm_out_file);
   17723              :         }
   17724              :       else
   17725            0 :         output_indirect_thunk (regno);
   17726              :     }
   17727              :   else
   17728              :     {
   17729           17 :       output_asm_insn ("%!jmp\t%A0", &ret_op);
   17730           17 :       if (ix86_harden_sls & harden_sls_indirect_jmp)
   17731            1 :         fputs ("\tint3\n", asm_out_file);
   17732              :     }
   17733           17 :   return "";
   17734              : }
   17735              : 
   17736              : /* Output the assembly for a call instruction.  */
   17737              : 
   17738              : const char *
   17739      6077204 : ix86_output_call_insn (rtx_insn *insn, rtx call_op)
   17740              : {
   17741      6077204 :   bool direct_p = constant_call_address_operand (call_op, VOIDmode);
   17742      6077204 :   bool output_indirect_p
   17743              :     = (!TARGET_SEH
   17744      6077204 :        && cfun->machine->indirect_branch_type != indirect_branch_keep);
   17745      6077204 :   bool seh_nop_p = false;
   17746      6077204 :   const char *xasm;
   17747              : 
   17748      6077204 :   if (SIBLING_CALL_P (insn))
   17749              :     {
   17750       126666 :       output_return_instrumentation ();
   17751       126666 :       if (direct_p)
   17752              :         {
   17753       117060 :           if (ix86_nopic_noplt_attribute_p (call_op))
   17754              :             {
   17755            4 :               direct_p = false;
   17756            4 :               if (TARGET_64BIT)
   17757              :                 {
   17758            4 :                   if (output_indirect_p)
   17759              :                     xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
   17760              :                   else
   17761            4 :                     xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
   17762              :                 }
   17763              :               else
   17764              :                 {
   17765            0 :                   if (output_indirect_p)
   17766              :                     xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
   17767              :                   else
   17768            0 :                     xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
   17769              :                 }
   17770              :             }
   17771              :           else
   17772              :             xasm = "%!jmp\t%P0";
   17773              :         }
   17774              :       /* SEH epilogue detection requires the indirect branch case
   17775              :          to include REX.W.  */
   17776         9606 :       else if (TARGET_SEH)
   17777              :         xasm = "%!rex.W jmp\t%A0";
   17778              :       else
   17779              :         {
   17780         9606 :           if (output_indirect_p)
   17781              :             xasm = "%0";
   17782              :           else
   17783         9583 :             xasm = "%!jmp\t%A0";
   17784              :         }
   17785              : 
   17786       126666 :       if (output_indirect_p && !direct_p)
   17787           23 :         ix86_output_indirect_branch (call_op, xasm, true);
   17788              :       else
   17789              :         {
   17790       126643 :           output_asm_insn (xasm, &call_op);
   17791       126643 :           if (!direct_p
   17792         9587 :               && (ix86_harden_sls & harden_sls_indirect_jmp))
   17793              :             return "int3";
   17794              :         }
   17795       126665 :       return "";
   17796              :     }
   17797              : 
   17798              :   /* SEH unwinding can require an extra nop to be emitted in several
   17799              :      circumstances.  Determine if we have one of those.  */
   17800      5950538 :   if (TARGET_SEH)
   17801              :     {
   17802              :       rtx_insn *i;
   17803              : 
   17804              :       for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
   17805              :         {
   17806              :           /* Prevent a catch region from being adjacent to a jump that would
   17807              :              be interpreted as an epilogue sequence by the unwinder.  */
   17808              :           if (JUMP_P(i) && CROSSING_JUMP_P (i))
   17809              :             {
   17810              :               seh_nop_p = true;
   17811              :               break;
   17812              :             }
   17813              : 
   17814              :           /* If we get to another real insn, we don't need the nop.  */
   17815              :           if (INSN_P (i))
   17816              :             break;
   17817              : 
   17818              :           /* If we get to the epilogue note, prevent a catch region from
   17819              :              being adjacent to the standard epilogue sequence.  Note that,
   17820              :              if non-call exceptions are enabled, we already did it during
   17821              :              epilogue expansion, or else, if the insn can throw internally,
   17822              :              we already did it during the reorg pass.  */
   17823              :           if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
   17824              :               && !flag_non_call_exceptions
   17825              :               && !can_throw_internal (insn))
   17826              :             {
   17827              :               seh_nop_p = true;
   17828              :               break;
   17829              :             }
   17830              :         }
   17831              : 
   17832              :       /* If we didn't find a real insn following the call, prevent the
   17833              :          unwinder from looking into the next function.  */
   17834              :       if (i == NULL)
   17835              :         seh_nop_p = true;
   17836              :     }
   17837              : 
   17838      5950538 :   if (direct_p)
   17839              :     {
   17840      5779056 :       if (ix86_nopic_noplt_attribute_p (call_op))
   17841              :         {
   17842            6 :           direct_p = false;
   17843            6 :           if (TARGET_64BIT)
   17844              :             {
   17845            6 :               if (output_indirect_p)
   17846              :                 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
   17847              :               else
   17848            6 :                 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
   17849              :             }
   17850              :           else
   17851              :             {
   17852            0 :               if (output_indirect_p)
   17853              :                 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
   17854              :               else
   17855            0 :                 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
   17856              :             }
   17857              :         }
   17858              :       else
   17859              :         xasm = "%!call\t%P0";
   17860              :     }
   17861              :   else
   17862              :     {
   17863       171482 :       if (output_indirect_p)
   17864              :         xasm = "%0";
   17865              :       else
   17866       171459 :         xasm = "%!call\t%A0";
   17867              :     }
   17868              : 
   17869      5950538 :   if (output_indirect_p && !direct_p)
   17870           23 :     ix86_output_indirect_branch (call_op, xasm, false);
   17871              :   else
   17872      5950515 :     output_asm_insn (xasm, &call_op);
   17873              : 
   17874              :   if (seh_nop_p)
   17875              :     return "nop";
   17876              : 
   17877              :   return "";
   17878              : }
   17879              : 
   17880              : /* Return a MEM corresponding to a stack slot with mode MODE.
   17881              :    Allocate a new slot if necessary.
   17882              : 
   17883              :    The RTL for a function can have several slots available: N is
   17884              :    which slot to use.  */
   17885              : 
   17886              : rtx
   17887        22370 : assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
   17888              : {
   17889        22370 :   struct stack_local_entry *s;
   17890              : 
   17891        22370 :   gcc_assert (n < MAX_386_STACK_LOCALS);
   17892              : 
   17893        33733 :   for (s = ix86_stack_locals; s; s = s->next)
   17894        31150 :     if (s->mode == mode && s->n == n)
   17895        19787 :       return validize_mem (copy_rtx (s->rtl));
   17896              : 
   17897         2583 :   int align = 0;
   17898              :   /* For DImode with SLOT_FLOATxFDI_387 use 32-bit
   17899              :      alignment with -m32 -mpreferred-stack-boundary=2.  */
   17900         2583 :   if (mode == DImode
   17901          329 :       && !TARGET_64BIT
   17902          329 :       && n == SLOT_FLOATxFDI_387
   17903         2912 :       && ix86_preferred_stack_boundary < GET_MODE_ALIGNMENT (DImode))
   17904              :     align = 32;
   17905         2583 :   s = ggc_alloc<stack_local_entry> ();
   17906         2583 :   s->n = n;
   17907         2583 :   s->mode = mode;
   17908         5166 :   s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), align);
   17909              : 
   17910         2583 :   s->next = ix86_stack_locals;
   17911         2583 :   ix86_stack_locals = s;
   17912         2583 :   return validize_mem (copy_rtx (s->rtl));
   17913              : }
   17914              : 
   17915              : static void
   17916      1488365 : ix86_instantiate_decls (void)
   17917              : {
   17918      1488365 :   struct stack_local_entry *s;
   17919              : 
   17920      1488365 :   for (s = ix86_stack_locals; s; s = s->next)
   17921            0 :     if (s->rtl != NULL_RTX)
   17922            0 :       instantiate_decl_rtl (s->rtl);
   17923      1488365 : }
   17924              : 
   17925              : /* Check whether x86 address PARTS is a pc-relative address.  */
   17926              : 
   17927              : bool
   17928     26363425 : ix86_rip_relative_addr_p (struct ix86_address *parts)
   17929              : {
   17930     26363425 :   rtx base, index, disp;
   17931              : 
   17932     26363425 :   base = parts->base;
   17933     26363425 :   index = parts->index;
   17934     26363425 :   disp = parts->disp;
   17935              : 
   17936     26363425 :   if (disp && !base && !index)
   17937              :     {
   17938     24606788 :       if (TARGET_64BIT)
   17939              :         {
   17940     22941988 :           rtx symbol = disp;
   17941              : 
   17942     22941988 :           if (GET_CODE (disp) == CONST)
   17943      6880660 :             symbol = XEXP (disp, 0);
   17944     22941988 :           if (GET_CODE (symbol) == PLUS
   17945      6366369 :               && CONST_INT_P (XEXP (symbol, 1)))
   17946      6366369 :             symbol = XEXP (symbol, 0);
   17947              : 
   17948     22941988 :           if (LABEL_REF_P (symbol)
   17949     22934350 :               || (SYMBOL_REF_P (symbol)
   17950     21669546 :                   && SYMBOL_REF_TLS_MODEL (symbol) == 0)
   17951     24206792 :               || (GET_CODE (symbol) == UNSPEC
   17952       533179 :                   && (XINT (symbol, 1) == UNSPEC_GOTPCREL
   17953              :                       || XINT (symbol, 1) == UNSPEC_PCREL
   17954              :                       || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
   17955     22182814 :             return true;
   17956              :         }
   17957              :     }
   17958              :   return false;
   17959              : }
   17960              : 
   17961              : /* Calculate the length of the memory address in the instruction encoding.
   17962              :    Includes addr32 prefix, does not include the one-byte modrm, opcode,
   17963              :    or other prefixes.  We never generate addr32 prefix for LEA insn.  */
   17964              : 
   17965              : int
   17966    270573577 : memory_address_length (rtx addr, bool lea)
   17967              : {
   17968    270573577 :   struct ix86_address parts;
   17969    270573577 :   rtx base, index, disp;
   17970    270573577 :   int len;
   17971    270573577 :   int ok;
   17972              : 
   17973    270573577 :   if (GET_CODE (addr) == PRE_DEC
   17974    262010413 :       || GET_CODE (addr) == POST_INC
   17975    257510338 :       || GET_CODE (addr) == PRE_MODIFY
   17976    257510338 :       || GET_CODE (addr) == POST_MODIFY)
   17977              :     return 0;
   17978              : 
   17979    257510338 :   ok = ix86_decompose_address (addr, &parts);
   17980    257510338 :   gcc_assert (ok);
   17981              : 
   17982    257510338 :   len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;
   17983              : 
   17984              :   /*  If this is not LEA instruction, add the length of addr32 prefix.  */
   17985    218909428 :   if (TARGET_64BIT && !lea
   17986    451501233 :       && (SImode_address_operand (addr, VOIDmode)
   17987    193990742 :           || (parts.base && GET_MODE (parts.base) == SImode)
   17988    193980512 :           || (parts.index && GET_MODE (parts.index) == SImode)))
   17989        10383 :     len++;
   17990              : 
   17991    257510338 :   base = parts.base;
   17992    257510338 :   index = parts.index;
   17993    257510338 :   disp = parts.disp;
   17994              : 
   17995    257510338 :   if (base && SUBREG_P (base))
   17996            2 :     base = SUBREG_REG (base);
   17997    257510338 :   if (index && SUBREG_P (index))
   17998            0 :     index = SUBREG_REG (index);
   17999              : 
   18000    257510338 :   gcc_assert (base == NULL_RTX || REG_P (base));
   18001    257510338 :   gcc_assert (index == NULL_RTX || REG_P (index));
   18002              : 
   18003              :   /* Rule of thumb:
   18004              :        - esp as the base always wants an index,
   18005              :        - ebp as the base always wants a displacement,
   18006              :        - r12 as the base always wants an index,
   18007              :        - r13 as the base always wants a displacement.  */
   18008              : 
   18009              :   /* Register Indirect.  */
   18010    257510338 :   if (base && !index && !disp)
   18011              :     {
   18012              :       /* esp (for its index) and ebp (for its displacement) need
   18013              :          the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
   18014              :          code.  */
   18015     16713556 :       if (base == arg_pointer_rtx
   18016     16713556 :           || base == frame_pointer_rtx
   18017     16713556 :           || REGNO (base) == SP_REG
   18018      9988927 :           || REGNO (base) == BP_REG
   18019      9988927 :           || REGNO (base) == R12_REG
   18020     26255190 :           || REGNO (base) == R13_REG)
   18021      7171922 :         len++;
   18022              :     }
   18023              : 
   18024              :   /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
   18025              :      is not disp32, but disp32(%rip), so for disp32
   18026              :      SIB byte is needed, unless print_operand_address
   18027              :      optimizes it into disp32(%rip) or (%rip) is implied
   18028              :      by UNSPEC.  */
   18029    240796782 :   else if (disp && !base && !index)
   18030              :     {
   18031     23631298 :       len += 4;
   18032     23631298 :       if (!ix86_rip_relative_addr_p (&parts))
   18033      1851259 :         len++;
   18034              :     }
   18035              :   else
   18036              :     {
   18037              :       /* Find the length of the displacement constant.  */
   18038    217165484 :       if (disp)
   18039              :         {
   18040    213086325 :           if (base && satisfies_constraint_K (disp))
   18041    123270110 :             len += 1;
   18042              :           else
   18043     89816215 :             len += 4;
   18044              :         }
   18045              :       /* ebp always wants a displacement.  Similarly r13.  */
   18046      4079159 :       else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
   18047         9299 :         len++;
   18048              : 
   18049              :       /* An index requires the two-byte modrm form....  */
   18050    217165484 :       if (index
   18051              :           /* ...like esp (or r12), which always wants an index.  */
   18052    206436840 :           || base == arg_pointer_rtx
   18053    206436840 :           || base == frame_pointer_rtx
   18054    423602324 :           || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
   18055    155379764 :         len++;
   18056              :     }
   18057              : 
   18058              :   return len;
   18059              : }
   18060              : 
   18061              : /* Compute default value for "length_immediate" attribute.  When SHORTFORM
   18062              :    is set, expect that insn have 8bit immediate alternative.  */
   18063              : int
   18064    315708902 : ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
   18065              : {
   18066    315708902 :   int len = 0;
   18067    315708902 :   int i;
   18068    315708902 :   extract_insn_cached (insn);
   18069    985380920 :   for (i = recog_data.n_operands - 1; i >= 0; --i)
   18070    669672018 :     if (CONSTANT_P (recog_data.operand[i]))
   18071              :       {
   18072    138576588 :         enum attr_mode mode = get_attr_mode (insn);
   18073              : 
   18074    138576588 :         gcc_assert (!len);
   18075    138576588 :         if (shortform && CONST_INT_P (recog_data.operand[i]))
   18076              :           {
   18077     37440632 :             HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
   18078     37440632 :             switch (mode)
   18079              :               {
   18080      1281157 :               case MODE_QI:
   18081      1281157 :                 len = 1;
   18082      1281157 :                 continue;
   18083       436587 :               case MODE_HI:
   18084       436587 :                 ival = trunc_int_for_mode (ival, HImode);
   18085       436587 :                 break;
   18086     15895465 :               case MODE_SI:
   18087     15895465 :                 ival = trunc_int_for_mode (ival, SImode);
   18088     15895465 :                 break;
   18089              :               default:
   18090              :                 break;
   18091              :               }
   18092     36159475 :             if (IN_RANGE (ival, -128, 127))
   18093              :               {
   18094     32052059 :                 len = 1;
   18095     32052059 :                 continue;
   18096              :               }
   18097              :           }
   18098    105243372 :         switch (mode)
   18099              :           {
   18100              :           case MODE_QI:
   18101              :             len = 1;
   18102              :             break;
   18103              :           case MODE_HI:
   18104    669672018 :             len = 2;
   18105              :             break;
   18106              :           case MODE_SI:
   18107     99864582 :             len = 4;
   18108              :             break;
   18109              :           /* Immediates for DImode instructions are encoded
   18110              :              as 32bit sign extended values.  */
   18111              :           case MODE_DI:
   18112     99864582 :             len = 4;
   18113              :             break;
   18114            0 :           default:
   18115            0 :             fatal_insn ("unknown insn mode", insn);
   18116              :         }
   18117              :       }
   18118    315708902 :   return len;
   18119              : }
   18120              : 
   18121              : /* Compute default value for "length_address" attribute.  */
   18122              : int
   18123    443218305 : ix86_attr_length_address_default (rtx_insn *insn)
   18124              : {
   18125    443218305 :   int i;
   18126              : 
   18127    443218305 :   if (get_attr_type (insn) == TYPE_LEA)
   18128              :     {
   18129     27626149 :       rtx set = PATTERN (insn), addr;
   18130              : 
   18131     27626149 :       if (GET_CODE (set) == PARALLEL)
   18132        86956 :         set = XVECEXP (set, 0, 0);
   18133              : 
   18134     27626149 :       gcc_assert (GET_CODE (set) == SET);
   18135              : 
   18136     27626149 :       addr = SET_SRC (set);
   18137              : 
   18138     27626149 :       return memory_address_length (addr, true);
   18139              :     }
   18140              : 
   18141    415592156 :   extract_insn_cached (insn);
   18142    954527493 :   for (i = recog_data.n_operands - 1; i >= 0; --i)
   18143              :     {
   18144    781597049 :       rtx op = recog_data.operand[i];
   18145    781597049 :       if (MEM_P (op))
   18146              :         {
   18147    242940755 :           constrain_operands_cached (insn, reload_completed);
   18148    242940755 :           if (which_alternative != -1)
   18149              :             {
   18150    242940755 :               const char *constraints = recog_data.constraints[i];
   18151    242940755 :               int alt = which_alternative;
   18152              : 
   18153    385076158 :               while (*constraints == '=' || *constraints == '+')
   18154    142135403 :                 constraints++;
   18155   1102394327 :               while (alt-- > 0)
   18156   2108641635 :                 while (*constraints++ != ',')
   18157              :                   ;
   18158              :               /* Skip ignored operands.  */
   18159    242940755 :               if (*constraints == 'X')
   18160       279043 :                 continue;
   18161              :             }
   18162              : 
   18163    242661712 :           int len = memory_address_length (XEXP (op, 0), false);
   18164              : 
   18165              :           /* Account for segment prefix for non-default addr spaces.  */
   18166    255846796 :           if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
   18167       784055 :             len++;
   18168              : 
   18169    242661712 :           return len;
   18170              :         }
   18171              :     }
   18172              :   return 0;
   18173              : }
   18174              : 
   18175              : /* Compute default value for "length_vex" attribute. It includes
   18176              :    2 or 3 byte VEX prefix and 1 opcode byte.  */
   18177              : 
   18178              : int
   18179      5079429 : ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
   18180              :                               bool has_vex_w)
   18181              : {
   18182      5079429 :   int i, reg_only = 2 + 1;
   18183      5079429 :   bool has_mem = false;
   18184              : 
   18185              :   /* Only 0f opcode can use 2 byte VEX prefix and  VEX W bit uses 3
   18186              :      byte VEX prefix.  */
   18187      5079429 :   if (!has_0f_opcode || has_vex_w)
   18188              :     return 3 + 1;
   18189              : 
   18190              :  /* We can always use 2 byte VEX prefix in 32bit.  */
   18191      4641315 :   if (!TARGET_64BIT)
   18192              :     return 2 + 1;
   18193              : 
   18194      3526158 :   extract_insn_cached (insn);
   18195              : 
   18196     11051946 :   for (i = recog_data.n_operands - 1; i >= 0; --i)
   18197      7828093 :     if (REG_P (recog_data.operand[i]))
   18198              :       {
   18199              :         /* REX.W bit uses 3 byte VEX prefix.
   18200              :            REX2 with vex use extended EVEX prefix length is 4-byte.  */
   18201      5158101 :         if (GET_MODE (recog_data.operand[i]) == DImode
   18202      5158101 :             && GENERAL_REG_P (recog_data.operand[i]))
   18203              :           return 3 + 1;
   18204              : 
   18205              :         /* REX.B bit requires 3-byte VEX. Right here we don't know which
   18206              :            operand will be encoded using VEX.B, so be conservative.
   18207              :            REX2 with vex use extended EVEX prefix length is 4-byte.  */
   18208      5145004 :         if (REX_INT_REGNO_P (recog_data.operand[i])
   18209      5145004 :             || REX2_INT_REGNO_P (recog_data.operand[i])
   18210      5145004 :             || REX_SSE_REGNO_P (recog_data.operand[i]))
   18211            0 :           reg_only = 3 + 1;
   18212              :       }
   18213      2669992 :     else if (MEM_P (recog_data.operand[i]))
   18214              :       {
   18215              :         /* REX2.X or REX2.B bits use 3 byte VEX prefix.  */
   18216      2043400 :         if (x86_extended_rex2reg_mentioned_p (recog_data.operand[i]))
   18217              :           return 4;
   18218              : 
   18219              :         /* REX.X or REX.B bits use 3 byte VEX prefix.  */
   18220      2043159 :         if (x86_extended_reg_mentioned_p (recog_data.operand[i]))
   18221              :           return 3 + 1;
   18222              : 
   18223              :         has_mem = true;
   18224              :       }
   18225              : 
   18226      3223853 :   return has_mem ? 2 + 1 : reg_only;
   18227              : }
   18228              : 
   18229              : 
   18230              : static bool
   18231              : ix86_class_likely_spilled_p (reg_class_t);
   18232              : 
   18233              : /* Returns true if lhs of insn is HW function argument register and set up
   18234              :    is_spilled to true if it is likely spilled HW register.  */
   18235              : static bool
   18236         1149 : insn_is_function_arg (rtx insn, bool* is_spilled)
   18237              : {
   18238         1149 :   rtx dst;
   18239              : 
   18240         1149 :   if (!NONDEBUG_INSN_P (insn))
   18241              :     return false;
   18242              :   /* Call instructions are not movable, ignore it.  */
   18243         1149 :   if (CALL_P (insn))
   18244              :     return false;
   18245         1075 :   insn = PATTERN (insn);
   18246         1075 :   if (GET_CODE (insn) == PARALLEL)
   18247           73 :     insn = XVECEXP (insn, 0, 0);
   18248         1075 :   if (GET_CODE (insn) != SET)
   18249              :     return false;
   18250         1075 :   dst = SET_DEST (insn);
   18251          979 :   if (REG_P (dst) && HARD_REGISTER_P (dst)
   18252         1948 :       && ix86_function_arg_regno_p (REGNO (dst)))
   18253              :     {
   18254              :       /* Is it likely spilled HW register?  */
   18255          873 :       if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
   18256          873 :           && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
   18257          829 :         *is_spilled = true;
   18258          873 :       return true;
   18259              :     }
   18260              :   return false;
   18261              : }
   18262              : 
   18263              : /* Add output dependencies for chain of function adjacent arguments if only
   18264              :    there is a move to likely spilled HW register.  Return first argument
   18265              :    if at least one dependence was added or NULL otherwise.  */
   18266              : static rtx_insn *
   18267          415 : add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
   18268              : {
   18269          415 :   rtx_insn *insn;
   18270          415 :   rtx_insn *last = call;
   18271          415 :   rtx_insn *first_arg = NULL;
   18272          415 :   bool is_spilled = false;
   18273              : 
   18274          415 :   head = PREV_INSN (head);
   18275              : 
   18276              :   /* Find nearest to call argument passing instruction.  */
   18277          415 :   while (true)
   18278              :     {
   18279          415 :       last = PREV_INSN (last);
   18280          415 :       if (last == head)
   18281              :         return NULL;
   18282          415 :       if (!NONDEBUG_INSN_P (last))
   18283            0 :         continue;
   18284          415 :       if (insn_is_function_arg (last, &is_spilled))
   18285              :         break;
   18286              :       return NULL;
   18287              :     }
   18288              : 
   18289              :   first_arg = last;
   18290         1054 :   while (true)
   18291              :     {
   18292         1054 :       insn = PREV_INSN (last);
   18293         1054 :       if (!INSN_P (insn))
   18294              :         break;
   18295          956 :       if (insn == head)
   18296              :         break;
   18297          915 :       if (!NONDEBUG_INSN_P (insn))
   18298              :         {
   18299          181 :           last = insn;
   18300          181 :           continue;
   18301              :         }
   18302          734 :       if (insn_is_function_arg (insn, &is_spilled))
   18303              :         {
   18304              :           /* Add output dependence between two function arguments if chain
   18305              :              of output arguments contains likely spilled HW registers.  */
   18306          466 :           if (is_spilled)
   18307          466 :             add_dependence (first_arg, insn, REG_DEP_OUTPUT);
   18308              :           first_arg = last = insn;
   18309              :         }
   18310              :       else
   18311              :         break;
   18312              :     }
   18313          407 :   if (!is_spilled)
   18314              :     return NULL;
   18315              :   return first_arg;
   18316              : }
   18317              : 
   18318              : /* Add output or anti dependency from insn to first_arg to restrict its code
   18319              :    motion.  */
   18320              : static void
   18321         2333 : avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
   18322              : {
   18323         2333 :   rtx set;
   18324         2333 :   rtx tmp;
   18325              : 
   18326         2333 :   set = single_set (insn);
   18327         2333 :   if (!set)
   18328              :     return;
   18329         1453 :   tmp = SET_DEST (set);
   18330         1453 :   if (REG_P (tmp))
   18331              :     {
   18332              :       /* Add output dependency to the first function argument.  */
   18333         1258 :       add_dependence (first_arg, insn, REG_DEP_OUTPUT);
   18334         1258 :       return;
   18335              :     }
   18336              :   /* Add anti dependency.  */
   18337          195 :   add_dependence (first_arg, insn, REG_DEP_ANTI);
   18338              : }
   18339              : 
   18340              : /* Avoid cross block motion of function argument through adding dependency
   18341              :    from the first non-jump instruction in bb.  */
   18342              : static void
   18343           68 : add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
   18344              : {
   18345           68 :   rtx_insn *insn = BB_END (bb);
   18346              : 
   18347          134 :   while (insn)
   18348              :     {
   18349          134 :       if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
   18350              :         {
   18351           67 :           rtx set = single_set (insn);
   18352           67 :           if (set)
   18353              :             {
   18354           67 :               avoid_func_arg_motion (arg, insn);
   18355           67 :               return;
   18356              :             }
   18357              :         }
   18358           67 :       if (insn == BB_HEAD (bb))
   18359              :         return;
   18360           66 :       insn = PREV_INSN (insn);
   18361              :     }
   18362              : }
   18363              : 
   18364              : /* Hook for pre-reload schedule - avoid motion of function arguments
   18365              :    passed in likely spilled HW registers.  */
   18366              : static void
   18367     10162244 : ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
   18368              : {
   18369     10162244 :   rtx_insn *insn;
   18370     10162244 :   rtx_insn *first_arg = NULL;
   18371     10162244 :   if (reload_completed)
   18372              :     return;
   18373         2268 :   while (head != tail && DEBUG_INSN_P (head))
   18374          766 :     head = NEXT_INSN (head);
   18375        10882 :   for (insn = tail; insn != head; insn = PREV_INSN (insn))
   18376         9516 :     if (INSN_P (insn) && CALL_P (insn))
   18377              :       {
   18378          415 :         first_arg = add_parameter_dependencies (insn, head);
   18379          415 :         if (first_arg)
   18380              :           {
   18381              :             /* Add dependee for first argument to predecessors if only
   18382              :                region contains more than one block.  */
   18383          407 :             basic_block bb =  BLOCK_FOR_INSN (insn);
   18384          407 :             int rgn = CONTAINING_RGN (bb->index);
   18385          407 :             int nr_blks = RGN_NR_BLOCKS (rgn);
   18386              :             /* Skip trivial regions and region head blocks that can have
   18387              :                predecessors outside of region.  */
   18388          407 :             if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
   18389              :               {
   18390           67 :                 edge e;
   18391           67 :                 edge_iterator ei;
   18392              : 
   18393              :                 /* Regions are SCCs with the exception of selective
   18394              :                    scheduling with pipelining of outer blocks enabled.
   18395              :                    So also check that immediate predecessors of a non-head
   18396              :                    block are in the same region.  */
   18397          137 :                 FOR_EACH_EDGE (e, ei, bb->preds)
   18398              :                   {
   18399              :                     /* Avoid creating of loop-carried dependencies through
   18400              :                        using topological ordering in the region.  */
   18401           70 :                     if (rgn == CONTAINING_RGN (e->src->index)
   18402           69 :                         && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
   18403           68 :                       add_dependee_for_func_arg (first_arg, e->src);
   18404              :                   }
   18405              :               }
   18406          407 :             insn = first_arg;
   18407          407 :             if (insn == head)
   18408              :               break;
   18409              :           }
   18410              :       }
   18411         9101 :     else if (first_arg)
   18412         2266 :       avoid_func_arg_motion (first_arg, insn);
   18413              : }
   18414              : 
   18415              : /* Hook for the pre-reload scheduler: set the priority of moves from likely
   18416              :    spilled HW registers to maximum, to schedule them as soon as possible.  These
   18417              :    are moves from function argument registers at the top of the function entry
   18418              :    and moves from function return value registers after a call.  */
   18419              : static int
   18420    106666117 : ix86_adjust_priority (rtx_insn *insn, int priority)
   18421              : {
   18422    106666117 :   rtx set;
   18423              : 
   18424    106666117 :   if (reload_completed)  /* After reload PRIORITY is returned unchanged.  */
   18425              :     return priority;
   18426              : 
   18427        14746 :   if (!NONDEBUG_INSN_P (insn))
   18428              :     return priority;
   18429              : 
   18430        12688 :   set = single_set (insn);
   18431        12688 :   if (set)
   18432              :     {
   18433        12080 :       rtx tmp = SET_SRC (set);  /* Only the source of the single SET is tested.  */
   18434        12080 :       if (REG_P (tmp)
   18435         2532 :           && HARD_REGISTER_P (tmp)
   18436          500 :           && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
   18437        12080 :           && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
   18438          449 :         return current_sched_info->sched_max_insns_priority;
   18439              :     }
   18440              : 
   18441              :   return priority;
   18442              : }
   18443              : 
   18444              : /* Prepare for a scheduling pass by setting up or clearing scheduler hooks.  */
   18445              : static void
   18446       961800 : ix86_sched_init_global (FILE *, int, int)
   18447              : {
   18448              :   /* Install scheduling hooks for current CPU.  Some of these hooks are used
   18449              :      in time-critical parts of the scheduler, so we only set them up when
   18450              :      they are actually used.  */
   18451       961800 :   switch (ix86_tune)
   18452              :     {
   18453       915249 :     case PROCESSOR_CORE2:
   18454       915249 :     case PROCESSOR_NEHALEM:
   18455       915249 :     case PROCESSOR_SANDYBRIDGE:
   18456       915249 :     case PROCESSOR_HASWELL:
   18457       915249 :     case PROCESSOR_TREMONT:
   18458       915249 :     case PROCESSOR_ALDERLAKE:
   18459       915249 :     case PROCESSOR_GENERIC:
   18460              :       /* Do not perform multipass scheduling for the pre-reload schedule,
   18461              :          to save compile time.  */
   18462       915249 :       if (reload_completed)
   18463              :         {
   18464       914762 :           ix86_core2i7_init_hooks ();
   18465       914762 :           break;
   18466              :         }
   18467              :       /* Fall through.  */
   18468        47038 :     default:  /* Also reached pre-reload for the tunings listed above.  */
   18469        47038 :       targetm.sched.dfa_post_advance_cycle = NULL;
   18470        47038 :       targetm.sched.first_cycle_multipass_init = NULL;
   18471        47038 :       targetm.sched.first_cycle_multipass_begin = NULL;
   18472        47038 :       targetm.sched.first_cycle_multipass_issue = NULL;
   18473        47038 :       targetm.sched.first_cycle_multipass_backtrack = NULL;
   18474        47038 :       targetm.sched.first_cycle_multipass_end = NULL;
   18475        47038 :       targetm.sched.first_cycle_multipass_fini = NULL;
   18476        47038 :       break;
   18477              :     }
   18478       961800 : }
   18479              : 
   18480              : 
   18481              : /* Implement TARGET_STATIC_RTX_ALIGNMENT: return the alignment to use for MODE.  */
   18482              : 
   18483              : static HOST_WIDE_INT
   18484       725219 : ix86_static_rtx_alignment (machine_mode mode)
   18485              : {
   18486       725219 :   if (mode == DFmode)
   18487              :     return 64;
   18488              :   if (ALIGN_MODE_128 (mode))  /* Such modes get at least 128.  */
   18489       155571 :     return MAX (128, GET_MODE_ALIGNMENT (mode));
   18490       484289 :   return GET_MODE_ALIGNMENT (mode);
   18491              : }
   18492              : 
   18493              : /* Implement TARGET_CONSTANT_ALIGNMENT: the result is never less than ALIGN.  */
   18494              : 
   18495              : static HOST_WIDE_INT
   18496      6760707 : ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
   18497              : {
   18498      6760707 :   if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
   18499              :       || TREE_CODE (exp) == INTEGER_CST)
   18500              :     {
   18501       370552 :       machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
   18502       370552 :       HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
   18503       370552 :       return MAX (mode_align, align);  /* Same rule as for static RTX constants.  */
   18504              :     }
   18505      6247724 :   else if (!optimize_size && TREE_CODE (exp) == STRING_CST
   18506      9366687 :            && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
   18507              :     return BITS_PER_WORD;  /* Long string constants are word aligned.  */
   18508              : 
   18509              :   return align;
   18510              : }
   18511              : 
   18512              : /* Implement TARGET_EMPTY_RECORD_P.  */
   18513              : 
   18514              : static bool
   18515   1455060077 : ix86_is_empty_record (const_tree type)
   18516              : {
   18517   1455060077 :   if (!TARGET_64BIT)  /* Outside 64-bit mode no type is reported as empty.  */
   18518              :     return false;
   18519   1424107672 :   return default_is_empty_record (type);
   18520              : }
   18521              : 
   18522              : /* Implement TARGET_WARN_PARAMETER_PASSING_ABI: issue the -Wabi empty class warning.  */
   18523              : 
   18524              : static void
   18525     15174088 : ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
   18526              : {
   18527     15174088 :   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
   18528              : 
   18529     15174088 :   if (!cum->warn_empty)  /* Cleared below once the warning has been given.  */
   18530              :     return;
   18531              : 
   18532     13095379 :   if (!TYPE_EMPTY_P (type))
   18533              :     return;
   18534              : 
   18535              :   /* Don't warn if the function isn't visible outside of the TU.  */
   18536        15038 :   if (cum->decl && !TREE_PUBLIC (cum->decl))
   18537              :     return;
   18538              : 
   18539        13554 :   tree decl = cum->decl;
   18540        13554 :   if (!decl)
   18541              :     /* If we don't know the target, look at the current TU.  */
   18542           39 :     decl = current_function_decl;
   18543              : 
   18544        13554 :   const_tree ctx = get_ultimate_context (decl);
   18545        13554 :   if (ctx == NULL_TREE
   18546        27074 :       || !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
   18547              :     return;
   18548              : 
   18549              :   /* If the actual size of the type is zero, then there is no change
   18550              :      in how objects of this size are passed.  */
   18551           72 :   if (int_size_in_bytes (type) == 0)
   18552              :     return;
   18553              : 
   18554           66 :   warning (OPT_Wabi, "empty class %qT parameter passing ABI "
   18555              :            "changes in %<-fabi-version=12%> (GCC 8)", type);
   18556              : 
   18557              :   /* Only warn once for this CUM.  */
   18558           66 :   cum->warn_empty = false;
   18559              : }
   18560              : 
   18561              : /* This hook returns the name of the multilib ABI: "i386", "x32" or "x86_64".  */
   18562              : 
   18563              : static const char *
   18564      3429975 : ix86_get_multilib_abi_name (void)
   18565              : {
   18566      3429975 :   if (!(TARGET_64BIT_P (ix86_isa_flags)))
   18567              :     return "i386";
   18568      3386019 :   else if (TARGET_X32_P (ix86_isa_flags))
   18569              :     return "x32";
   18570              :   else
   18571      3386019 :     return "x86_64";
   18572              : }
   18573              : 
   18574              : /* Compute the alignment for a variable for Intel MCU psABI.  TYPE is
   18575              :    the data type, and ALIGN is the alignment that the object would
   18576              :    ordinarily have.  The result is either ALIGN or 32, never more than ALIGN.  */
   18577              : 
   18578              : static int
   18579            0 : iamcu_alignment (tree type, int align)
   18580              : {
   18581            0 :   machine_mode mode;
   18582              : 
   18583            0 :   if (align < 32 || TYPE_USER_ALIGN (type))  /* Nothing to reduce, or user-specified.  */
   18584              :     return align;
   18585              : 
   18586              :   /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
   18587              :      bytes.  */
   18588            0 :   type = strip_array_types (type);  /* Classify arrays by their element type.  */
   18589            0 :   if (TYPE_ATOMIC (type))  /* Atomic types keep ALIGN.  */
   18590              :     return align;
   18591              : 
   18592            0 :   mode = TYPE_MODE (type);
   18593            0 :   switch (GET_MODE_CLASS (mode))
   18594              :     {
   18595              :     case MODE_INT:
   18596              :     case MODE_COMPLEX_INT:
   18597              :     case MODE_COMPLEX_FLOAT:
   18598              :     case MODE_FLOAT:
   18599              :     case MODE_DECIMAL_FLOAT:
   18600              :       return 32;
   18601              :     default:
   18602              :       return align;
   18603              :     }
   18604              : }
   18605              : 
   18606              : /* Compute the alignment for a static variable.  TYPE is the data type,
   18607              :    ALIGN is the alignment that the object would ordinarily have, and OPT
   18608              :    is false to skip the size- and mode-based increases gated on it below.
   18609              :    The value of this function is used instead of ALIGN to align the object.  */
   18610              : 
   18611              : int
   18612     12060313 : ix86_data_alignment (tree type, unsigned int align, bool opt)
   18613              : {
   18614              :   /* GCC 4.8 and earlier used to incorrectly assume this alignment even
   18615              :      for symbols from other compilation units or symbols that don't need
   18616              :      to bind locally.  In order to preserve some ABI compatibility with
   18617              :      those compilers, ensure we don't decrease alignment from what we
   18618              :      used to assume.  */
   18619              : 
   18620     12060313 :   unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
   18621              : 
   18622              :   /* A data structure, equal or greater than the size of a cache line
   18623              :      (64 bytes in the Pentium 4 and other recent Intel processors, including
   18624              :      processors based on Intel Core microarchitecture) should be aligned
   18625              :      so that its base address is a multiple of a cache line size.  */
   18626              : 
   18627     24120626 :   unsigned int max_align
   18628     12060313 :     = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
   18629              : 
   18630     14650129 :   if (max_align < BITS_PER_WORD)  /* Keep the cap at BITS_PER_WORD or above.  */
   18631            0 :     max_align = BITS_PER_WORD;
   18632              : 
   18633     12060313 :   switch (ix86_align_data_type)
   18634              :     {
   18635     12060313 :     case ix86_align_data_type_abi: opt = false; break;
   18636     12060293 :     case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
   18637              :     case ix86_align_data_type_cacheline: break;
   18638              :     }
   18639              : 
   18640     12060313 :   if (TARGET_IAMCU)
   18641            0 :     align = iamcu_alignment (type, align);
   18642              : 
   18643     12060313 :   if (opt
   18644      5805152 :       && AGGREGATE_TYPE_P (type)
   18645      3721870 :       && TYPE_SIZE (type)
   18646     15782131 :       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
   18647              :     {
   18648      6742085 :       if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
   18649      3721818 :           && align < max_align_compat)
   18650       701551 :         align = max_align_compat;
   18651      7380499 :       if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
   18652      3721818 :           && align < max_align)
   18653        63137 :         align = max_align;
   18654              :     }
   18655              : 
   18656              :   /* x86-64 ABI requires arrays of at least 16 bytes to be aligned to a
   18657              :      16-byte boundary.  With OPT this is applied to every aggregate type.  */
   18658     12060313 :   if (TARGET_64BIT)
   18659              :     {
   18660      4959493 :       if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
   18661      3273501 :           && TYPE_SIZE (type)
   18662      3273439 :           && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
   18663     10908123 :           && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
   18664     11524296 :           && align < 128)
   18665       616173 :         return 128;
   18666              :     }
   18667              : 
   18668     11444140 :   if (!opt)  /* The mode-based increases below are applied only with OPT.  */
   18669      6059608 :     return align;
   18670              : 
   18671      5384532 :   if (TREE_CODE (type) == ARRAY_TYPE)
   18672              :     {
   18673      1105579 :       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
   18674              :         return 64;
   18675      1105579 :       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
   18676              :         return 128;
   18677              :     }
   18678      4278953 :   else if (TREE_CODE (type) == COMPLEX_TYPE)
   18679              :     {
   18680              : 
   18681        12972 :       if (TYPE_MODE (type) == DCmode && align < 64)
   18682              :         return 64;
   18683        12972 :       if ((TYPE_MODE (type) == XCmode
   18684        12972 :            || TYPE_MODE (type) == TCmode) && align < 128)
   18685              :         return 128;
   18686              :     }
   18687      4265981 :   else if (RECORD_OR_UNION_TYPE_P (type)
   18688      4265981 :            && TYPE_FIELDS (type))
   18689              :     {
   18690      2193924 :       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)  /* Only the first field is examined.  */
   18691              :         return 64;
   18692      2193924 :       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
   18693              :         return 128;
   18694              :     }
   18695      2072057 :   else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
   18696              :            || TREE_CODE (type) == INTEGER_TYPE)
   18697              :     {
   18698      1921513 :       if (TYPE_MODE (type) == DFmode && align < 64)
   18699              :         return 64;
   18700      1921513 :       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
   18701              :         return 128;
   18702              :     }
   18703              : 
   18704      5384419 :   return align;
   18705              : }
   18706              : 
   18707              : /* Implement TARGET_LOWER_LOCAL_DECL_ALIGNMENT: DECL's alignment is only ever reduced.  */
   18708              : static void
   18709     31537804 : ix86_lower_local_decl_alignment (tree decl)
   18710              : {
   18711     31537804 :   unsigned int new_align = ix86_local_alignment (decl, VOIDmode,
   18712     31537804 :                                                  DECL_ALIGN (decl), true);
   18713     31537804 :   if (new_align < DECL_ALIGN (decl))  /* A larger computed value is ignored.  */
   18714            0 :     SET_DECL_ALIGN (decl, new_align);
   18715     31537804 : }
   18716              : 
   18717              : /* Compute the alignment for a local variable or a stack slot.  EXP is
   18718              :    the data type or decl itself, MODE is the widest mode available and
   18719              :    ALIGN is the alignment that the object would ordinarily have.  If
   18720              :    MAY_LOWER, the result may be smaller than ALIGN.  The value of this
   18721              :    function is used instead of that alignment to align the object.  */
   18722              : 
   18723              : unsigned int
   18724     48915663 : ix86_local_alignment (tree exp, machine_mode mode,
   18725              :                       unsigned int align, bool may_lower)
   18726              : {
   18727     48915663 :   tree type, decl;
   18728              : 
   18729     48915663 :   if (exp && DECL_P (exp))
   18730              :     {
   18731     46747659 :       type = TREE_TYPE (exp);
   18732     46747659 :       decl = exp;
   18733              :     }
   18734              :   else
   18735              :     {
   18736              :       type = exp;
   18737              :       decl = NULL;
   18738              :     }
   18739              : 
   18740              :   /* Don't do dynamic stack realignment for long long objects with
   18741              :      -mpreferred-stack-boundary=2.  */
   18742     48915663 :   if (may_lower
   18743     31537804 :       && !TARGET_64BIT
   18744       249417 :       && align == 64
   18745        38964 :       && ix86_preferred_stack_boundary < 64
   18746            0 :       && (mode == DImode || (type && TYPE_MODE (type) == DImode))
   18747            0 :       && (!type || (!TYPE_USER_ALIGN (type)
   18748            0 :                     && !TYPE_ATOMIC (strip_array_types (type))))
   18749     48915663 :       && (!decl || !DECL_USER_ALIGN (decl)))
   18750              :     align = 32;
   18751              : 
   18752              :   /* If TYPE is NULL, we are allocating a stack slot for a caller-save
   18753              :      register in MODE.  We will return the largest alignment of XF
   18754              :      and DF.  */
   18755     48915663 :   if (!type)
   18756              :     {
   18757      1408554 :       if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
   18758         1517 :         align = GET_MODE_ALIGNMENT (DFmode);
   18759      1408554 :       return align;
   18760              :     }
   18761              : 
   18762              :   /* Don't increase alignment for Intel MCU psABI.  */
   18763     47507109 :   if (TARGET_IAMCU)
   18764              :     return align;
   18765              : 
   18766              :   /* x86-64 ABI requires arrays of at least 16 bytes to be aligned
   18767              :      to a 16-byte boundary.  Exact wording is:
   18768              : 
   18769              :      An array uses the same alignment as its elements, except that a local or
   18770              :      global array variable of length at least 16 bytes or
   18771              :      a C99 variable-length array variable always has alignment of at least 16 bytes.
   18772              : 
   18773              :      This was added to allow use of aligned SSE instructions on arrays.  This
   18774              :      rule is meant for static storage (where the compiler cannot do the analysis
   18775              :      by itself).  We follow it for automatic variables only when convenient.
   18776              :      We fully control everything in the function compiled and functions from
   18777              :      other units cannot rely on the alignment.
   18778              : 
   18779              :      Exclude the va_list type.  It is the common case of a local array where
   18780              :      we cannot benefit from the alignment.
   18781              : 
   18782              :      TODO: Probably one should optimize for size only when var is not escaping.  */
   18783     44672681 :   if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
   18784     91823379 :       && TARGET_SSE)
   18785              :     {
   18786     44276599 :       if (AGGREGATE_TYPE_P (type)
   18787      9322160 :           && (va_list_type_node == NULL_TREE
   18788      9322160 :               || (TYPE_MAIN_VARIANT (type)
   18789      9322160 :                   != TYPE_MAIN_VARIANT (va_list_type_node)))
   18790      9223320 :           && TYPE_SIZE (type)
   18791      9223320 :           && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
   18792     45341826 :           && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
   18793     51105538 :           && align < 128)
   18794      5763712 :         return 128;
   18795              :     }
   18796     41743397 :   if (TREE_CODE (type) == ARRAY_TYPE)
   18797              :     {
   18798       794057 :       if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
   18799              :         return 64;
   18800       794057 :       if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
   18801              :         return 128;
   18802              :     }
   18803     40949340 :   else if (TREE_CODE (type) == COMPLEX_TYPE)
   18804              :     {
   18805       154329 :       if (TYPE_MODE (type) == DCmode && align < 64)
   18806              :         return 64;
   18807       154329 :       if ((TYPE_MODE (type) == XCmode
   18808       154329 :            || TYPE_MODE (type) == TCmode) && align < 128)
   18809              :         return 128;
   18810              :     }
   18811     40795011 :   else if (RECORD_OR_UNION_TYPE_P (type)
   18812     40795011 :            && TYPE_FIELDS (type))
   18813              :     {
   18814      4739374 :       if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)  /* Only the first field is examined.  */
   18815              :         return 64;
   18816      4736269 :       if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
   18817              :         return 128;
   18818              :     }
   18819     36055637 :   else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
   18820              :            || TREE_CODE (type) == INTEGER_TYPE)
   18821              :     {
   18822              : 
   18823     29732676 :       if (TYPE_MODE (type) == DFmode && align < 64)
   18824              :         return 64;
   18825     29732676 :       if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
   18826              :         return 128;
   18827              :     }
   18828              :   return align;
   18829              : }
   18830              : 
   18831              : /* Compute the minimum required alignment for dynamic stack realignment
   18832              :    purposes for a local variable, parameter or a stack slot.  EXP is
   18833              :    the data type or decl itself, MODE is its mode and ALIGN is the
   18834              :    alignment that the object would ordinarily have.  */
   18835              : 
   18836              : unsigned int
   18837     47724870 : ix86_minimum_alignment (tree exp, machine_mode mode,
   18838              :                         unsigned int align)
   18839              : {
   18840     47724870 :   tree type, decl;
   18841              : 
   18842     47724870 :   if (exp && DECL_P (exp))
   18843              :     {
   18844     15065590 :       type = TREE_TYPE (exp);
   18845     15065590 :       decl = exp;
   18846              :     }
   18847              :   else
   18848              :     {
   18849              :       type = exp;
   18850              :       decl = NULL;
   18851              :     }
   18852              : 
   18853     47724870 :   if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)  /* Nothing to lower.  */
   18854              :     return align;
   18855              : 
   18856              :   /* Don't do dynamic stack realignment for long long objects with
   18857              :      -mpreferred-stack-boundary=2.  */
   18858            0 :   if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
   18859            0 :       && (!type || (!TYPE_USER_ALIGN (type)
   18860            0 :                     && !TYPE_ATOMIC (strip_array_types (type))))
   18861            0 :       && (!decl || !DECL_USER_ALIGN (decl)))
   18862              :     {
   18863            0 :       gcc_checking_assert (!TARGET_STV);
   18864              :       return 32;  /* ALIGN is 64 here, so this lowers it.  */
   18865              :     }
   18866              : 
   18867              :   return align;
   18868              : }
   18869              : 
   18870              : /* Find a location for the static chain incoming to a nested function.
   18871              :    This is a register, unless all free registers are used by arguments.  */
   18872              : 
   18873              : static rtx
   18874       270266 : ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
   18875              : {
   18876       270266 :   unsigned regno;
   18877              : 
   18878       270266 :   if (TARGET_64BIT)
   18879              :     {
   18880              :       /* We always use R10 in 64-bit mode.  */
   18881              :       regno = R10_REG;
   18882              :     }
   18883              :   else
   18884              :     {
   18885        88535 :       const_tree fntype, fndecl;
   18886        88535 :       unsigned int ccvt;
   18887              : 
   18888              :       /* By default in 32-bit mode we use ECX to pass the static chain.  */
   18889        88535 :       regno = CX_REG;
   18890              : 
   18891        88535 :       if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
   18892              :         {
   18893        78559 :           fntype = TREE_TYPE (fndecl_or_type);
   18894        78559 :           fndecl = fndecl_or_type;
   18895              :         }
   18896              :       else
   18897              :         {
   18898              :           fntype = fndecl_or_type;
   18899              :           fndecl = NULL;
   18900              :         }
   18901              : 
   18902        88535 :       ccvt = ix86_get_callcvt (fntype);
   18903        88535 :       if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
   18904              :         {
   18905              :           /* Fastcall functions use ecx/edx for arguments, which leaves
   18906              :              us with EAX for the static chain.  (Thiscall, which uses
   18907              :              only ecx for arguments, is handled separately below and
   18908              :              also ends up with EAX.)  */
   18909              :           regno = AX_REG;
   18910              :         }
   18911        88535 :       else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
   18912              :         {
   18913              :           /* Thiscall functions use ecx for arguments, which leaves
   18914              :              us with EAX and EDX for the static chain.
   18915              :              We use EAX for ABI compatibility.  */
   18916              :           regno = AX_REG;
   18917              :         }
   18918        88535 :       else if (ix86_function_regparm (fntype, fndecl) == 3)
   18919              :         {
   18920              :           /* For regparm 3, we have no free call-clobbered registers in
   18921              :              which to store the static chain.  In order to implement this,
   18922              :              we have the trampoline push the static chain to the stack.
   18923              :              However, we can't push a value below the return address when
   18924              :              we call the nested function directly, so we have to use an
   18925              :              alternate entry point.  For this we use ESI, and have the
   18926              :              alternate entry point push ESI, so that things appear the
   18927              :              same once we're executing the nested function.  */
   18928            0 :           if (incoming_p)  /* Incoming chain is returned as a stack slot.  */
   18929              :             {
   18930            0 :               if (fndecl == current_function_decl
   18931            0 :                   && !ix86_static_chain_on_stack)
   18932              :                 {
   18933            0 :                   gcc_assert (!reload_completed);
   18934            0 :                   ix86_static_chain_on_stack = true;
   18935              :                 }
   18936            0 :               return gen_frame_mem (SImode,
   18937            0 :                                     plus_constant (Pmode,
   18938              :                                                    arg_pointer_rtx, -8));
   18939              :             }
   18940              :           regno = SI_REG;
   18941              :         }
   18942              :     }
   18943              : 
   18944       358814 :   return gen_rtx_REG (Pmode, regno);
   18945              : }
   18946              : 
   18947              : /* Emit RTL insns to initialize the variable parts of a trampoline.
   18948              :    FNDECL is the decl of the target address; M_TRAMP is a MEM for
   18949              :    the trampoline, and CHAIN_VALUE is an RTX for the static chain
   18950              :    to be passed to the target function.  */
   18951              : 
   18952              : static void
   18953          305 : ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
   18954              : {
   18955          305 :   rtx mem, fnaddr;
   18956          305 :   int opcode;
   18957          305 :   int offset = 0;
   18958          305 :   bool need_endbr = (flag_cf_protection & CF_BRANCH);
   18959              : 
   18960          305 :   fnaddr = XEXP (DECL_RTL (fndecl), 0);
   18961              : 
   18962          305 :   if (TARGET_64BIT)
   18963              :     {
   18964          305 :       int size;
   18965              : 
   18966          305 :       if (need_endbr)
   18967              :         {
   18968              :           /* Insert ENDBR64.  */
   18969            1 :           mem = adjust_address (m_tramp, SImode, offset);
   18970            1 :           emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
   18971            1 :           offset += 4;
   18972              :         }
   18973              : 
   18974              :       /* Load the function address to r11.  Try to load address using
   18975              :          the shorter movl instead of movabs.  We may want to support
   18976              :          movq for kernel mode, but kernel does not use trampolines at
   18977              :          the moment.  FNADDR is a 32bit address and may not be in
   18978              :          DImode when ptr_mode == SImode.  Always use movl in this
   18979              :          case.  */
   18980          305 :       if (ptr_mode == SImode
   18981          305 :           || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
   18982              :         {
   18983          273 :           fnaddr = copy_addr_to_reg (fnaddr);
   18984              : 
   18985          273 :           mem = adjust_address (m_tramp, HImode, offset);
   18986          273 :           emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
   18987              : 
   18988          273 :           mem = adjust_address (m_tramp, SImode, offset + 2);
   18989          273 :           emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
   18990          273 :           offset += 6;
   18991              :         }
   18992              :       else
   18993              :         {
   18994           32 :           mem = adjust_address (m_tramp, HImode, offset);
   18995           32 :           emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
   18996              : 
   18997           32 :           mem = adjust_address (m_tramp, DImode, offset + 2);
   18998           32 :           emit_move_insn (mem, fnaddr);
   18999           32 :           offset += 10;
   19000              :         }
   19001              : 
   19002              :       /* Load static chain using movabs to r10.  Use the shorter movl
   19003              :          instead of movabs when ptr_mode == SImode.  */
   19004          305 :       if (ptr_mode == SImode)
   19005              :         {
   19006              :           opcode = 0xba41;
   19007              :           size = 6;
   19008              :         }
   19009              :       else
   19010              :         {
   19011          305 :           opcode = 0xba49;
   19012          305 :           size = 10;
   19013              :         }
   19014              : 
   19015          305 :       mem = adjust_address (m_tramp, HImode, offset);
   19016          305 :       emit_move_insn (mem, gen_int_mode (opcode, HImode));
   19017              : 
   19018          305 :       mem = adjust_address (m_tramp, ptr_mode, offset + 2);
   19019          305 :       emit_move_insn (mem, chain_value);
   19020          305 :       offset += size;
   19021              : 
   19022              :       /* Jump to r11; the last (unused) byte is a nop, only there to
   19023              :          pad the write out to a single 32-bit store.  */
   19024          305 :       mem = adjust_address (m_tramp, SImode, offset);
   19025          305 :       emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
   19026          305 :       offset += 4;
   19027              :     }
   19028              :   else
   19029              :     {
   19030            0 :       rtx disp, chain;
   19031              : 
   19032              :       /* Depending on the static chain location, either load a register
   19033              :          with a constant, or push the constant to the stack.  All of the
   19034              :          instructions are the same size.  */
   19035            0 :       chain = ix86_static_chain (fndecl, true);
   19036            0 :       if (REG_P (chain))
   19037              :         {
   19038            0 :           switch (REGNO (chain))
   19039              :             {
   19040              :             case AX_REG:
   19041              :               opcode = 0xb8; break;
   19042            0 :             case CX_REG:
   19043            0 :               opcode = 0xb9; break;
   19044            0 :             default:
   19045            0 :               gcc_unreachable ();
   19046              :             }
   19047              :         }
   19048              :       else
   19049              :         opcode = 0x68;
   19050              : 
   19051            0 :       if (need_endbr)
   19052              :         {
   19053              :           /* Insert ENDBR32.  */
   19054            0 :           mem = adjust_address (m_tramp, SImode, offset);
   19055            0 :           emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
   19056            0 :           offset += 4;
   19057              :         }
   19058              : 
   19059            0 :       mem = adjust_address (m_tramp, QImode, offset);
   19060            0 :       emit_move_insn (mem, gen_int_mode (opcode, QImode));
   19061              : 
   19062            0 :       mem = adjust_address (m_tramp, SImode, offset + 1);
   19063            0 :       emit_move_insn (mem, chain_value);
   19064            0 :       offset += 5;
   19065              : 
   19066            0 :       mem = adjust_address (m_tramp, QImode, offset);
   19067            0 :       emit_move_insn (mem, gen_int_mode (0xe9, QImode));
   19068              : 
   19069            0 :       mem = adjust_address (m_tramp, SImode, offset + 1);
   19070              : 
   19071              :       /* Compute offset from the end of the jmp to the target function.
   19072              :          In the case in which the trampoline stores the static chain on
   19073              :          the stack, we need to skip the first insn which pushes the
   19074              :          (call-saved) register static chain; this push is 1 byte.  */
   19075            0 :       offset += 5;
   19076            0 :       int skip = MEM_P (chain) ? 1 : 0;
   19077              :       /* Skip ENDBR32 at the entry of the target function.  */
   19078            0 :       if (need_endbr
   19079            0 :           && !cgraph_node::get (fndecl)->only_called_directly_p ())
   19080            0 :         skip += 4;
   19081            0 :       disp = expand_binop (SImode, sub_optab, fnaddr,
   19082            0 :                            plus_constant (Pmode, XEXP (m_tramp, 0),
   19083            0 :                                           offset - skip),
   19084              :                            NULL_RTX, 1, OPTAB_DIRECT);
   19085            0 :       emit_move_insn (mem, disp);
   19086              :     }
   19087              : 
   19088          305 :   gcc_assert (offset <= TRAMPOLINE_SIZE);
   19089              : 
   19090              : #ifdef HAVE_ENABLE_EXECUTE_STACK
   19091              : #ifdef CHECK_EXECUTE_STACK_ENABLED
   19092              :   if (CHECK_EXECUTE_STACK_ENABLED)
   19093              : #endif
   19094              :   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
   19095              :                      LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
   19096              : #endif
   19097          305 : }
   19098              : 
   19099              : static bool
   19100     53922445 : ix86_allocate_stack_slots_for_args (void)
   19101              : {
   19102              :   /* Naked functions should not allocate stack slots for arguments,
                      :      so the result is the negation of ix86_function_naked applied to
                      :      current_function_decl: false for a naked function, true
                      :      otherwise.  */
   19103     53922445 :   return !ix86_function_naked (current_function_decl);
   19104              : }
   19105              : 
   19106              : static bool
   19107     37939568 : ix86_warn_func_return (tree decl)
   19108              : {
   19109              :   /* Naked functions are implemented entirely in assembly, including the
   19110              :      return sequence, so suppress warnings about this: the result is
                      :      false when ix86_function_naked holds for DECL and true
                      :      otherwise.  */
   19111     37939568 :   return !ix86_function_naked (decl);
   19112              : }
   19113              : 
   19114              : /* Return the shift count of a vector by scalar shift builtin second argument
   19115              :    ARG1.  ARG1 itself is returned when tree_fits_uhwi_p accepts it.  For a
                      :    VECTOR_CST, up to 16 bytes of ARG1 are encoded into a buffer and
                      :    reinterpreted as a uint64_type_node value, which is returned if it
                      :    passes tree_fits_uhwi_p.  In every other case, including a failed
                      :    encoding, the result is NULL_TREE.  */
   19116              : static tree
   19117        14142 : ix86_vector_shift_count (tree arg1)
   19118              : {
   19119        14142 :   if (tree_fits_uhwi_p (arg1))
   19120              :     return arg1;
   19121         8316 :   else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
   19122              :     {
   19123              :       /* The count argument is weird, passed in as various 128-bit
   19124              :          (or 64-bit) vectors; the low 64 bits of it are the count.
                      :          Encode the constant into BUF and read a 64-bit value back
                      :          from the start of that encoding.  A zero length from
                      :          native_encode_expr means nothing was encoded.  */
   19125          162 :       unsigned char buf[16];
   19126          162 :       int len = native_encode_expr (arg1, buf, 16);
   19127          162 :       if (len == 0)
   19128          162 :         return NULL_TREE;
   19129          162 :       tree t = native_interpret_expr (uint64_type_node, buf, len);
   19130          162 :       if (t && tree_fits_uhwi_p (t))
   19131              :         return t;
   19132              :     }
   19133              :   return NULL_TREE;
   19134              : }
   19135              : 
   19136              : /* Return true if the low ELEMS bits of ARG_MASK are all ones, where ELEMS
   19137              :    is the number of elements of the corresponding vector.  An ARG_MASK
                      :    that is not an INTEGER_CST yields false.  Only the value obtained
                      :    through TREE_INT_CST_LOW is examined.  ELEMS equal to
                      :    HOST_BITS_PER_WIDE_INT is tested separately, by comparing the whole
                      :    mask, so that no shift by the full width is performed.  */
   19138              : static bool
   19139        25042 : ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask)
   19140              : {
   19141        25042 :   if (TREE_CODE (arg_mask) != INTEGER_CST)
   19142              :     return false;
   19143              : 
   19144         7462 :   unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask);
   19145         7462 :   if (elems == HOST_BITS_PER_WIDE_INT)
   19146           33 :     return  mask == HOST_WIDE_INT_M1U;
   19147         7429 :   if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
   19148         2681 :     return false;
   19149              : 
   19150              :   return true;
   19151              : }
   19152              : 
   19153              : static tree
   19154     68154418 : ix86_fold_builtin (tree fndecl, int n_args,
   19155              :                    tree *args, bool ignore ATTRIBUTE_UNUSED)
   19156              : {
   19157     68154418 :   if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
   19158              :     {
   19159     68154418 :       enum ix86_builtins fn_code
   19160     68154418 :         = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
   19161     68154418 :       enum rtx_code rcode;
   19162     68154418 :       bool is_vshift;
   19163     68154418 :       enum tree_code tcode;
   19164     68154418 :       bool is_scalar;
   19165     68154418 :       unsigned HOST_WIDE_INT mask;
   19166              : 
   19167     68154418 :       switch (fn_code)
   19168              :         {
   19169         8883 :         case IX86_BUILTIN_CPU_IS:
   19170         8883 :         case IX86_BUILTIN_CPU_SUPPORTS:
   19171         8883 :           gcc_assert (n_args == 1);
   19172         8883 :           return fold_builtin_cpu (fndecl, args);
   19173              : 
   19174        25133 :         case IX86_BUILTIN_NANQ:
   19175        25133 :         case IX86_BUILTIN_NANSQ:
   19176        25133 :           {
   19177        25133 :             tree type = TREE_TYPE (TREE_TYPE (fndecl));
   19178        25133 :             const char *str = c_getstr (*args);
   19179        25133 :             int quiet = fn_code == IX86_BUILTIN_NANQ;
   19180        25133 :             REAL_VALUE_TYPE real;
   19181              : 
   19182        25133 :             if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
   19183        25133 :               return build_real (type, real);
   19184            0 :             return NULL_TREE;
   19185              :           }
   19186              : 
   19187          108 :         case IX86_BUILTIN_INFQ:
   19188          108 :         case IX86_BUILTIN_HUGE_VALQ:
   19189          108 :           {
   19190          108 :             tree type = TREE_TYPE (TREE_TYPE (fndecl));
   19191          108 :             REAL_VALUE_TYPE inf;
   19192          108 :             real_inf (&inf);
   19193          108 :             return build_real (type, inf);
   19194              :           }
   19195              : 
   19196        62447 :         case IX86_BUILTIN_TZCNT16:
   19197        62447 :         case IX86_BUILTIN_CTZS:
   19198        62447 :         case IX86_BUILTIN_TZCNT32:
   19199        62447 :         case IX86_BUILTIN_TZCNT64:
   19200        62447 :           gcc_assert (n_args == 1);
   19201        62447 :           if (TREE_CODE (args[0]) == INTEGER_CST)
   19202              :             {
   19203           45 :               tree type = TREE_TYPE (TREE_TYPE (fndecl));
   19204           45 :               tree arg = args[0];
   19205           45 :               if (fn_code == IX86_BUILTIN_TZCNT16
   19206           45 :                   || fn_code == IX86_BUILTIN_CTZS)
   19207            3 :                 arg = fold_convert (short_unsigned_type_node, arg);
   19208           45 :               if (integer_zerop (arg))
   19209            6 :                 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
   19210              :               else
   19211           39 :                 return fold_const_call (CFN_CTZ, type, arg);
   19212              :             }
   19213              :           break;
   19214              : 
   19215        52002 :         case IX86_BUILTIN_LZCNT16:
   19216        52002 :         case IX86_BUILTIN_CLZS:
   19217        52002 :         case IX86_BUILTIN_LZCNT32:
   19218        52002 :         case IX86_BUILTIN_LZCNT64:
   19219        52002 :           gcc_assert (n_args == 1);
   19220        52002 :           if (TREE_CODE (args[0]) == INTEGER_CST)
   19221              :             {
   19222           54 :               tree type = TREE_TYPE (TREE_TYPE (fndecl));
   19223           54 :               tree arg = args[0];
   19224           54 :               if (fn_code == IX86_BUILTIN_LZCNT16
   19225           54 :                   || fn_code == IX86_BUILTIN_CLZS)
   19226           18 :                 arg = fold_convert (short_unsigned_type_node, arg);
   19227           54 :               if (integer_zerop (arg))
   19228            3 :                 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
   19229              :               else
   19230           51 :                 return fold_const_call (CFN_CLZ, type, arg);
   19231              :             }
   19232              :           break;
   19233              : 
   19234        61227 :         case IX86_BUILTIN_BEXTR32:
   19235        61227 :         case IX86_BUILTIN_BEXTR64:
   19236        61227 :         case IX86_BUILTIN_BEXTRI32:
   19237        61227 :         case IX86_BUILTIN_BEXTRI64:
   19238        61227 :           gcc_assert (n_args == 2);
   19239        61227 :           if (tree_fits_uhwi_p (args[1]))
   19240              :             {
   19241          152 :               unsigned HOST_WIDE_INT res = 0;
   19242          152 :               unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
   19243          152 :               unsigned int start = tree_to_uhwi (args[1]);
   19244          152 :               unsigned int len = (start & 0xff00) >> 8;
   19245          152 :               tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
   19246          152 :               start &= 0xff;
   19247          152 :               if (start >= prec || len == 0)
   19248          111 :                 return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
   19249              :                                          args[0]);
   19250           41 :               else if (!tree_fits_uhwi_p (args[0]))
   19251              :                 break;
   19252              :               else
   19253           24 :                 res = tree_to_uhwi (args[0]) >> start;
   19254           24 :               if (len > prec)
   19255              :                 len = prec;
   19256           24 :               if (len < HOST_BITS_PER_WIDE_INT)
   19257           15 :                 res &= (HOST_WIDE_INT_1U << len) - 1;
   19258           24 :               return build_int_cstu (lhs_type, res);
   19259              :             }
   19260              :           break;
   19261              : 
   19262        21034 :         case IX86_BUILTIN_BZHI32:
   19263        21034 :         case IX86_BUILTIN_BZHI64:
   19264        21034 :           gcc_assert (n_args == 2);
   19265        21034 :           if (tree_fits_uhwi_p (args[1]))
   19266              :             {
   19267          190 :               unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
   19268          190 :               tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
   19269          190 :               if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
   19270              :                 return args[0];
   19271          190 :               if (idx == 0)
   19272           52 :                 return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
   19273              :                                          args[0]);
   19274          138 :               if (!tree_fits_uhwi_p (args[0]))
   19275              :                 break;
   19276           12 :               unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
   19277           12 :               res &= ~(HOST_WIDE_INT_M1U << idx);
   19278           12 :               return build_int_cstu (lhs_type, res);
   19279              :             }
   19280              :           break;
   19281              : 
   19282        20792 :         case IX86_BUILTIN_PDEP32:
   19283        20792 :         case IX86_BUILTIN_PDEP64:
   19284        20792 :           gcc_assert (n_args == 2);
   19285        20792 :           if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
   19286              :             {
   19287           46 :               unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
   19288           46 :               unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
   19289           46 :               unsigned HOST_WIDE_INT res = 0;
   19290           46 :               unsigned HOST_WIDE_INT m, k = 1;
   19291         2990 :               for (m = 1; m; m <<= 1)
   19292         2944 :                 if ((mask & m) != 0)
   19293              :                   {
   19294         1440 :                     if ((src & k) != 0)
   19295          789 :                       res |= m;
   19296         1440 :                     k <<= 1;
   19297              :                   }
   19298           46 :               return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
   19299              :             }
   19300              :           break;
   19301              : 
   19302        20794 :         case IX86_BUILTIN_PEXT32:
   19303        20794 :         case IX86_BUILTIN_PEXT64:
   19304        20794 :           gcc_assert (n_args == 2);
   19305        20794 :           if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
   19306              :             {
   19307           46 :               unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
   19308           46 :               unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
   19309           46 :               unsigned HOST_WIDE_INT res = 0;
   19310           46 :               unsigned HOST_WIDE_INT m, k = 1;
   19311         2990 :               for (m = 1; m; m <<= 1)
   19312         2944 :                 if ((mask & m) != 0)
   19313              :                   {
   19314         2016 :                     if ((src & m) != 0)
   19315         1063 :                       res |= k;
   19316         2016 :                     k <<= 1;
   19317              :                   }
   19318           46 :               return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
   19319              :             }
   19320              :           break;
   19321              : 
   19322       100831 :         case IX86_BUILTIN_MOVMSKPS:
   19323       100831 :         case IX86_BUILTIN_PMOVMSKB:
   19324       100831 :         case IX86_BUILTIN_MOVMSKPD:
   19325       100831 :         case IX86_BUILTIN_PMOVMSKB128:
   19326       100831 :         case IX86_BUILTIN_MOVMSKPD256:
   19327       100831 :         case IX86_BUILTIN_MOVMSKPS256:
   19328       100831 :         case IX86_BUILTIN_PMOVMSKB256:
   19329       100831 :           gcc_assert (n_args == 1);
   19330       100831 :           if (TREE_CODE (args[0]) == VECTOR_CST)
   19331              :             {
   19332              :               HOST_WIDE_INT res = 0;
   19333         1492 :               for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
   19334              :                 {
   19335         1242 :                   tree e = VECTOR_CST_ELT (args[0], i);
   19336         1242 :                   if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
   19337              :                     {
   19338          624 :                       if (wi::neg_p (wi::to_wide (e)))
   19339          575 :                         res |= HOST_WIDE_INT_1 << i;
   19340              :                     }
   19341          618 :                   else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
   19342              :                     {
   19343          618 :                       if (TREE_REAL_CST (e).sign)
   19344          517 :                         res |= HOST_WIDE_INT_1 << i;
   19345              :                     }
   19346              :                   else
   19347              :                     return NULL_TREE;
   19348              :                 }
   19349          250 :               return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
   19350              :             }
   19351              :           break;
   19352              : 
   19353       659648 :         case IX86_BUILTIN_PSLLD:
   19354       659648 :         case IX86_BUILTIN_PSLLD128:
   19355       659648 :         case IX86_BUILTIN_PSLLD128_MASK:
   19356       659648 :         case IX86_BUILTIN_PSLLD256:
   19357       659648 :         case IX86_BUILTIN_PSLLD256_MASK:
   19358       659648 :         case IX86_BUILTIN_PSLLD512:
   19359       659648 :         case IX86_BUILTIN_PSLLDI:
   19360       659648 :         case IX86_BUILTIN_PSLLDI128:
   19361       659648 :         case IX86_BUILTIN_PSLLDI128_MASK:
   19362       659648 :         case IX86_BUILTIN_PSLLDI256:
   19363       659648 :         case IX86_BUILTIN_PSLLDI256_MASK:
   19364       659648 :         case IX86_BUILTIN_PSLLDI512:
   19365       659648 :         case IX86_BUILTIN_PSLLQ:
   19366       659648 :         case IX86_BUILTIN_PSLLQ128:
   19367       659648 :         case IX86_BUILTIN_PSLLQ128_MASK:
   19368       659648 :         case IX86_BUILTIN_PSLLQ256:
   19369       659648 :         case IX86_BUILTIN_PSLLQ256_MASK:
   19370       659648 :         case IX86_BUILTIN_PSLLQ512:
   19371       659648 :         case IX86_BUILTIN_PSLLQI:
   19372       659648 :         case IX86_BUILTIN_PSLLQI128:
   19373       659648 :         case IX86_BUILTIN_PSLLQI128_MASK:
   19374       659648 :         case IX86_BUILTIN_PSLLQI256:
   19375       659648 :         case IX86_BUILTIN_PSLLQI256_MASK:
   19376       659648 :         case IX86_BUILTIN_PSLLQI512:
   19377       659648 :         case IX86_BUILTIN_PSLLW:
   19378       659648 :         case IX86_BUILTIN_PSLLW128:
   19379       659648 :         case IX86_BUILTIN_PSLLW128_MASK:
   19380       659648 :         case IX86_BUILTIN_PSLLW256:
   19381       659648 :         case IX86_BUILTIN_PSLLW256_MASK:
   19382       659648 :         case IX86_BUILTIN_PSLLW512_MASK:
   19383       659648 :         case IX86_BUILTIN_PSLLWI:
   19384       659648 :         case IX86_BUILTIN_PSLLWI128:
   19385       659648 :         case IX86_BUILTIN_PSLLWI128_MASK:
   19386       659648 :         case IX86_BUILTIN_PSLLWI256:
   19387       659648 :         case IX86_BUILTIN_PSLLWI256_MASK:
   19388       659648 :         case IX86_BUILTIN_PSLLWI512_MASK:
   19389       659648 :           rcode = ASHIFT;
   19390       659648 :           is_vshift = false;
   19391       659648 :           goto do_shift;
   19392       601251 :         case IX86_BUILTIN_PSRAD:
   19393       601251 :         case IX86_BUILTIN_PSRAD128:
   19394       601251 :         case IX86_BUILTIN_PSRAD128_MASK:
   19395       601251 :         case IX86_BUILTIN_PSRAD256:
   19396       601251 :         case IX86_BUILTIN_PSRAD256_MASK:
   19397       601251 :         case IX86_BUILTIN_PSRAD512:
   19398       601251 :         case IX86_BUILTIN_PSRADI:
   19399       601251 :         case IX86_BUILTIN_PSRADI128:
   19400       601251 :         case IX86_BUILTIN_PSRADI128_MASK:
   19401       601251 :         case IX86_BUILTIN_PSRADI256:
   19402       601251 :         case IX86_BUILTIN_PSRADI256_MASK:
   19403       601251 :         case IX86_BUILTIN_PSRADI512:
   19404       601251 :         case IX86_BUILTIN_PSRAQ128_MASK:
   19405       601251 :         case IX86_BUILTIN_PSRAQ256_MASK:
   19406       601251 :         case IX86_BUILTIN_PSRAQ512:
   19407       601251 :         case IX86_BUILTIN_PSRAQI128_MASK:
   19408       601251 :         case IX86_BUILTIN_PSRAQI256_MASK:
   19409       601251 :         case IX86_BUILTIN_PSRAQI512:
   19410       601251 :         case IX86_BUILTIN_PSRAW:
   19411       601251 :         case IX86_BUILTIN_PSRAW128:
   19412       601251 :         case IX86_BUILTIN_PSRAW128_MASK:
   19413       601251 :         case IX86_BUILTIN_PSRAW256:
   19414       601251 :         case IX86_BUILTIN_PSRAW256_MASK:
   19415       601251 :         case IX86_BUILTIN_PSRAW512:
   19416       601251 :         case IX86_BUILTIN_PSRAWI:
   19417       601251 :         case IX86_BUILTIN_PSRAWI128:
   19418       601251 :         case IX86_BUILTIN_PSRAWI128_MASK:
   19419       601251 :         case IX86_BUILTIN_PSRAWI256:
   19420       601251 :         case IX86_BUILTIN_PSRAWI256_MASK:
   19421       601251 :         case IX86_BUILTIN_PSRAWI512:
   19422       601251 :           rcode = ASHIFTRT;
   19423       601251 :           is_vshift = false;
   19424       601251 :           goto do_shift;
   19425       633525 :         case IX86_BUILTIN_PSRLD:
   19426       633525 :         case IX86_BUILTIN_PSRLD128:
   19427       633525 :         case IX86_BUILTIN_PSRLD128_MASK:
   19428       633525 :         case IX86_BUILTIN_PSRLD256:
   19429       633525 :         case IX86_BUILTIN_PSRLD256_MASK:
   19430       633525 :         case IX86_BUILTIN_PSRLD512:
   19431       633525 :         case IX86_BUILTIN_PSRLDI:
   19432       633525 :         case IX86_BUILTIN_PSRLDI128:
   19433       633525 :         case IX86_BUILTIN_PSRLDI128_MASK:
   19434       633525 :         case IX86_BUILTIN_PSRLDI256:
   19435       633525 :         case IX86_BUILTIN_PSRLDI256_MASK:
   19436       633525 :         case IX86_BUILTIN_PSRLDI512:
   19437       633525 :         case IX86_BUILTIN_PSRLQ:
   19438       633525 :         case IX86_BUILTIN_PSRLQ128:
   19439       633525 :         case IX86_BUILTIN_PSRLQ128_MASK:
   19440       633525 :         case IX86_BUILTIN_PSRLQ256:
   19441       633525 :         case IX86_BUILTIN_PSRLQ256_MASK:
   19442       633525 :         case IX86_BUILTIN_PSRLQ512:
   19443       633525 :         case IX86_BUILTIN_PSRLQI:
   19444       633525 :         case IX86_BUILTIN_PSRLQI128:
   19445       633525 :         case IX86_BUILTIN_PSRLQI128_MASK:
   19446       633525 :         case IX86_BUILTIN_PSRLQI256:
   19447       633525 :         case IX86_BUILTIN_PSRLQI256_MASK:
   19448       633525 :         case IX86_BUILTIN_PSRLQI512:
   19449       633525 :         case IX86_BUILTIN_PSRLW:
   19450       633525 :         case IX86_BUILTIN_PSRLW128:
   19451       633525 :         case IX86_BUILTIN_PSRLW128_MASK:
   19452       633525 :         case IX86_BUILTIN_PSRLW256:
   19453       633525 :         case IX86_BUILTIN_PSRLW256_MASK:
   19454       633525 :         case IX86_BUILTIN_PSRLW512:
   19455       633525 :         case IX86_BUILTIN_PSRLWI:
   19456       633525 :         case IX86_BUILTIN_PSRLWI128:
   19457       633525 :         case IX86_BUILTIN_PSRLWI128_MASK:
   19458       633525 :         case IX86_BUILTIN_PSRLWI256:
   19459       633525 :         case IX86_BUILTIN_PSRLWI256_MASK:
   19460       633525 :         case IX86_BUILTIN_PSRLWI512:
   19461       633525 :           rcode = LSHIFTRT;
   19462       633525 :           is_vshift = false;
   19463       633525 :           goto do_shift;
   19464       276009 :         case IX86_BUILTIN_PSLLVV16HI:
   19465       276009 :         case IX86_BUILTIN_PSLLVV16SI:
   19466       276009 :         case IX86_BUILTIN_PSLLVV2DI:
   19467       276009 :         case IX86_BUILTIN_PSLLVV2DI_MASK:
   19468       276009 :         case IX86_BUILTIN_PSLLVV32HI:
   19469       276009 :         case IX86_BUILTIN_PSLLVV4DI:
   19470       276009 :         case IX86_BUILTIN_PSLLVV4DI_MASK:
   19471       276009 :         case IX86_BUILTIN_PSLLVV4SI:
   19472       276009 :         case IX86_BUILTIN_PSLLVV4SI_MASK:
   19473       276009 :         case IX86_BUILTIN_PSLLVV8DI:
   19474       276009 :         case IX86_BUILTIN_PSLLVV8HI:
   19475       276009 :         case IX86_BUILTIN_PSLLVV8SI:
   19476       276009 :         case IX86_BUILTIN_PSLLVV8SI_MASK:
   19477       276009 :           rcode = ASHIFT;
   19478       276009 :           is_vshift = true;
   19479       276009 :           goto do_shift;
   19480       275588 :         case IX86_BUILTIN_PSRAVQ128:
   19481       275588 :         case IX86_BUILTIN_PSRAVQ256:
   19482       275588 :         case IX86_BUILTIN_PSRAVV16HI:
   19483       275588 :         case IX86_BUILTIN_PSRAVV16SI:
   19484       275588 :         case IX86_BUILTIN_PSRAVV32HI:
   19485       275588 :         case IX86_BUILTIN_PSRAVV4SI:
   19486       275588 :         case IX86_BUILTIN_PSRAVV4SI_MASK:
   19487       275588 :         case IX86_BUILTIN_PSRAVV8DI:
   19488       275588 :         case IX86_BUILTIN_PSRAVV8HI:
   19489       275588 :         case IX86_BUILTIN_PSRAVV8SI:
   19490       275588 :         case IX86_BUILTIN_PSRAVV8SI_MASK:
   19491       275588 :           rcode = ASHIFTRT;
   19492       275588 :           is_vshift = true;
   19493       275588 :           goto do_shift;
   19494       276000 :         case IX86_BUILTIN_PSRLVV16HI:
   19495       276000 :         case IX86_BUILTIN_PSRLVV16SI:
   19496       276000 :         case IX86_BUILTIN_PSRLVV2DI:
   19497       276000 :         case IX86_BUILTIN_PSRLVV2DI_MASK:
   19498       276000 :         case IX86_BUILTIN_PSRLVV32HI:
   19499       276000 :         case IX86_BUILTIN_PSRLVV4DI:
   19500       276000 :         case IX86_BUILTIN_PSRLVV4DI_MASK:
   19501       276000 :         case IX86_BUILTIN_PSRLVV4SI:
   19502       276000 :         case IX86_BUILTIN_PSRLVV4SI_MASK:
   19503       276000 :         case IX86_BUILTIN_PSRLVV8DI:
   19504       276000 :         case IX86_BUILTIN_PSRLVV8HI:
   19505       276000 :         case IX86_BUILTIN_PSRLVV8SI:
   19506       276000 :         case IX86_BUILTIN_PSRLVV8SI_MASK:
   19507       276000 :           rcode = LSHIFTRT;
   19508       276000 :           is_vshift = true;
   19509       276000 :           goto do_shift;
   19510              : 
   19511      2722021 :         do_shift:
   19512      2722021 :           gcc_assert (n_args >= 2);
   19513      2722021 :           if (TREE_CODE (args[0]) != VECTOR_CST)
   19514              :             break;
   19515          927 :           mask = HOST_WIDE_INT_M1U;
   19516          927 :           if (n_args > 2)
   19517              :             {
   19518              :               /* This is masked shift.  */
   19519          678 :               if (!tree_fits_uhwi_p (args[n_args - 1])
   19520          678 :                   || TREE_SIDE_EFFECTS (args[n_args - 2]))
   19521              :                 break;
   19522          678 :               mask = tree_to_uhwi (args[n_args - 1]);
   19523          678 :               unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
   19524          678 :               mask |= HOST_WIDE_INT_M1U << elems;
   19525          678 :               if (mask != HOST_WIDE_INT_M1U
   19526          567 :                   && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
   19527              :                 break;
   19528          633 :               if (mask == (HOST_WIDE_INT_M1U << elems))
   19529              :                 return args[n_args - 2];
   19530              :             }
   19531          879 :           if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
   19532              :             break;
   19533          879 :           if (tree tem = (is_vshift ? integer_one_node
   19534          879 :                           : ix86_vector_shift_count (args[1])))
   19535              :             {
   19536          558 :               unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
   19537          558 :               unsigned HOST_WIDE_INT prec
   19538          558 :                 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
   19539          558 :               if (count == 0 && mask == HOST_WIDE_INT_M1U)
   19540              :                 return args[0];
   19541          558 :               if (count >= prec)
   19542              :                 {
   19543           72 :                   if (rcode == ASHIFTRT)
   19544           27 :                     count = prec - 1;
   19545           45 :                   else if (mask == HOST_WIDE_INT_M1U)
   19546            3 :                     return build_zero_cst (TREE_TYPE (args[0]));
   19547              :                 }
   19548          555 :               tree countt = NULL_TREE;
   19549          555 :               if (!is_vshift)
   19550              :                 {
   19551          377 :                   if (count >= prec)
   19552           42 :                     countt = integer_zero_node;
   19553              :                   else
   19554          335 :                     countt = build_int_cst (integer_type_node, count);
   19555              :                 }
   19556          555 :               tree_vector_builder builder;
   19557          555 :               if (mask != HOST_WIDE_INT_M1U || is_vshift)
   19558          392 :                 builder.new_vector (TREE_TYPE (args[0]),
   19559          784 :                                     TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
   19560              :                                     1);
   19561              :               else
   19562          163 :                 builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
   19563              :                                              false);
   19564          555 :               unsigned int cnt = builder.encoded_nelts ();
   19565         5967 :               for (unsigned int i = 0; i < cnt; ++i)
   19566              :                 {
   19567         5412 :                   tree elt = VECTOR_CST_ELT (args[0], i);
   19568         5412 :                   if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
   19569            0 :                     return NULL_TREE;
   19570         5412 :                   tree type = TREE_TYPE (elt);
   19571         5412 :                   if (rcode == LSHIFTRT)
   19572         2040 :                     elt = fold_convert (unsigned_type_for (type), elt);
   19573         5412 :                   if (is_vshift)
   19574              :                     {
   19575         1846 :                       countt = VECTOR_CST_ELT (args[1], i);
   19576         1846 :                       if (TREE_CODE (countt) != INTEGER_CST
   19577         1846 :                           || TREE_OVERFLOW (countt))
   19578              :                         return NULL_TREE;
   19579         1846 :                       if (wi::neg_p (wi::to_wide (countt))
   19580         3610 :                           || wi::to_widest (countt) >= prec)
   19581              :                         {
   19582          325 :                           if (rcode == ASHIFTRT)
   19583          108 :                             countt = build_int_cst (TREE_TYPE (countt),
   19584          108 :                                                     prec - 1);
   19585              :                           else
   19586              :                             {
   19587          217 :                               elt = build_zero_cst (TREE_TYPE (elt));
   19588          217 :                               countt = build_zero_cst (TREE_TYPE (countt));
   19589              :                             }
   19590              :                         }
   19591              :                     }
   19592         3566 :                   else if (count >= prec)
   19593          504 :                     elt = build_zero_cst (TREE_TYPE (elt));
   19594         8950 :                   elt = const_binop (rcode == ASHIFT
   19595              :                                      ? LSHIFT_EXPR : RSHIFT_EXPR,
   19596         5412 :                                      TREE_TYPE (elt), elt, countt);
   19597         5412 :                   if (!elt || TREE_CODE (elt) != INTEGER_CST)
   19598              :                     return NULL_TREE;
   19599         5412 :                   if (rcode == LSHIFTRT)
   19600         2040 :                     elt = fold_convert (type, elt);
   19601         5412 :                   if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
   19602              :                     {
   19603         1566 :                       elt = VECTOR_CST_ELT (args[n_args - 2], i);
   19604         1566 :                       if (TREE_CODE (elt) != INTEGER_CST
   19605         1566 :                           || TREE_OVERFLOW (elt))
   19606              :                         return NULL_TREE;
   19607              :                     }
   19608         5412 :                   builder.quick_push (elt);
   19609              :                 }
   19610          555 :               return builder.build ();
   19611          555 :             }
   19612              :           break;
   19613              : 
   19614        32718 :         case IX86_BUILTIN_MINSS:
   19615        32718 :         case IX86_BUILTIN_MINSH_MASK:
   19616        32718 :           tcode = LT_EXPR;
   19617        32718 :           is_scalar = true;
   19618        32718 :           goto do_minmax;
   19619              : 
   19620        32718 :         case IX86_BUILTIN_MAXSS:
   19621        32718 :         case IX86_BUILTIN_MAXSH_MASK:
   19622        32718 :           tcode = GT_EXPR;
   19623        32718 :           is_scalar = true;
   19624        32718 :           goto do_minmax;
   19625              : 
   19626       350576 :         case IX86_BUILTIN_MINPS:
   19627       350576 :         case IX86_BUILTIN_MINPD:
   19628       350576 :         case IX86_BUILTIN_MINPS256:
   19629       350576 :         case IX86_BUILTIN_MINPD256:
   19630       350576 :         case IX86_BUILTIN_MINPS512:
   19631       350576 :         case IX86_BUILTIN_MINPD512:
   19632       350576 :         case IX86_BUILTIN_MINPS128_MASK:
   19633       350576 :         case IX86_BUILTIN_MINPD128_MASK:
   19634       350576 :         case IX86_BUILTIN_MINPS256_MASK:
   19635       350576 :         case IX86_BUILTIN_MINPD256_MASK:
   19636       350576 :         case IX86_BUILTIN_MINPH128_MASK:
   19637       350576 :         case IX86_BUILTIN_MINPH256_MASK:
   19638       350576 :         case IX86_BUILTIN_MINPH512_MASK:
   19639       350576 :           tcode = LT_EXPR;
   19640       350576 :           is_scalar = false;
   19641       350576 :           goto do_minmax;
   19642              : 
   19643              :         case IX86_BUILTIN_MAXPS:
   19644              :         case IX86_BUILTIN_MAXPD:
   19645              :         case IX86_BUILTIN_MAXPS256:
   19646              :         case IX86_BUILTIN_MAXPD256:
   19647              :         case IX86_BUILTIN_MAXPS512:
   19648              :         case IX86_BUILTIN_MAXPD512:
   19649              :         case IX86_BUILTIN_MAXPS128_MASK:
   19650              :         case IX86_BUILTIN_MAXPD128_MASK:
   19651              :         case IX86_BUILTIN_MAXPS256_MASK:
   19652              :         case IX86_BUILTIN_MAXPD256_MASK:
   19653              :         case IX86_BUILTIN_MAXPH128_MASK:
   19654              :         case IX86_BUILTIN_MAXPH256_MASK:
   19655              :         case IX86_BUILTIN_MAXPH512_MASK:
   19656              :           tcode = GT_EXPR;
   19657              :           is_scalar = false;
   19658       766608 :         do_minmax:
   19659       766608 :           gcc_assert (n_args >= 2);
   19660       766608 :           if (TREE_CODE (args[0]) != VECTOR_CST
   19661           76 :               || TREE_CODE (args[1]) != VECTOR_CST)
   19662              :             break;
   19663           76 :           mask = HOST_WIDE_INT_M1U;
   19664           76 :           if (n_args > 2)
   19665              :             {
   19666           36 :               gcc_assert (n_args >= 4);
   19667              :               /* This is masked minmax.  */
   19668           36 :               if (TREE_CODE (args[3]) != INTEGER_CST
   19669           36 :                   || TREE_SIDE_EFFECTS (args[2]))
   19670              :                 break;
   19671           36 :               mask = TREE_INT_CST_LOW (args[3]);
   19672           36 :               unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
   19673           36 :               mask |= HOST_WIDE_INT_M1U << elems;
   19674           36 :               if (mask != HOST_WIDE_INT_M1U
   19675           32 :                   && TREE_CODE (args[2]) != VECTOR_CST)
   19676              :                 break;
   19677           36 :               if (n_args >= 5)
   19678              :                 {
   19679           20 :                   if (!tree_fits_uhwi_p (args[4]))
   19680              :                     break;
   19681           20 :                   if (tree_to_uhwi (args[4]) != 4
   19682            0 :                       && tree_to_uhwi (args[4]) != 8)
   19683              :                     break;
   19684              :                 }
   19685           36 :               if (mask == (HOST_WIDE_INT_M1U << elems))
   19686              :                 return args[2];
   19687              :             }
   19688              :           /* Punt on NaNs, unless exceptions are disabled.  */
   19689           76 :           if (HONOR_NANS (args[0])
   19690           76 :               && (n_args < 5 || tree_to_uhwi (args[4]) != 8))
   19691          184 :             for (int i = 0; i < 2; ++i)
   19692              :               {
   19693          134 :                 unsigned count = vector_cst_encoded_nelts (args[i]);
   19694          957 :                 for (unsigned j = 0; j < count; ++j)
   19695          849 :                   if (tree_expr_nan_p (VECTOR_CST_ENCODED_ELT (args[i], j)))
   19696              :                     return NULL_TREE;
   19697              :               }
   19698           50 :           {
   19699           50 :             tree res = const_binop (tcode,
   19700           50 :                                     truth_type_for (TREE_TYPE (args[0])),
   19701              :                                     args[0], args[1]);
   19702           50 :             if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
   19703              :               break;
   19704           50 :             res = fold_ternary (VEC_COND_EXPR, TREE_TYPE (args[0]), res,
   19705              :                                 args[0], args[1]);
   19706           50 :             if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
   19707              :               break;
   19708           50 :             if (mask != HOST_WIDE_INT_M1U)
   19709              :               {
   19710           32 :                 unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
   19711           32 :                 vec_perm_builder sel (nelts, nelts, 1);
   19712          328 :                 for (unsigned int i = 0; i < nelts; i++)
   19713          296 :                   if (mask & (HOST_WIDE_INT_1U << i))
   19714          160 :                     sel.quick_push (i);
   19715              :                   else
   19716          136 :                     sel.quick_push (nelts + i);
   19717           32 :                 vec_perm_indices indices (sel, 2, nelts);
   19718           32 :                 res = fold_vec_perm (TREE_TYPE (args[0]), res, args[2],
   19719              :                                      indices);
   19720           32 :                 if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
   19721              :                   break;
   19722           32 :               }
   19723           50 :             if (is_scalar)
   19724              :               {
   19725           10 :                 unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
   19726           10 :                 vec_perm_builder sel (nelts, nelts, 1);
   19727           10 :                 sel.quick_push (0);
   19728           40 :                 for (unsigned int i = 1; i < nelts; i++)
   19729           30 :                   sel.quick_push (nelts + i);
   19730           10 :                 vec_perm_indices indices (sel, 2, nelts);
   19731           10 :                 res = fold_vec_perm (TREE_TYPE (args[0]), res, args[0],
   19732              :                                      indices);
   19733           10 :                 if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
   19734              :                   break;
   19735           10 :               }
   19736           50 :             return res;
   19737              :           }
   19738              : 
   19739              :         default:
   19740              :           break;
   19741              :         }
   19742              :     }
   19743              : 
   19744              : #ifdef SUBTARGET_FOLD_BUILTIN
   19745              :   return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
   19746              : #endif
   19747              : 
   19748              :   return NULL_TREE;
   19749              : }
   19750              : 
   19751              : /* Fold an MD builtin (use ix86_fold_builtin for folding into
   19752              :    a constant) in GIMPLE.  */
   19753              : 
   19754              : bool
   19755      1131057 : ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   19756              : {
   19757      1131057 :   gimple *stmt = gsi_stmt (*gsi), *g;
   19758      1131057 :   gimple_seq stmts = NULL;
   19759      1131057 :   tree fndecl = gimple_call_fndecl (stmt);
   19760      1131057 :   gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
   19761      1131057 :   int n_args = gimple_call_num_args (stmt);
   19762      1131057 :   enum ix86_builtins fn_code
   19763      1131057 :     = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
   19764      1131057 :   tree decl = NULL_TREE;
   19765      1131057 :   tree arg0, arg1, arg2;
   19766      1131057 :   enum rtx_code rcode;
   19767      1131057 :   enum tree_code tcode;
   19768      1131057 :   unsigned HOST_WIDE_INT count;
   19769      1131057 :   bool is_vshift;
   19770      1131057 :   unsigned HOST_WIDE_INT elems;
   19771      1131057 :   location_t loc;
   19772              : 
   19773              :   /* Don't fold when there's isa mismatch.  */
   19774      1131057 :   if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL))
   19775              :     return false;
   19776              : 
   19777      1130930 :   switch (fn_code)
   19778              :     {
   19779          288 :     case IX86_BUILTIN_TZCNT32:
   19780          288 :       decl = builtin_decl_implicit (BUILT_IN_CTZ);
   19781          288 :       goto fold_tzcnt_lzcnt;
   19782              : 
   19783          237 :     case IX86_BUILTIN_TZCNT64:
   19784          237 :       decl = builtin_decl_implicit (BUILT_IN_CTZLL);
   19785          237 :       goto fold_tzcnt_lzcnt;
   19786              : 
   19787          215 :     case IX86_BUILTIN_LZCNT32:
   19788          215 :       decl = builtin_decl_implicit (BUILT_IN_CLZ);
   19789          215 :       goto fold_tzcnt_lzcnt;
   19790              : 
   19791          224 :     case IX86_BUILTIN_LZCNT64:
   19792          224 :       decl = builtin_decl_implicit (BUILT_IN_CLZLL);
   19793          224 :       goto fold_tzcnt_lzcnt;
   19794              : 
   19795          964 :     fold_tzcnt_lzcnt:
   19796          964 :       gcc_assert (n_args == 1);
   19797          964 :       arg0 = gimple_call_arg (stmt, 0);
   19798          964 :       if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
   19799              :         {
   19800          799 :           int prec = TYPE_PRECISION (TREE_TYPE (arg0));
   19801              :           /* If arg0 is provably non-zero, optimize into generic
   19802              :              __builtin_c[tl]z{,ll} function the middle-end handles
   19803              :              better.  */
   19804          799 :           if (!expr_not_equal_to (arg0, wi::zero (prec)))
   19805              :             return false;
   19806              : 
   19807            9 :           loc = gimple_location (stmt);
   19808            9 :           g = gimple_build_call (decl, 1, arg0);
   19809            9 :           gimple_set_location (g, loc);
   19810            9 :           tree lhs = make_ssa_name (integer_type_node);
   19811            9 :           gimple_call_set_lhs (g, lhs);
   19812            9 :           gsi_insert_before (gsi, g, GSI_SAME_STMT);
   19813            9 :           g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
   19814            9 :           gimple_set_location (g, loc);
   19815            9 :           gsi_replace (gsi, g, false);
   19816            9 :           return true;
   19817              :         }
   19818              :       break;
   19819              : 
   19820          491 :     case IX86_BUILTIN_BZHI32:
   19821          491 :     case IX86_BUILTIN_BZHI64:
   19822          491 :       gcc_assert (n_args == 2);
   19823          491 :       arg1 = gimple_call_arg (stmt, 1);
   19824          491 :       if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
   19825              :         {
   19826          195 :           unsigned int idx = tree_to_uhwi (arg1) & 0xff;
   19827          195 :           arg0 = gimple_call_arg (stmt, 0);
   19828          195 :           if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
   19829              :             break;
   19830           31 :           loc = gimple_location (stmt);
   19831           31 :           g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
   19832           31 :           gimple_set_location (g, loc);
   19833           31 :           gsi_replace (gsi, g, false);
   19834           31 :           return true;
   19835              :         }
   19836              :       break;
   19837              : 
   19838          502 :     case IX86_BUILTIN_PDEP32:
   19839          502 :     case IX86_BUILTIN_PDEP64:
   19840          502 :     case IX86_BUILTIN_PEXT32:
   19841          502 :     case IX86_BUILTIN_PEXT64:
   19842          502 :       gcc_assert (n_args == 2);
   19843          502 :       arg1 = gimple_call_arg (stmt, 1);
   19844          502 :       if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
   19845              :         {
   19846            4 :           loc = gimple_location (stmt);
   19847            4 :           arg0 = gimple_call_arg (stmt, 0);
   19848            4 :           g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
   19849            4 :           gimple_set_location (g, loc);
   19850            4 :           gsi_replace (gsi, g, false);
   19851            4 :           return true;
   19852              :         }
   19853              :       break;
   19854              : 
   19855          145 :     case IX86_BUILTIN_PBLENDVB256:
   19856          145 :     case IX86_BUILTIN_BLENDVPS256:
   19857          145 :     case IX86_BUILTIN_BLENDVPD256:
   19858              :       /* pcmpeqb/d/q is under avx2, w/o avx2, it's veclower
   19859              :          to scalar operations and not combined back.  */
   19860          145 :       if (!TARGET_AVX2)
   19861              :         break;
   19862              : 
   19863              :       /* FALLTHRU.  */
   19864          112 :     case IX86_BUILTIN_BLENDVPD:
   19865              :       /* blendvpd is under sse4.1 but pcmpgtq is under sse4.2,
   19866              :          w/o sse4.2, it's veclowered to scalar operations and
   19867              :          not combined back.  */
   19868          112 :       if (!TARGET_SSE4_2)
   19869              :         break;
   19870              :       /* FALLTHRU.  */
   19871          166 :     case IX86_BUILTIN_PBLENDVB128:
   19872          166 :     case IX86_BUILTIN_BLENDVPS:
   19873          166 :       gcc_assert (n_args == 3);
   19874          166 :       arg0 = gimple_call_arg (stmt, 0);
   19875          166 :       arg1 = gimple_call_arg (stmt, 1);
   19876          166 :       arg2 = gimple_call_arg (stmt, 2);
   19877          166 :       if (gimple_call_lhs (stmt))
   19878              :         {
   19879          166 :           loc = gimple_location (stmt);
   19880          166 :           tree type = TREE_TYPE (arg2);
   19881          166 :           if (VECTOR_FLOAT_TYPE_P (type))
   19882              :             {
   19883           73 :               tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
   19884           73 :                 ? intSI_type_node : intDI_type_node;
   19885           73 :               type = get_same_sized_vectype (itype, type);
   19886              :             }
   19887              :           else
   19888           93 :             type = signed_type_for (type);
   19889          166 :           arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
   19890          166 :           tree zero_vec = build_zero_cst (type);
   19891          166 :           tree cmp_type = truth_type_for (type);
   19892          166 :           tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
   19893          166 :           gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
   19894          166 :           g = gimple_build_assign (gimple_call_lhs (stmt),
   19895              :                                    VEC_COND_EXPR, cmp,
   19896              :                                    arg1, arg0);
   19897          166 :           gimple_set_location (g, loc);
   19898          166 :           gsi_replace (gsi, g, false);
   19899              :         }
   19900              :       else
   19901            0 :         gsi_replace (gsi, gimple_build_nop (), false);
   19902              :       return true;
   19903              : 
   19904              : 
   19905           16 :     case IX86_BUILTIN_PCMPEQB128:
   19906           16 :     case IX86_BUILTIN_PCMPEQW128:
   19907           16 :     case IX86_BUILTIN_PCMPEQD128:
   19908           16 :     case IX86_BUILTIN_PCMPEQQ:
   19909           16 :     case IX86_BUILTIN_PCMPEQB256:
   19910           16 :     case IX86_BUILTIN_PCMPEQW256:
   19911           16 :     case IX86_BUILTIN_PCMPEQD256:
   19912           16 :     case IX86_BUILTIN_PCMPEQQ256:
   19913           16 :       tcode = EQ_EXPR;
   19914           16 :       goto do_cmp;
   19915              : 
   19916              :     case IX86_BUILTIN_PCMPGTB128:
   19917              :     case IX86_BUILTIN_PCMPGTW128:
   19918              :     case IX86_BUILTIN_PCMPGTD128:
   19919              :     case IX86_BUILTIN_PCMPGTQ:
   19920              :     case IX86_BUILTIN_PCMPGTB256:
   19921              :     case IX86_BUILTIN_PCMPGTW256:
   19922              :     case IX86_BUILTIN_PCMPGTD256:
   19923              :     case IX86_BUILTIN_PCMPGTQ256:
   19924              :       tcode = GT_EXPR;
   19925              : 
   19926           33 :     do_cmp:
   19927           33 :       gcc_assert (n_args == 2);
   19928           33 :       arg0 = gimple_call_arg (stmt, 0);
   19929           33 :       arg1 = gimple_call_arg (stmt, 1);
   19930           33 :       if (gimple_call_lhs (stmt))
   19931              :         {
   19932           32 :           loc = gimple_location (stmt);
   19933           32 :           tree type = TREE_TYPE (arg0);
   19934           32 :           tree zero_vec = build_zero_cst (type);
   19935           32 :           tree minus_one_vec = build_minus_one_cst (type);
   19936           32 :           tree cmp_type = truth_type_for (type);
   19937           32 :           tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
   19938           32 :           gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
   19939           32 :           g = gimple_build_assign (gimple_call_lhs (stmt),
   19940              :                                    VEC_COND_EXPR, cmp,
   19941              :                                    minus_one_vec, zero_vec);
   19942           32 :           gimple_set_location (g, loc);
   19943           32 :           gsi_replace (gsi, g, false);
   19944              :         }
   19945              :       else
   19946            1 :         gsi_replace (gsi, gimple_build_nop (), false);
   19947              :       return true;
   19948              : 
   19949         9297 :     case IX86_BUILTIN_PSLLD:
   19950         9297 :     case IX86_BUILTIN_PSLLD128:
   19951         9297 :     case IX86_BUILTIN_PSLLD128_MASK:
   19952         9297 :     case IX86_BUILTIN_PSLLD256:
   19953         9297 :     case IX86_BUILTIN_PSLLD256_MASK:
   19954         9297 :     case IX86_BUILTIN_PSLLD512:
   19955         9297 :     case IX86_BUILTIN_PSLLDI:
   19956         9297 :     case IX86_BUILTIN_PSLLDI128:
   19957         9297 :     case IX86_BUILTIN_PSLLDI128_MASK:
   19958         9297 :     case IX86_BUILTIN_PSLLDI256:
   19959         9297 :     case IX86_BUILTIN_PSLLDI256_MASK:
   19960         9297 :     case IX86_BUILTIN_PSLLDI512:
   19961         9297 :     case IX86_BUILTIN_PSLLQ:
   19962         9297 :     case IX86_BUILTIN_PSLLQ128:
   19963         9297 :     case IX86_BUILTIN_PSLLQ128_MASK:
   19964         9297 :     case IX86_BUILTIN_PSLLQ256:
   19965         9297 :     case IX86_BUILTIN_PSLLQ256_MASK:
   19966         9297 :     case IX86_BUILTIN_PSLLQ512:
   19967         9297 :     case IX86_BUILTIN_PSLLQI:
   19968         9297 :     case IX86_BUILTIN_PSLLQI128:
   19969         9297 :     case IX86_BUILTIN_PSLLQI128_MASK:
   19970         9297 :     case IX86_BUILTIN_PSLLQI256:
   19971         9297 :     case IX86_BUILTIN_PSLLQI256_MASK:
   19972         9297 :     case IX86_BUILTIN_PSLLQI512:
   19973         9297 :     case IX86_BUILTIN_PSLLW:
   19974         9297 :     case IX86_BUILTIN_PSLLW128:
   19975         9297 :     case IX86_BUILTIN_PSLLW128_MASK:
   19976         9297 :     case IX86_BUILTIN_PSLLW256:
   19977         9297 :     case IX86_BUILTIN_PSLLW256_MASK:
   19978         9297 :     case IX86_BUILTIN_PSLLW512_MASK:
   19979         9297 :     case IX86_BUILTIN_PSLLWI:
   19980         9297 :     case IX86_BUILTIN_PSLLWI128:
   19981         9297 :     case IX86_BUILTIN_PSLLWI128_MASK:
   19982         9297 :     case IX86_BUILTIN_PSLLWI256:
   19983         9297 :     case IX86_BUILTIN_PSLLWI256_MASK:
   19984         9297 :     case IX86_BUILTIN_PSLLWI512_MASK:
   19985         9297 :       rcode = ASHIFT;
   19986         9297 :       is_vshift = false;
   19987         9297 :       goto do_shift;
   19988         6495 :     case IX86_BUILTIN_PSRAD:
   19989         6495 :     case IX86_BUILTIN_PSRAD128:
   19990         6495 :     case IX86_BUILTIN_PSRAD128_MASK:
   19991         6495 :     case IX86_BUILTIN_PSRAD256:
   19992         6495 :     case IX86_BUILTIN_PSRAD256_MASK:
   19993         6495 :     case IX86_BUILTIN_PSRAD512:
   19994         6495 :     case IX86_BUILTIN_PSRADI:
   19995         6495 :     case IX86_BUILTIN_PSRADI128:
   19996         6495 :     case IX86_BUILTIN_PSRADI128_MASK:
   19997         6495 :     case IX86_BUILTIN_PSRADI256:
   19998         6495 :     case IX86_BUILTIN_PSRADI256_MASK:
   19999         6495 :     case IX86_BUILTIN_PSRADI512:
   20000         6495 :     case IX86_BUILTIN_PSRAQ128_MASK:
   20001         6495 :     case IX86_BUILTIN_PSRAQ256_MASK:
   20002         6495 :     case IX86_BUILTIN_PSRAQ512:
   20003         6495 :     case IX86_BUILTIN_PSRAQI128_MASK:
   20004         6495 :     case IX86_BUILTIN_PSRAQI256_MASK:
   20005         6495 :     case IX86_BUILTIN_PSRAQI512:
   20006         6495 :     case IX86_BUILTIN_PSRAW:
   20007         6495 :     case IX86_BUILTIN_PSRAW128:
   20008         6495 :     case IX86_BUILTIN_PSRAW128_MASK:
   20009         6495 :     case IX86_BUILTIN_PSRAW256:
   20010         6495 :     case IX86_BUILTIN_PSRAW256_MASK:
   20011         6495 :     case IX86_BUILTIN_PSRAW512:
   20012         6495 :     case IX86_BUILTIN_PSRAWI:
   20013         6495 :     case IX86_BUILTIN_PSRAWI128:
   20014         6495 :     case IX86_BUILTIN_PSRAWI128_MASK:
   20015         6495 :     case IX86_BUILTIN_PSRAWI256:
   20016         6495 :     case IX86_BUILTIN_PSRAWI256_MASK:
   20017         6495 :     case IX86_BUILTIN_PSRAWI512:
   20018         6495 :       rcode = ASHIFTRT;
   20019         6495 :       is_vshift = false;
   20020         6495 :       goto do_shift;
   20021         7960 :     case IX86_BUILTIN_PSRLD:
   20022         7960 :     case IX86_BUILTIN_PSRLD128:
   20023         7960 :     case IX86_BUILTIN_PSRLD128_MASK:
   20024         7960 :     case IX86_BUILTIN_PSRLD256:
   20025         7960 :     case IX86_BUILTIN_PSRLD256_MASK:
   20026         7960 :     case IX86_BUILTIN_PSRLD512:
   20027         7960 :     case IX86_BUILTIN_PSRLDI:
   20028         7960 :     case IX86_BUILTIN_PSRLDI128:
   20029         7960 :     case IX86_BUILTIN_PSRLDI128_MASK:
   20030         7960 :     case IX86_BUILTIN_PSRLDI256:
   20031         7960 :     case IX86_BUILTIN_PSRLDI256_MASK:
   20032         7960 :     case IX86_BUILTIN_PSRLDI512:
   20033         7960 :     case IX86_BUILTIN_PSRLQ:
   20034         7960 :     case IX86_BUILTIN_PSRLQ128:
   20035         7960 :     case IX86_BUILTIN_PSRLQ128_MASK:
   20036         7960 :     case IX86_BUILTIN_PSRLQ256:
   20037         7960 :     case IX86_BUILTIN_PSRLQ256_MASK:
   20038         7960 :     case IX86_BUILTIN_PSRLQ512:
   20039         7960 :     case IX86_BUILTIN_PSRLQI:
   20040         7960 :     case IX86_BUILTIN_PSRLQI128:
   20041         7960 :     case IX86_BUILTIN_PSRLQI128_MASK:
   20042         7960 :     case IX86_BUILTIN_PSRLQI256:
   20043         7960 :     case IX86_BUILTIN_PSRLQI256_MASK:
   20044         7960 :     case IX86_BUILTIN_PSRLQI512:
   20045         7960 :     case IX86_BUILTIN_PSRLW:
   20046         7960 :     case IX86_BUILTIN_PSRLW128:
   20047         7960 :     case IX86_BUILTIN_PSRLW128_MASK:
   20048         7960 :     case IX86_BUILTIN_PSRLW256:
   20049         7960 :     case IX86_BUILTIN_PSRLW256_MASK:
   20050         7960 :     case IX86_BUILTIN_PSRLW512:
   20051         7960 :     case IX86_BUILTIN_PSRLWI:
   20052         7960 :     case IX86_BUILTIN_PSRLWI128:
   20053         7960 :     case IX86_BUILTIN_PSRLWI128_MASK:
   20054         7960 :     case IX86_BUILTIN_PSRLWI256:
   20055         7960 :     case IX86_BUILTIN_PSRLWI256_MASK:
   20056         7960 :     case IX86_BUILTIN_PSRLWI512:
   20057         7960 :       rcode = LSHIFTRT;
   20058         7960 :       is_vshift = false;
   20059         7960 :       goto do_shift;
   20060         2384 :     case IX86_BUILTIN_PSLLVV16HI:
   20061         2384 :     case IX86_BUILTIN_PSLLVV16SI:
   20062         2384 :     case IX86_BUILTIN_PSLLVV2DI:
   20063         2384 :     case IX86_BUILTIN_PSLLVV2DI_MASK:
   20064         2384 :     case IX86_BUILTIN_PSLLVV32HI:
   20065         2384 :     case IX86_BUILTIN_PSLLVV4DI:
   20066         2384 :     case IX86_BUILTIN_PSLLVV4DI_MASK:
   20067         2384 :     case IX86_BUILTIN_PSLLVV4SI:
   20068         2384 :     case IX86_BUILTIN_PSLLVV4SI_MASK:
   20069         2384 :     case IX86_BUILTIN_PSLLVV8DI:
   20070         2384 :     case IX86_BUILTIN_PSLLVV8HI:
   20071         2384 :     case IX86_BUILTIN_PSLLVV8SI:
   20072         2384 :     case IX86_BUILTIN_PSLLVV8SI_MASK:
   20073         2384 :       rcode = ASHIFT;
   20074         2384 :       is_vshift = true;
   20075         2384 :       goto do_shift;
   20076         2341 :     case IX86_BUILTIN_PSRAVQ128:
   20077         2341 :     case IX86_BUILTIN_PSRAVQ256:
   20078         2341 :     case IX86_BUILTIN_PSRAVV16HI:
   20079         2341 :     case IX86_BUILTIN_PSRAVV16SI:
   20080         2341 :     case IX86_BUILTIN_PSRAVV32HI:
   20081         2341 :     case IX86_BUILTIN_PSRAVV4SI:
   20082         2341 :     case IX86_BUILTIN_PSRAVV4SI_MASK:
   20083         2341 :     case IX86_BUILTIN_PSRAVV8DI:
   20084         2341 :     case IX86_BUILTIN_PSRAVV8HI:
   20085         2341 :     case IX86_BUILTIN_PSRAVV8SI:
   20086         2341 :     case IX86_BUILTIN_PSRAVV8SI_MASK:
   20087         2341 :       rcode = ASHIFTRT;
   20088         2341 :       is_vshift = true;
   20089         2341 :       goto do_shift;
   20090         2380 :     case IX86_BUILTIN_PSRLVV16HI:
   20091         2380 :     case IX86_BUILTIN_PSRLVV16SI:
   20092         2380 :     case IX86_BUILTIN_PSRLVV2DI:
   20093         2380 :     case IX86_BUILTIN_PSRLVV2DI_MASK:
   20094         2380 :     case IX86_BUILTIN_PSRLVV32HI:
   20095         2380 :     case IX86_BUILTIN_PSRLVV4DI:
   20096         2380 :     case IX86_BUILTIN_PSRLVV4DI_MASK:
   20097         2380 :     case IX86_BUILTIN_PSRLVV4SI:
   20098         2380 :     case IX86_BUILTIN_PSRLVV4SI_MASK:
   20099         2380 :     case IX86_BUILTIN_PSRLVV8DI:
   20100         2380 :     case IX86_BUILTIN_PSRLVV8HI:
   20101         2380 :     case IX86_BUILTIN_PSRLVV8SI:
   20102         2380 :     case IX86_BUILTIN_PSRLVV8SI_MASK:
   20103         2380 :       rcode = LSHIFTRT;
   20104         2380 :       is_vshift = true;
   20105         2380 :       goto do_shift;
   20106              : 
   20107        30857 :     do_shift:
   20108        30857 :       gcc_assert (n_args >= 2);
   20109        30857 :       if (!gimple_call_lhs (stmt))
   20110              :         {
   20111            1 :           gsi_replace (gsi, gimple_build_nop (), false);
   20112            1 :           return true;
   20113              :         }
   20114        30856 :       arg0 = gimple_call_arg (stmt, 0);
   20115        30856 :       arg1 = gimple_call_arg (stmt, 1);
   20116        30856 :       elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
   20117              :       /* For masked shift, only optimize if the mask is all ones.  */
   20118        30856 :       if (n_args > 2
   20119        30856 :           && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
   20120              :         break;
   20121        16081 :       if (is_vshift)
   20122              :         {
   20123         2640 :           if (TREE_CODE (arg1) != VECTOR_CST)
   20124              :             break;
   20125           69 :           count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
   20126           69 :           if (integer_zerop (arg1))
   20127           27 :             count = 0;
   20128           42 :           else if (rcode == ASHIFTRT)
   20129              :             break;
   20130              :           else
   20131          230 :             for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
   20132              :               {
   20133          212 :                 tree elt = VECTOR_CST_ELT (arg1, i);
   20134          212 :                 if (!wi::neg_p (wi::to_wide (elt))
   20135          375 :                     && wi::to_widest (elt) < count)
   20136           16 :                   return false;
   20137              :               }
   20138              :         }
   20139              :       else
   20140              :         {
   20141        13441 :           arg1 = ix86_vector_shift_count (arg1);
   20142        13441 :           if (!arg1)
   20143              :             break;
   20144         5608 :           count = tree_to_uhwi (arg1);
   20145              :         }
   20146         5653 :       if (count == 0)
   20147              :         {
   20148              :           /* Just return the first argument for shift by 0.  */
   20149           93 :           loc = gimple_location (stmt);
   20150           93 :           g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
   20151           93 :           gimple_set_location (g, loc);
   20152           93 :           gsi_replace (gsi, g, false);
   20153           93 :           return true;
   20154              :         }
   20155         5560 :       if (rcode != ASHIFTRT
   20156         5560 :           && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
   20157              :         {
   20158              :           /* For shift counts equal or greater than precision, except for
   20159              :              arithmetic right shift the result is zero.  */
   20160           78 :           loc = gimple_location (stmt);
   20161           78 :           g = gimple_build_assign (gimple_call_lhs (stmt),
   20162           78 :                                    build_zero_cst (TREE_TYPE (arg0)));
   20163           78 :           gimple_set_location (g, loc);
   20164           78 :           gsi_replace (gsi, g, false);
   20165           78 :           return true;
   20166              :         }
   20167              :       break;
   20168              : 
   20169          531 :     case IX86_BUILTIN_SHUFPD512:
   20170          531 :     case IX86_BUILTIN_SHUFPS512:
   20171          531 :     case IX86_BUILTIN_SHUFPD:
   20172          531 :     case IX86_BUILTIN_SHUFPD256:
   20173          531 :     case IX86_BUILTIN_SHUFPS:
   20174          531 :     case IX86_BUILTIN_SHUFPS256:
   20175          531 :       arg0 = gimple_call_arg (stmt, 0);
   20176          531 :       elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
   20177              :       /* This is masked shuffle.  Only optimize if the mask is all ones.  */
   20178          531 :       if (n_args > 3
   20179          895 :           && !ix86_masked_all_ones (elems,
   20180          364 :                                     gimple_call_arg (stmt, n_args - 1)))
   20181              :         break;
   20182          203 :       arg2 = gimple_call_arg (stmt, 2);
   20183          203 :       if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (stmt))
   20184              :         {
   20185          146 :           unsigned HOST_WIDE_INT shuffle_mask = TREE_INT_CST_LOW (arg2);
   20186              :           /* Check valid imm, refer to gcc.target/i386/testimm-10.c.  */
   20187          146 :           if (shuffle_mask > 255)
   20188              :             return false;
   20189              : 
   20190          144 :           machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)));
   20191          144 :           loc = gimple_location (stmt);
   20192          144 :           tree itype = (imode == E_DFmode
   20193          144 :                         ? long_long_integer_type_node : integer_type_node);
   20194          144 :           tree vtype = build_vector_type (itype, elems);
   20195          144 :           tree_vector_builder elts (vtype, elems, 1);
   20196              : 
   20197              : 
   20198              :           /* Transform integer shuffle_mask to vector perm_mask which
   20199              :              is used by vec_perm_expr, refer to shuflp[sd]256/512 in sse.md.  */
   20200          840 :           for (unsigned i = 0; i != elems; i++)
   20201              :             {
   20202          696 :               unsigned sel_idx;
   20203              :               /* Imm[1:0](if VL > 128, then use Imm[3:2],Imm[5:4],Imm[7:6])
   20204              :                  provide 2 select controls for each element of the
   20205              :                  destination.  */
   20206          696 :               if (imode == E_DFmode)
   20207          240 :                 sel_idx = (i & 1) * elems + (i & ~1)
   20208          240 :                           + ((shuffle_mask >> i) & 1);
   20209              :               else
   20210              :                 {
   20211              :                   /* Imm[7:0](if VL > 128, also use Imm[7:0]) provide 4 select
   20212              :                      controls for each element of the destination.  */
   20213          456 :                   unsigned j = i % 4;
   20214          456 :                   sel_idx = ((i >> 1) & 1) * elems + (i & ~3)
   20215          456 :                             + ((shuffle_mask >> 2 * j) & 3);
   20216              :                 }
   20217          696 :               elts.quick_push (build_int_cst (itype, sel_idx));
   20218              :             }
   20219              : 
   20220          144 :           tree perm_mask = elts.build ();
   20221          144 :           arg1 = gimple_call_arg (stmt, 1);
   20222          144 :           g = gimple_build_assign (gimple_call_lhs (stmt),
   20223              :                                    VEC_PERM_EXPR,
   20224              :                                    arg0, arg1, perm_mask);
   20225          144 :           gimple_set_location (g, loc);
   20226          144 :           gsi_replace (gsi, g, false);
   20227          144 :           return true;
   20228          144 :         }
   20229              :       // Do not error yet, the constant could be propagated later?
   20230              :       break;
   20231              : 
   20232           48 :     case IX86_BUILTIN_PABSB:
   20233           48 :     case IX86_BUILTIN_PABSW:
   20234           48 :     case IX86_BUILTIN_PABSD:
   20235              :       /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE.  */
   20236           48 :       if (!TARGET_MMX_WITH_SSE)
   20237              :         break;
   20238              :       /* FALLTHRU.  */
   20239         2189 :     case IX86_BUILTIN_PABSB128:
   20240         2189 :     case IX86_BUILTIN_PABSB256:
   20241         2189 :     case IX86_BUILTIN_PABSB512:
   20242         2189 :     case IX86_BUILTIN_PABSW128:
   20243         2189 :     case IX86_BUILTIN_PABSW256:
   20244         2189 :     case IX86_BUILTIN_PABSW512:
   20245         2189 :     case IX86_BUILTIN_PABSD128:
   20246         2189 :     case IX86_BUILTIN_PABSD256:
   20247         2189 :     case IX86_BUILTIN_PABSD512:
   20248         2189 :     case IX86_BUILTIN_PABSQ128:
   20249         2189 :     case IX86_BUILTIN_PABSQ256:
   20250         2189 :     case IX86_BUILTIN_PABSQ512:
   20251         2189 :     case IX86_BUILTIN_PABSB128_MASK:
   20252         2189 :     case IX86_BUILTIN_PABSB256_MASK:
   20253         2189 :     case IX86_BUILTIN_PABSW128_MASK:
   20254         2189 :     case IX86_BUILTIN_PABSW256_MASK:
   20255         2189 :     case IX86_BUILTIN_PABSD128_MASK:
   20256         2189 :     case IX86_BUILTIN_PABSD256_MASK:
   20257         2189 :       gcc_assert (n_args >= 1);
   20258         2189 :       if (!gimple_call_lhs (stmt))
   20259              :         {
   20260            1 :           gsi_replace (gsi, gimple_build_nop (), false);
   20261            1 :           return true;
   20262              :         }
   20263         2188 :       arg0 = gimple_call_arg (stmt, 0);
   20264         2188 :       elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
   20265              :       /* For masked ABS, only optimize if the mask is all ones.  */
   20266         2188 :       if (n_args > 1
   20267         2188 :           && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
   20268              :         break;
   20269          228 :       {
   20270          228 :         tree utype, ures, vce;
   20271          228 :         utype = unsigned_type_for (TREE_TYPE (arg0));
   20272              :         /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
   20273              :            instead of ABS_EXPR to handle overflow case(TYPE_MIN).  */
   20274          228 :         ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
   20275          228 :         gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
   20276          228 :         loc = gimple_location (stmt);
   20277          228 :         vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
   20278          228 :         g = gimple_build_assign (gimple_call_lhs (stmt),
   20279              :                                  VIEW_CONVERT_EXPR, vce);
   20280          228 :         gsi_replace (gsi, g, false);
   20281              :       }
   20282          228 :       return true;
   20283              : 
   20284         2225 :     case IX86_BUILTIN_MINPS:
   20285         2225 :     case IX86_BUILTIN_MINPD:
   20286         2225 :     case IX86_BUILTIN_MINPS256:
   20287         2225 :     case IX86_BUILTIN_MINPD256:
   20288         2225 :     case IX86_BUILTIN_MINPS512:
   20289         2225 :     case IX86_BUILTIN_MINPD512:
   20290         2225 :     case IX86_BUILTIN_MINPS128_MASK:
   20291         2225 :     case IX86_BUILTIN_MINPD128_MASK:
   20292         2225 :     case IX86_BUILTIN_MINPS256_MASK:
   20293         2225 :     case IX86_BUILTIN_MINPD256_MASK:
   20294         2225 :     case IX86_BUILTIN_MINPH128_MASK:
   20295         2225 :     case IX86_BUILTIN_MINPH256_MASK:
   20296         2225 :     case IX86_BUILTIN_MINPH512_MASK:
   20297         2225 :       tcode = LT_EXPR;
   20298         2225 :       goto do_minmax;
   20299              : 
   20300              :     case IX86_BUILTIN_MAXPS:
   20301              :     case IX86_BUILTIN_MAXPD:
   20302              :     case IX86_BUILTIN_MAXPS256:
   20303              :     case IX86_BUILTIN_MAXPD256:
   20304              :     case IX86_BUILTIN_MAXPS512:
   20305              :     case IX86_BUILTIN_MAXPD512:
   20306              :     case IX86_BUILTIN_MAXPS128_MASK:
   20307              :     case IX86_BUILTIN_MAXPD128_MASK:
   20308              :     case IX86_BUILTIN_MAXPS256_MASK:
   20309              :     case IX86_BUILTIN_MAXPD256_MASK:
   20310              :     case IX86_BUILTIN_MAXPH128_MASK:
   20311              :     case IX86_BUILTIN_MAXPH256_MASK:
   20312              :     case IX86_BUILTIN_MAXPH512_MASK:
   20313              :       tcode = GT_EXPR;
   20314         4435 :     do_minmax:
   20315         4435 :       gcc_assert (n_args >= 2);
   20316              :       /* Without SSE4.1 we often aren't able to pattern match it back to the
   20317              :          desired instruction.  */
   20318         4435 :       if (!gimple_call_lhs (stmt) || !optimize || !TARGET_SSE4_1)
   20319              :         break;
   20320         3865 :       arg0 = gimple_call_arg (stmt, 0);
   20321         3865 :       arg1 = gimple_call_arg (stmt, 1);
   20322         3865 :       elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
   20323              :       /* For masked minmax, only optimize if the mask is all ones.  */
   20324         3865 :       if (n_args > 2
   20325         3865 :           && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, 3)))
   20326              :         break;
   20327          647 :       if (n_args >= 5)
   20328              :         {
   20329          436 :           tree arg4 = gimple_call_arg (stmt, 4);
   20330          436 :           if (!tree_fits_uhwi_p (arg4))
   20331              :             break;
   20332          424 :           if (tree_to_uhwi (arg4) == 4)
   20333              :             /* Ok.  */;
   20334          416 :           else if (tree_to_uhwi (arg4) != 8)
   20335              :             /* Invalid round argument.  */
   20336              :             break;
   20337          416 :           else if (HONOR_NANS (arg0))
   20338              :             /* Lowering to comparison would raise exceptions which
   20339              :                shouldn't be raised.  */
   20340              :             break;
   20341              :         }
   20342          219 :       {
   20343          219 :         tree type = truth_type_for (TREE_TYPE (arg0));
   20344          219 :         tree cmpres = gimple_build (&stmts, tcode, type, arg0, arg1);
   20345          219 :         gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
   20346          219 :         g = gimple_build_assign (gimple_call_lhs (stmt),
   20347              :                                  VEC_COND_EXPR, cmpres, arg0, arg1);
   20348          219 :         gsi_replace (gsi, g, false);
   20349              :       }
   20350          219 :       return true;
   20351              : 
   20352              :     default:
   20353              :       break;
   20354              :     }
   20355              : 
   20356              :   return false;
   20357              : }
   20358              : 
   20359              : /* Handler for an SVML-style interface to
   20360              :    a library with vectorized intrinsics.  */
                        /* FN is the combined function to vectorize; TYPE_OUT and TYPE_IN are
                           the vector result and argument types.  Returns a new public,
                           external FUNCTION_DECL named after the library routine, or
                           NULL_TREE when flag_unsafe_math_optimizations is clear, when
                           TYPE_OUT and TYPE_IN differ in element mode or element count, when
                           FN is not one of the functions listed in the switch, or when the
                           shape is neither 2 x DFmode nor 4 x SFmode.  */
   20361              : 
   20362              : tree
   20363           10 : ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
   20364              : {
   20365           10 :   char name[20];
   20366           10 :   tree fntype, new_fndecl, args;
   20367           10 :   unsigned arity;
   20368           10 :   const char *bname;
   20369           10 :   machine_mode el_mode, in_mode;
   20370           10 :   int n, in_n;
   20371              : 
   20372              :   /* The SVML is suitable for unsafe math only.  */
   20373           10 :   if (!flag_unsafe_math_optimizations)
   20374              :     return NULL_TREE;
   20375              : 
                          /* Input and output vectors must agree in element mode and in
                             number of elements.  */
   20376           10 :   el_mode = TYPE_MODE (TREE_TYPE (type_out));
   20377           10 :   n = TYPE_VECTOR_SUBPARTS (type_out);
   20378           10 :   in_mode = TYPE_MODE (TREE_TYPE (type_in));
   20379           10 :   in_n = TYPE_VECTOR_SUBPARTS (type_in);
   20380           10 :   if (el_mode != in_mode
   20381           10 :       || n != in_n)
   20382              :     return NULL_TREE;
   20383              : 
                          /* Only the functions below are mapped, and only for vectors of
                             two DFmode or four SFmode elements.  */
   20384           10 :   switch (fn)
   20385              :     {
   20386           10 :     CASE_CFN_EXP:
   20387           10 :     CASE_CFN_LOG:
   20388           10 :     CASE_CFN_LOG10:
   20389           10 :     CASE_CFN_POW:
   20390           10 :     CASE_CFN_TANH:
   20391           10 :     CASE_CFN_TAN:
   20392           10 :     CASE_CFN_ATAN:
   20393           10 :     CASE_CFN_ATAN2:
   20394           10 :     CASE_CFN_ATANH:
   20395           10 :     CASE_CFN_CBRT:
   20396           10 :     CASE_CFN_SINH:
   20397           10 :     CASE_CFN_SIN:
   20398           10 :     CASE_CFN_ASINH:
   20399           10 :     CASE_CFN_ASIN:
   20400           10 :     CASE_CFN_COSH:
   20401           10 :     CASE_CFN_COS:
   20402           10 :     CASE_CFN_ACOSH:
   20403           10 :     CASE_CFN_ACOS:
   20404           10 :       if ((el_mode != DFmode || n != 2)
   20405            8 :           && (el_mode != SFmode || n != 4))
   20406              :         return NULL_TREE;
   20407            6 :       break;
   20408              : 
   20409              :     default:
   20410              :       return NULL_TREE;
   20411              :     }
   20412              : 
                          /* Fetch the double or float builtin decl for FN.  Only its name,
                             its function code and its argument chain are used below.
                             NOTE(review): BNAME + 10 skips the first ten characters of that
                             name, presumably the "__builtin_" prefix -- confirm.  */
   20413            6 :   tree fndecl = mathfn_built_in (el_mode == DFmode
   20414              :                                  ? double_type_node : float_type_node, fn);
   20415            6 :   bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
   20416              : 
                          /* log/logf get fixed routine names.  Everything else is spelled
                             "vmls<name>" with its last character overwritten by '4'
                             (NOTE(review): presumably the 'f' suffix of the float builtin),
                             or "vmld<name>2".  The writes are unbounded, so NAME must be
                             large enough for the longest builtin name handled here.  */
   20417            6 :   if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
   20418            2 :     strcpy (name, "vmlsLn4");
   20419            4 :   else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
   20420            0 :     strcpy (name, "vmldLn2");
   20421            4 :   else if (n == 4)
   20422              :     {
   20423            2 :       sprintf (name, "vmls%s", bname+10);
   20424            2 :       name[strlen (name)-1] = '4';
   20425              :     }
   20426              :   else
   20427            2 :     sprintf (name, "vmld%s2", bname+10);
   20428              : 
   20429              :   /* Convert to uppercase. */
                          /* Clearing bit 0x20 upper-cases the ASCII letter at index 4, the
                             first character after the "vmls"/"vmld" prefix.  */
   20430            6 :   name[4] &= ~0x20;
   20431              : 
                          /* Count the arguments of the scalar builtin to choose between a
                             one-argument and a two-argument vector function type.
                             NOTE(review): DECL_ARGUMENTS appears to be empty for the builtin
                             decl (the hit counts here show the increment and the one-argument
                             branch never run, even for logf), so ARITY stays 0 and the
                             two-argument type is always built -- confirm, and consider
                             walking TYPE_ARG_TYPES of the builtin's type instead.  */
   20432            6 :   arity = 0;
   20433            6 :   for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
   20434            0 :     arity++;
   20435              : 
   20436            6 :   if (arity == 1)
   20437            0 :     fntype = build_function_type_list (type_out, type_in, NULL);
   20438              :   else
   20439            6 :     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
   20440              : 
   20441              :   /* Build a function declaration for the vectorized function.  */
   20442            6 :   new_fndecl = build_decl (BUILTINS_LOCATION,
   20443              :                            FUNCTION_DECL, get_identifier (name), fntype);
                          /* Mark the decl public and external, and flag it as NOVOPS and
                             read-only.  */
   20444            6 :   TREE_PUBLIC (new_fndecl) = 1;
   20445            6 :   DECL_EXTERNAL (new_fndecl) = 1;
   20446            6 :   DECL_IS_NOVOPS (new_fndecl) = 1;
   20447            6 :   TREE_READONLY (new_fndecl) = 1;
   20448              : 
   20449            6 :   return new_fndecl;
   20450              : }
   20451              : 
   20452              : /* Handler for an ACML-style interface to
   20453              :    a library with vectorized intrinsics.  */
                        /* FN is the combined function to vectorize; TYPE_OUT and TYPE_IN are
                           the vector result and argument types.  Returns a new public,
                           external FUNCTION_DECL named after the library routine, or
                           NULL_TREE when the target is not 64-bit, when
                           flag_unsafe_math_optimizations is clear, when TYPE_OUT and TYPE_IN
                           differ in element mode or element count, when FN is not one of the
                           functions listed in the switch, or when the shape is neither
                           2 x DFmode nor 4 x SFmode.  */
   20454              : 
   20455              : tree
   20456            3 : ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
   20457              : {
                          /* Name template: the two dots at indices 4 and 5 are overwritten
                             with the precision letter and the vector width, and the
                             operation name is appended at index 7, right after the
                             trailing underscore.  */
   20458            3 :   char name[20] = "__vr.._";
   20459            3 :   tree fntype, new_fndecl, args;
   20460            3 :   unsigned arity;
   20461            3 :   const char *bname;
   20462            3 :   machine_mode el_mode, in_mode;
   20463            3 :   int n, in_n;
   20464              : 
   20465              :   /* The ACML is 64bits only and suitable for unsafe math only as
   20466              :      it does not correctly support parts of IEEE with the required
   20467              :      precision such as denormals.  */
   20468            3 :   if (!TARGET_64BIT
   20469            3 :       || !flag_unsafe_math_optimizations)
   20470              :     return NULL_TREE;
   20471              : 
                          /* Input and output vectors must agree in element mode and in
                             number of elements.  */
   20472            3 :   el_mode = TYPE_MODE (TREE_TYPE (type_out));
   20473            3 :   n = TYPE_VECTOR_SUBPARTS (type_out);
   20474            3 :   in_mode = TYPE_MODE (TREE_TYPE (type_in));
   20475            3 :   in_n = TYPE_VECTOR_SUBPARTS (type_in);
   20476            3 :   if (el_mode != in_mode
   20477            3 :       || n != in_n)
   20478              :     return NULL_TREE;
   20479              : 
                          /* Only the functions below are mapped: "d2" is filled in for two
                             DFmode elements, "s4" for four SFmode elements, and any other
                             shape is rejected.  */
   20480            3 :   switch (fn)
   20481              :     {
   20482            3 :     CASE_CFN_SIN:
   20483            3 :     CASE_CFN_COS:
   20484            3 :     CASE_CFN_EXP:
   20485            3 :     CASE_CFN_LOG:
   20486            3 :     CASE_CFN_LOG2:
   20487            3 :     CASE_CFN_LOG10:
   20488            3 :       if (el_mode == DFmode && n == 2)
   20489              :         {
   20490            3 :           name[4] = 'd';
   20491            3 :           name[5] = '2';
   20492              :         }
   20493            0 :       else if (el_mode == SFmode && n == 4)
   20494              :         {
   20495            0 :           name[4] = 's';
   20496            0 :           name[5] = '4';
   20497              :         }
   20498              :       else
   20499              :         return NULL_TREE;
   20500            3 :       break;
   20501              : 
   20502              :     default:
   20503              :       return NULL_TREE;
   20504              :     }
   20505              : 
                          /* Fetch the double or float builtin decl for FN and append its
                             name, minus the first ten characters, to the template.
                             NOTE(review): those ten characters are presumably the
                             "__builtin_" prefix -- confirm.  The write is unbounded, so NAME
                             must be large enough for the longest builtin name handled
                             here.  */
   20506            3 :   tree fndecl = mathfn_built_in (el_mode == DFmode
   20507              :                                  ? double_type_node : float_type_node, fn);
   20508            3 :   bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
   20509            3 :   sprintf (name + 7, "%s", bname+10);
   20510              : 
                          /* Count the arguments of the scalar builtin to choose between a
                             one-argument and a two-argument vector function type.
                             NOTE(review): DECL_ARGUMENTS appears to be empty for the builtin
                             decl (the hit counts here show the increment and the one-argument
                             branch never run), so ARITY stays 0 and the two-argument type is
                             always built even though every function accepted above is
                             unary -- confirm, and consider walking TYPE_ARG_TYPES of the
                             builtin's type instead.  */
   20511            3 :   arity = 0;
   20512            3 :   for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
   20513            0 :     arity++;
   20514              : 
   20515            3 :   if (arity == 1)
   20516            0 :     fntype = build_function_type_list (type_out, type_in, NULL);
   20517              :   else
   20518            3 :     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
   20519              : 
   20520              :   /* Build a function declaration for the vectorized function.  */
   20521            3 :   new_fndecl = build_decl (BUILTINS_LOCATION,
   20522              :                            FUNCTION_DECL, get_identifier (name), fntype);
                          /* Mark the decl public and external, and flag it as NOVOPS and
                             read-only.  */
   20523            3 :   TREE_PUBLIC (new_fndecl) = 1;
   20524            3 :   DECL_EXTERNAL (new_fndecl) = 1;
   20525            3 :   DECL_IS_NOVOPS (new_fndecl) = 1;
   20526            3 :   TREE_READONLY (new_fndecl) = 1;
   20527              : 
   20528            3 :   return new_fndecl;
   20529              : }
   20530              : 
   20531              : /* Handler for an AOCL-LibM-style interface to
   20532              :    a library with vectorized intrinsics.  */
                        /* FN is the combined function to vectorize; TYPE_OUT and TYPE_IN are
                           the vector result and argument types.  Returns a new public,
                           external FUNCTION_DECL named "amd_vr<s|d><width>_<op>", or
                           NULL_TREE when the target is not 64-bit, when
                           flag_unsafe_math_optimizations is clear, when TYPE_OUT and TYPE_IN
                           differ in element mode or element count, when the element mode is
                           neither SFmode nor DFmode, or when the supported_n table has no
                           entry for this combination of FN, precision and vector width.  */
   20533              : 
   20534              : tree
   20535          386 : ix86_veclibabi_aocl (combined_fn fn, tree type_out, tree type_in)
   20536              : {
                          /* NAME starts with the common prefix; NAME_LEN tracks the number
                             of characters written so far (6 is the length of "amd_vr").  */
   20537          386 :   char name[20] = "amd_vr";
   20538          386 :   int name_len = 6;
   20539          386 :   tree fntype, new_fndecl, args;
   20540          386 :   unsigned arity;
   20541          386 :   const char *bname;
   20542          386 :   machine_mode el_mode, in_mode;
   20543          386 :   int n, in_n;
   20544              : 
   20545              :   /* AOCL-LibM is 64bits only.  It is also suitable for unsafe math only
   20546              :      as it trades off some accuracy for increased performance.  */
   20547          386 :   if (!TARGET_64BIT
   20548          386 :       || !flag_unsafe_math_optimizations)
   20549              :     return NULL_TREE;
   20550              : 
                          /* Input and output vectors must agree in element mode and in
                             number of elements.  */
   20551          386 :   el_mode = TYPE_MODE (TREE_TYPE (type_out));
   20552          386 :   n = TYPE_VECTOR_SUBPARTS (type_out);
   20553          386 :   in_mode = TYPE_MODE (TREE_TYPE (type_in));
   20554          386 :   in_n = TYPE_VECTOR_SUBPARTS (type_in);
   20555          386 :   if (el_mode != in_mode
   20556          386 :       || n != in_n)
   20557              :     return NULL_TREE;
   20558              : 
                          /* N must be positive so that it can never match the zero padding
                             in the supported_n table below.  */
   20559          386 :   gcc_checking_assert (n > 0);
   20560              : 
   20561              :   /* Decide whether there exists a function for the combination of FN, the mode
   20562              :      and the vector width.  Return early if it doesn't.  */
   20563              : 
   20564          386 :   if (el_mode != DFmode && el_mode != SFmode)
   20565              :     return NULL_TREE;
   20566              : 
   20567              :   /* Supported vector widths for given FN and single/double precision.  Zeros
   20568              :      are used to fill out unused positions in the arrays.  */
                          /* Indexing: the first index is the operation number assigned by
                             the switch below, the second is 0 for SFmode and 1 for DFmode,
                             and the third runs over up to three supported element counts.  */
   20569          386 :   static const int supported_n[][2][3] = {
   20570              :   /*   Single prec. ,  Double prec.  */
   20571              :     { { 16,  0,  0 }, {  2,  4,  8 } }, /* TAN.  */
   20572              :     { {  4,  8, 16 }, {  2,  4,  8 } }, /* EXP.  */
   20573              :     { {  4,  8, 16 }, {  2,  4,  8 } }, /* EXP2.  */
   20574              :     { {  4,  8, 16 }, {  2,  4,  8 } }, /* LOG.  */
   20575              :     { {  4,  8, 16 }, {  2,  4,  8 } }, /* LOG2.  */
   20576              :     { {  4,  8, 16 }, {  2,  4,  8 } }, /* COS.  */
   20577              :     { {  4,  8, 16 }, {  2,  4,  8 } }, /* SIN.  */
   20578              :     { {  4,  8, 16 }, {  2,  4,  8 } }, /* POW.  */
   20579              :     { {  4,  8, 16 }, {  2,  4,  8 } }, /* ERF.  */
   20580              :     { {  4,  8, 16 }, {  2,  8,  0 } }, /* ATAN.  */
   20581              :     { {  4,  8, 16 }, {  2,  0,  0 } }, /* LOG10.  */
   20582              :     { {  4,  0,  0 }, {  2,  0,  0 } }, /* EXP10.  */
   20583              :     { {  4,  0,  0 }, {  2,  0,  0 } }, /* LOG1P.  */
   20584              :     { {  4,  8, 16 }, {  8,  0,  0 } }, /* ASIN.  */
   20585              :     { {  4, 16,  0 }, {  0,  0,  0 } }, /* ACOS.  */
   20586              :     { {  4,  8, 16 }, {  0,  0,  0 } }, /* TANH.  */
   20587              :     { {  4,  0,  0 }, {  0,  0,  0 } }, /* EXPM1.  */
   20588              :     { {  4,  8,  0 }, {  0,  0,  0 } }, /* COSH.  */
   20589              :   };
   20590              : 
   20591              :   /* We cannot simply index the supported_n array with FN since multiple FNs
   20592              :      may correspond to a single operation (see the definitions of these
   20593              :      CASE_CFN_* macros).  */
                          /* The numbers assigned here must stay in step with the row order
                             of supported_n above.  */
   20594          386 :   int i;
   20595          386 :   switch (fn)
   20596              :     {
   20597              :     CASE_CFN_TAN   :  i = 0; break;
   20598           28 :     CASE_CFN_EXP   :  i = 1; break;
   20599           28 :     CASE_CFN_EXP2  :  i = 2; break;
   20600           28 :     CASE_CFN_LOG   :  i = 3; break;
   20601           28 :     CASE_CFN_LOG2  :  i = 4; break;
   20602           28 :     CASE_CFN_COS   :  i = 5; break;
   20603           28 :     CASE_CFN_SIN   :  i = 6; break;
   20604           28 :     CASE_CFN_POW   :  i = 7; break;
   20605           28 :     CASE_CFN_ERF   :  i = 8; break;
   20606           25 :     CASE_CFN_ATAN  :  i = 9; break;
   20607           20 :     CASE_CFN_LOG10 : i = 10; break;
   20608           10 :     CASE_CFN_EXP10 : i = 11; break;
   20609           10 :     CASE_CFN_LOG1P : i = 12; break;
   20610           24 :     CASE_CFN_ASIN  : i = 13; break;
   20611           14 :     CASE_CFN_ACOS  : i = 14; break;
   20612           18 :     CASE_CFN_TANH  : i = 15; break;
   20613            9 :     CASE_CFN_EXPM1 : i = 16; break;
   20614           14 :     CASE_CFN_COSH  : i = 17; break;
   20615              :     default: return NULL_TREE;
   20616              :     }
   20617              : 
                          /* J selects the precision column (0 = SFmode, 1 = DFmode).  Scan
                             the three width slots of that entry for N.  */
   20618          386 :   int j = el_mode == DFmode;
   20619          386 :   bool n_is_supported = false;
   20620          976 :   for (unsigned k = 0; k < 3; k++)
   20621          857 :     if (supported_n[i][j][k] == n)
   20622              :       {
   20623              :         n_is_supported = true;
   20624              :         break;
   20625              :       }
   20626          386 :   if (!n_is_supported)
   20627              :     return NULL_TREE;
   20628              : 
   20629              :   /* Append the precision and the vector width to the function name we are
   20630              :      constructing.  */
                          /* Widths 2, 4 and 8 take one digit and 16 takes two.  Any other
                             width was already rejected by the table lookup above.  */
   20631          267 :   name[name_len++] = el_mode == DFmode ? 'd' : 's';
   20632          267 :   switch (n)
   20633              :     {
   20634          214 :       case 2:
   20635          214 :       case 4:
   20636          214 :       case 8:
   20637          214 :         name[name_len++] = '0' + n;
   20638          214 :         break;
   20639           53 :       case 16:
   20640           53 :         name[name_len++] = '1';
   20641           53 :         name[name_len++] = '6';
   20642           53 :         break;
   20643            0 :       default:
   20644            0 :         gcc_unreachable ();
   20645              :     }
   20646          267 :   name[name_len++] = '_';
   20647              : 
   20648              :   /* Append the operation name (steal it from the name of a builtin).  */
                          /* NOTE(review): BNAME + 10 skips the first ten characters of the
                             builtin's name, presumably the "__builtin_" prefix -- confirm.
                             The write is unbounded, so NAME must be large enough for the
                             longest builtin name handled here.  */
   20649          267 :   tree fndecl = mathfn_built_in (el_mode == DFmode
   20650              :                                  ? double_type_node : float_type_node, fn);
   20651          267 :   bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
   20652          267 :   sprintf (name + name_len, "%s", bname + 10);
   20653              : 
                          /* Count the arguments of the scalar builtin to choose between a
                             one-argument and a two-argument vector function type.
                             NOTE(review): DECL_ARGUMENTS appears to be empty for the builtin
                             decl (the hit counts here show the increment and the one-argument
                             branch never run), so ARITY stays 0 and the two-argument type is
                             always built, even for unary functions -- confirm, and consider
                             walking TYPE_ARG_TYPES of the builtin's type instead.  */
   20654          267 :   arity = 0;
   20655          267 :   for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
   20656            0 :     arity++;
   20657              : 
   20658          267 :   if (arity == 1)
   20659            0 :     fntype = build_function_type_list (type_out, type_in, NULL);
   20660              :   else
   20661          267 :     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
   20662              : 
   20663              :   /* Build a function declaration for the vectorized function.  */
   20664          267 :   new_fndecl = build_decl (BUILTINS_LOCATION,
   20665              :                            FUNCTION_DECL, get_identifier (name), fntype);
                          /* Mark the decl public, external and read-only.
                             NOTE(review): unlike the SVML and ACML handlers, DECL_IS_NOVOPS
                             is not set here -- confirm this is intentional.  */
   20666          267 :   TREE_PUBLIC (new_fndecl) = 1;
   20667          267 :   DECL_EXTERNAL (new_fndecl) = 1;
   20668          267 :   TREE_READONLY (new_fndecl) = 1;
   20669              : 
   20670          267 :   return new_fndecl;
   20671              : }
   20672              : 
   20673              : /* Return the decl of the builtin that implements a scatter store with
   20674              :    register type VECTYPE, index type INDEX_TYPE and scale SCALE.
   20675              :    Return NULL_TREE if it is not available.
                      : 
                      :    A builtin is only returned when all of the following hold:
                      :    - TARGET_AVX512F is set, and so is the TARGET_USE_SCATTER_*PARTS
                      :      flag selected by the number of elements of VECTYPE (the 8PARTS
                      :      flag is consulted for every count other than 2 and 4);
                      :    - INDEX_TYPE is an integer or pointer type in SImode or DImode,
                      :      no wider than POINTER_SIZE and, if narrower, signed;
                      :    - SCALE is 1, 2, 4 or 8;
                      :    - the mode of VECTYPE is handled by the switch below; V4DF, V4DI,
                      :      V8SF, V8SI, V2DF, V2DI, V4SF and V4SI additionally need
                      :      TARGET_AVX512VL.
                      : 
                      :    With DF/DI elements an SImode index selects the *ALTSI* builtin;
                      :    with SF/SI elements a DImode index selects the *ALTDI* builtin.  */
   20676              : 
   20677              : static tree
   20678       129737 : ix86_vectorize_builtin_scatter (const_tree vectype,
   20679              :                                 const_tree index_type, int scale)
   20680              : {
   20681       129737 :   bool si;
   20682       129737 :   enum ix86_builtins code;
   20683              : 
   20684       129737 :   if (!TARGET_AVX512F)
   20685              :     return NULL_TREE;
   20686              : 
   20687         4193 :   if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
   20688         7367 :       ? !TARGET_USE_SCATTER_2PARTS
   20689         7367 :       : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
   20690         3174 :          ? !TARGET_USE_SCATTER_4PARTS
   20691         2057 :          : !TARGET_USE_SCATTER_8PARTS))
   20692              :     return NULL_TREE;
   20693              : 
   20694         4193 :   if ((TREE_CODE (index_type) != INTEGER_TYPE
   20695          463 :        && !POINTER_TYPE_P (index_type))
   20696         4656 :       || (TYPE_MODE (index_type) != SImode
   20697         1777 :           && TYPE_MODE (index_type) != DImode))
   20698            0 :     return NULL_TREE;
   20699              : 
   20700         4423 :   if (TYPE_PRECISION (index_type) > POINTER_SIZE)
   20701              :     return NULL_TREE;
   20702              : 
   20703              :   /* v*scatter* insn sign extends index to pointer mode, so an unsigned
                      :      index narrower than a pointer cannot be used.  */
   20704         4193 :   if (TYPE_PRECISION (index_type) < POINTER_SIZE
   20705         4193 :       && TYPE_UNSIGNED (index_type))
   20706              :     return NULL_TREE;
   20707              : 
   20708              :   /* Scale can be 1, 2, 4 or 8.  */
   20709         4193 :   if (scale <= 0
   20710         4193 :       || scale > 8
   20711         4177 :       || (scale & (scale - 1)) != 0)
   20712              :     return NULL_TREE;
   20713              : 
   20714         4177 :   si = TYPE_MODE (index_type) == SImode;  /* Otherwise DImode.  */
   20715         4177 :   switch (TYPE_MODE (vectype))
   20716              :     {
   20717          169 :     case E_V8DFmode:
   20718          169 :       code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
   20719              :       break;
   20720          104 :     case E_V8DImode:
   20721          104 :       code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
   20722              :       break;
   20723          177 :     case E_V16SFmode:
   20724          177 :       code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
   20725              :       break;
   20726          257 :     case E_V16SImode:
   20727          257 :       code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
   20728              :       break;
   20729          206 :     case E_V4DFmode:
   20730          206 :       if (TARGET_AVX512VL)
   20731           34 :         code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
   20732              :       else
   20733              :         return NULL_TREE;
   20734              :       break;
   20735          142 :     case E_V4DImode:
   20736          142 :       if (TARGET_AVX512VL)
   20737           34 :         code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
   20738              :       else
   20739              :         return NULL_TREE;
   20740              :       break;
   20741          248 :     case E_V8SFmode:
   20742          248 :       if (TARGET_AVX512VL)
   20743           40 :         code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
   20744              :       else
   20745              :         return NULL_TREE;
   20746              :       break;
   20747          268 :     case E_V8SImode:
   20748          268 :       if (TARGET_AVX512VL)
   20749           82 :         code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
   20750              :       else
   20751              :         return NULL_TREE;
   20752              :       break;
   20753          251 :     case E_V2DFmode:
   20754          251 :       if (TARGET_AVX512VL)
   20755           94 :         code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
   20756              :       else
   20757              :         return NULL_TREE;
   20758              :       break;
   20759          196 :     case E_V2DImode:
   20760          196 :       if (TARGET_AVX512VL)
   20761           94 :         code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
   20762              :       else
   20763              :         return NULL_TREE;
   20764              :       break;
   20765          301 :     case E_V4SFmode:
   20766          301 :       if (TARGET_AVX512VL)
   20767           96 :         code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
   20768              :       else
   20769              :         return NULL_TREE;
   20770              :       break;
   20771          324 :     case E_V4SImode:
   20772          324 :       if (TARGET_AVX512VL)
   20773          138 :         code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
   20774              :       else
   20775              :         return NULL_TREE;
   20776              :       break;
   20777              :     default:
   20778              :       return NULL_TREE;
   20779              :     }
   20780              : 
   20781         1319 :   return get_ix86_builtin (code);
   20782              : }
   20783              : 
   20784              : /* Return true if it is safe to use the rsqrt optabs to optimize
   20785              :    1.0/sqrt in MODE.  This requires HFmode, or SSE math being enabled
                      :    (TARGET_SSE && TARGET_SSE_MATH) for any other mode, together with
                      :    flag_finite_math_only and flag_unsafe_math_optimizations being set
                      :    and flag_trapping_math being clear.  */
   20786              : 
   20787              : static bool
   20788           66 : use_rsqrt_p (machine_mode mode)
   20789              : {
   20790           66 :   return ((mode == HFmode
   20791           42 :            || (TARGET_SSE && TARGET_SSE_MATH))
   20792           66 :           && flag_finite_math_only
   20793           65 :           && !flag_trapping_math
   20794          119 :           && flag_unsafe_math_optimizations);
   20795              : }
   20796              : 
   20797              : /* Helper for avx_vpermilps256_operand et al.  This is also used by
   20798              :    the expansion functions to turn the parallel back into a mask.
   20799              :    The return value is 0 for no match and the imm8+1 for a match.
                      : 
                      :    PAR must hold exactly one CONST_INT selector, smaller than the
                      :    number of elements, for each element of MODE; anything else is
                      :    reported as no match.  */
   20800              : 
   20801              : int
   20802        64039 : avx_vpermilp_parallel (rtx par, machine_mode mode)
   20803              : {
   20804        64039 :   unsigned i, nelt = GET_MODE_NUNITS (mode);
   20805        64039 :   unsigned mask = 0;
   20806        64039 :   unsigned char ipar[16] = {};  /* Silence -Wuninitialized warning.  */
   20807              : 
   20808        64039 :   if (XVECLEN (par, 0) != (int) nelt)
   20809              :     return 0;
   20810              : 
   20811              :   /* Validate that all of the elements are constants, and not totally
   20812              :      out of range.  Copy the data into an integral array to make the
   20813              :      subsequent checks easier.  */
   20814       312319 :   for (i = 0; i < nelt; ++i)
   20815              :     {
   20816       248280 :       rtx er = XVECEXP (par, 0, i);
   20817       248280 :       unsigned HOST_WIDE_INT ei;
   20818              : 
   20819       248280 :       if (!CONST_INT_P (er))
   20820              :         return 0;
   20821       248280 :       ei = INTVAL (er);
   20822       248280 :       if (ei >= nelt)
   20823              :         return 0;
   20824       248280 :       ipar[i] = ei;
   20825              :     }
   20826              : 
   20827        64039 :   switch (mode)
   20828              :     {
   20829              :     case E_V8DFmode:
   20830              :     case E_V8DImode:
   20831              :       /* In the 512-bit DFmode case, we can only move elements within
   20832              :          a 128-bit lane.  First fill mask bits 4-7 from elements 4-7,
   20833              :          then fall through to handle elements 0-3 like the 256-bit case.  */
   20834         4762 :       for (i = 4; i < 6; ++i)
   20835              :         {
   20836         3305 :           if (!IN_RANGE (ipar[i], 4, 5))
   20837              :             return 0;
   20838         3080 :           mask |= (ipar[i] - 4) << i;
   20839              :         }
   20840         3519 :       for (i = 6; i < 8; ++i)
   20841              :         {
   20842         2488 :           if (!IN_RANGE (ipar[i], 6, 7))
   20843              :             return 0;
   20844         2062 :           mask |= (ipar[i] - 6) << i;
   20845              :         }
   20846              :       /* FALLTHRU */
   20847              : 
   20848              :     case E_V4DFmode:
   20849              :     case E_V4DImode:
   20850              :       /* In the 256-bit DFmode case, we can only move elements within
   20851              :          a 128-bit lane.  Element I contributes bit I of the mask.  */
   20852        46176 :       for (i = 0; i < 2; ++i)
   20853              :         {
   20854        38891 :           if (!IN_RANGE (ipar[i], 0, 1))
   20855              :             return 0;
   20856        25894 :           mask |= ipar[i] << i;
   20857              :         }
   20858        19165 :       for (i = 2; i < 4; ++i)
   20859              :         {
   20860        13230 :           if (!IN_RANGE (ipar[i], 2, 3))
   20861              :             return 0;
   20862        11880 :           mask |= (ipar[i] - 2) << i;
   20863              :         }
   20864              :       break;
   20865              : 
   20866              :     case E_V16SFmode:
   20867              :     case E_V16SImode:
   20868              :       /* In the 512-bit SFmode case, the permutation in the upper 256 bits
   20869              :          must mirror the permutation in the lower 256 bits.  */
   20870         3652 :       for (i = 0; i < 8; ++i)
   20871         3256 :         if (ipar[i] + 8 != ipar[i + 8])
   20872              :           return 0;
   20873              :       /* FALLTHRU */
   20874              : 
   20875              :     case E_V8SFmode:
   20876              :     case E_V8SImode:
   20877              :       /* In 256 bit SFmode case, we have full freedom of
   20878              :          movement within the low 128-bit lane, but the high 128-bit
   20879              :          lane must mirror the exact same pattern.  */
   20880        33768 :       for (i = 0; i < 4; ++i)
   20881        28657 :         if (ipar[i] + 4 != ipar[i + 4])
   20882              :           return 0;
   20883              :       nelt = 4;  /* Only the first four selectors are encoded below.  */
   20884              :       /* FALLTHRU */
   20885              : 
   20886        38401 :     case E_V2DFmode:
   20887        38401 :     case E_V2DImode:
   20888        38401 :     case E_V4SFmode:
   20889        38401 :     case E_V4SImode:
   20890              :       /* In the 128-bit case, we've full freedom in the placement of
   20891              :          the elements from the source operand.  Each selector occupies
                      :          nelt / 2 bits of the mask.  */
   20892       134281 :       for (i = 0; i < nelt; ++i)
   20893        95880 :         mask |= ipar[i] << (i * (nelt / 2));
   20894              :       break;
   20895              : 
   20896            0 :     default:
   20897            0 :       gcc_unreachable ();
   20898              :     }
   20899              : 
   20900              :   /* Make sure success has a non-zero value by adding one.  */
   20901        44336 :   return mask + 1;
   20902              : }
   20903              : 
   20904              : /* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   20905              :    the expansion functions to turn the parallel back into a mask.
   20906              :    The return value is 0 for no match and the imm8+1 for a match.
                      : 
                      :    PAR must hold one CONST_INT selector below 2 * nelt for each of the
                      :    nelt elements of MODE.  Each half of PAR has to select consecutive
                      :    elements starting at a multiple of nelt / 2; the quotient for the
                      :    low half lands in mask bits 0-1 and for the high half in bits 4-5.  */
   20907              : 
   20908              : int
   20909        42902 : avx_vperm2f128_parallel (rtx par, machine_mode mode)
   20910              : {
   20911        42902 :   unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
   20912        42902 :   unsigned mask = 0;
   20913        42902 :   unsigned char ipar[8] = {};  /* Silence -Wuninitialized warning.  */
   20914              : 
   20915        42902 :   if (XVECLEN (par, 0) != (int) nelt)
   20916              :     return 0;
   20917              : 
   20918              :   /* Validate that all of the elements are constants, and not totally
   20919              :      out of range.  Copy the data into an integral array to make the
   20920              :      subsequent checks easier.  */
   20921       345806 :   for (i = 0; i < nelt; ++i)
   20922              :     {
   20923       302904 :       rtx er = XVECEXP (par, 0, i);
   20924       302904 :       unsigned HOST_WIDE_INT ei;
   20925              : 
   20926       302904 :       if (!CONST_INT_P (er))
   20927              :         return 0;
   20928       302904 :       ei = INTVAL (er);
   20929       302904 :       if (ei >= 2 * nelt)
   20930              :         return 0;
   20931       302904 :       ipar[i] = ei;
   20932              :     }
   20933              : 
   20934              :   /* Validate that each half of the permute selects consecutive elements.  */
   20935        82667 :   for (i = 0; i < nelt2 - 1; ++i)
   20936        66570 :     if (ipar[i] + 1 != ipar[i + 1])
   20937              :       return 0;
   20938        50942 :   for (i = nelt2; i < nelt - 1; ++i)
   20939        35479 :     if (ipar[i] + 1 != ipar[i + 1])
   20940              :       return 0;
   20941              : 
   20942              :   /* Reconstruct the mask; half I is encoded at bit position I * 4.  */
   20943        46293 :   for (i = 0; i < 2; ++i)
   20944              :     {
   20945        30880 :       unsigned e = ipar[i * nelt2];
   20946        30880 :       if (e % nelt2)
   20947              :         return 0;
   20948        30830 :       e /= nelt2;
   20949        30830 :       mask |= e << (i * 4);
   20950              :     }
   20951              : 
   20952              :   /* Make sure success has a non-zero value by adding one.  */
   20953        15413 :   return mask + 1;
   20954              : }
   20955              : 
   20956              : /* Return a mask of VPTERNLOG operands that do not affect output.
                      :    PTERNLOG_IMM is the CONST_INT immediate of the instruction.  Bit 0
                      :    of the result is set if the first operand is redundant, bit 1 for
                      :    the second operand and bit 2 for the third.  */
   20957              : 
   20958              : int
   20959         2431 : vpternlog_redundant_operand_mask (rtx pternlog_imm)
   20960              : {
   20961         2431 :   int mask = 0;
   20962         2431 :   int imm8 = INTVAL (pternlog_imm);
   20963              : 
   20964         2431 :   if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F))  /* Bits 4-7 repeat bits 0-3.  */
   20965            6 :     mask |= 1;
   20966         2431 :   if (((imm8 >> 2) & 0x33) == (imm8 & 0x33))  /* Bits 2-3/6-7 repeat 0-1/4-5.  */
   20967            6 :     mask |= 2;
   20968         2431 :   if (((imm8 >> 1) & 0x55) == (imm8 & 0x55))  /* Odd bits repeat even bits.  */
   20969          155 :     mask |= 4;
   20970              : 
   20971         2431 :   return mask;
   20972              : }
   20973              : 
   20974              : /* Eliminate false dependencies on operands that do not affect output
   20975              :    by substituting other operands of a VPTERNLOG.  OPERANDS[1],
                      :    OPERANDS[2] and OPERANDS[3] are the first, second and third operand
                      :    and OPERANDS[4] is the immediate; redundant entries of OPERANDS are
                      :    overwritten in place.  */
   20976              : 
   20977              : void
   20978           81 : substitute_vpternlog_operands (rtx *operands)
   20979              : {
   20980           81 :   int mask = vpternlog_redundant_operand_mask (operands[4]);
   20981              : 
   20982           81 :   if (mask & 1) /* The first operand is redundant.  */
   20983            2 :     operands[1] = operands[2];
   20984              : 
   20985           81 :   if (mask & 2) /* The second operand is redundant.  */
   20986            2 :     operands[2] = operands[1];
   20987              : 
   20988           81 :   if (mask & 4) /* The third operand is redundant.  */
   20989           77 :     operands[3] = operands[1];
   20990            4 :   else if (REG_P (operands[3]))  /* Reuse a register third operand instead.  */
   20991              :     {
   20992            0 :       if (mask & 1)
   20993            0 :         operands[1] = operands[3];
   20994            0 :       if (mask & 2)
   20995            0 :         operands[2] = operands[3];
   20996              :     }
   20997           81 : }
   20998              : 
   20999              : /* Return a register priority for hard reg HARD_REGNO.  The values
                      :    returned range from 0 (most discouraged) to 4 (most preferred).  */
   21000              : static int
   21001     57926219 : ix86_register_priority (int hard_regno)
   21002              : {
   21003              :   /* ebp and r13 as the base always want a displacement, r12 as the
   21004              :      base always wants an index.  So discourage their usage in an
   21005              :      address.  */
   21006     57926219 :   if (hard_regno == R12_REG || hard_regno == R13_REG)
   21007              :     return 0;
   21008     53523919 :   if (hard_regno == BP_REG)
   21009              :     return 1;
   21010              :   /* New x86-64 int registers result in bigger code size.  Discourage them.  */
   21011     51590749 :   if (REX_INT_REGNO_P (hard_regno))
   21012              :     return 2;
   21013     35171096 :   if (REX2_INT_REGNO_P (hard_regno))
   21014              :     return 2;
   21015              :   /* New x86-64 SSE registers result in bigger code size.  Discourage them.  */
   21016     35168654 :   if (REX_SSE_REGNO_P (hard_regno))
   21017              :     return 2;
   21018     29049150 :   if (EXT_REX_SSE_REGNO_P (hard_regno))
   21019              :     return 1;
   21020              :   /* Usage of AX register results in smaller code.  Prefer it.  */
   21021     28771993 :   if (hard_regno == AX_REG)
   21022      3777613 :     return 4;
   21023              :   return 3;
   21024              : }
   21025              : 
   21026              : /* Implement TARGET_PREFERRED_RELOAD_CLASS.
   21027              : 
   21028              :    Put float CONST_DOUBLE in the constant pool instead of fp regs.
   21029              :    Non-constant QImode data must go into class Q_REGS or a mask class.
   21030              :    Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   21031              :    movdf to do mem-to-mem moves through integer regs.
                      :    NOTE(review): no narrowing of ALL_REGS to GENERAL_REGS is visible in
                      :    the body below -- confirm whether that sentence is still current.  */
   21032              : 
   21033              : static reg_class_t
   21034    545208504 : ix86_preferred_reload_class (rtx x, reg_class_t regclass)
   21035              : {
   21036    545208504 :   machine_mode mode = GET_MODE (x);
   21037              : 
   21038              :   /* We're only allowed to return a subclass of REGCLASS.  Many of the
   21039              :      following checks fail for NO_REGS, so eliminate that early.  */
   21040    545208504 :   if (regclass == NO_REGS)
   21041              :     return NO_REGS;
   21042              : 
   21043              :   /* All classes can load zeros.  */
   21044    544362660 :   if (x == CONST0_RTX (mode))
   21045              :     return regclass;
   21046              : 
   21047              :   /* Force constants into memory if we are loading a (nonzero) constant into
   21048              :      an MMX, SSE or MASK register.  This is because there are no MMX/SSE/MASK
   21049              :      instructions to load from a constant.  */
   21050    519619903 :   if (CONSTANT_P (x)
   21051    519619903 :       && (MAYBE_MMX_CLASS_P (regclass)
   21052    151684257 :           || MAYBE_SSE_CLASS_P (regclass)
   21053    121714517 :           || MAYBE_MASK_CLASS_P (regclass)))
   21054     30100375 :     return NO_REGS;
   21055              : 
   21056              :   /* Floating-point constants need more complex checks.  */
   21057    489519528 :   if (CONST_DOUBLE_P (x))
   21058              :     {
   21059              :       /* General regs can load everything.  */
   21060       304043 :       if (INTEGER_CLASS_P (regclass))
   21061              :         return regclass;
   21062              : 
   21063              :       /* Floats can load 0 and 1 plus some others.  Note that we eliminated
   21064              :          zero above.  We only want to wind up preferring 80387 registers if
   21065              :          we plan on doing computation with them.  */
   21066       179992 :       if (IS_STACK_MODE (mode)
   21067       238377 :           && standard_80387_constant_p (x) > 0)
   21068              :         {
   21069              :           /* Limit class to FP regs.  */
   21070        40502 :           if (FLOAT_CLASS_P (regclass))
   21071              :             return FLOAT_REGS;
   21072              :         }
   21073              : 
   21074       139490 :       return NO_REGS;
   21075              :     }
   21076              : 
   21077              :   /* Prefer SSE if we can use them for math.  Also allow integer regs
   21078              :      when moves between register units are cheap.  */
   21079    489215485 :   if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
   21080              :     {
   21081     31087954 :       if (TARGET_INTER_UNIT_MOVES_FROM_VEC
   21082     31073041 :           && TARGET_INTER_UNIT_MOVES_TO_VEC
   21083     93224770 :           && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
   21084     30931295 :         return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
   21085              :       else
   21086       156659 :         return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
   21087              :     }
   21088              : 
   21089              :   /* Generally when we see PLUS here, it's the function invariant
   21090              :      (plus soft-fp const_int), which can only be computed into general
   21091              :      regs.  */
   21092    458127531 :   if (GET_CODE (x) == PLUS)
   21093      1885352 :     return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
   21094              : 
   21095              :   /* QImode constants are easy to load, but non-constant QImode data
   21096              :      must go into Q_REGS or ALL_MASK_REGS.  */
   21097    456242179 :   if (GET_MODE (x) == QImode && !CONSTANT_P (x))
   21098              :     {
   21099     24380107 :       if (Q_CLASS_P (regclass))
   21100              :         return regclass;
   21101     19682147 :       else if (reg_class_subset_p (Q_REGS, regclass))
   21102              :         return Q_REGS;
   21103        55529 :       else if (MASK_CLASS_P (regclass))
   21104              :         return regclass;
   21105              :       else
   21106              :         return NO_REGS;
   21107              :     }
   21108              : 
   21109              :   return regclass;
   21110              : }
   21111              : 
   21112              : /* Discourage putting floating-point values in SSE registers unless
   21113              :    SSE math is being used, and likewise for the 387 registers.
                      :    X is the value of the output reload and REGCLASS the class being
                      :    considered; NO_REGS is returned to reject REGCLASS.  */
   21114              : static reg_class_t
   21115     74116585 : ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
   21116              : {
   21117              :   /* Restrict the output reload class to the register bank that we are doing
   21118              :      math on.  If we would like not to return a subset of REGCLASS, reject this
   21119              :      alternative: if reload cannot do this, it will still use its choice.  */
   21120     74116585 :   machine_mode mode = GET_MODE (x);
   21121     74116585 :   if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
   21122      7214536 :     return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
   21123              : 
   21124     66902049 :   if (IS_STACK_MODE (mode))
   21125       207707 :     return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
   21126              : 
   21127              :   return regclass;
   21128              : }
   21129              : 
   21130              : static reg_class_t
   21131    384410967 : ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
   21132              :                        machine_mode mode, secondary_reload_info *sri)
   21133              : {
   21134              :   /* Return the register class to go through when moving X in MODE
                      :      into (IN_P) or out of (!IN_P) a register of class RCLASS, or
                      :      NO_REGS.  SRI->icode and SRI->extra_cost may be set along the way.
                      : 
                      :      Double-word spills from general registers to non-offsettable memory
   21135              :      references (zero-extended addresses) require special handling.  */
   21136    384410967 :   if (TARGET_64BIT
   21137    331456987 :       && MEM_P (x)
   21138    179824754 :       && GET_MODE_SIZE (mode) > UNITS_PER_WORD
   21139     18816078 :       && INTEGER_CLASS_P (rclass)
   21140    387125098 :       && !offsettable_memref_p (x))
   21141              :     {
   21142      2461374 :       sri->icode = (in_p
   21143      1230687 :                     ? CODE_FOR_reload_noff_load
   21144              :                     : CODE_FOR_reload_noff_store);
   21145              :       /* Add the cost of moving address to a temporary.  */
   21146      1230687 :       sri->extra_cost = 1;
   21147              : 
   21148      1230687 :       return NO_REGS;
   21149              :     }
   21150              : 
   21151              :   /* QImode spills from non-QI registers may require an
   21152              :      intermediate register on 32-bit targets, and so may QImode moves
                      :      involving mask register classes when AVX512DQ is not available.  */
   21153    383180280 :   if (mode == QImode
   21154    383180280 :       && ((!TARGET_64BIT && !in_p
   21155       586226 :            && INTEGER_CLASS_P (rclass)
   21156       586186 :            && MAYBE_NON_Q_CLASS_P (rclass))
   21157     21999864 :           || (!TARGET_AVX512DQ
   21158     21800982 :               && MAYBE_MASK_CLASS_P (rclass))))
   21159              :     {
   21160         6476 :       int regno = true_regnum (x);
   21161              : 
   21162              :       /* Return Q_REGS if the operand is in memory.  */
   21163         6476 :       if (regno == -1)
   21164              :         return Q_REGS;
   21165              : 
   21166              :       return NO_REGS;
   21167              :     }
   21168              : 
   21169              :   /* Require movement to gpr, and then store to memory, for HFmode,
                      :      HImode, V2QImode and BFmode stores from SSE registers when
                      :      SSE4.1 is not available.  */
   21170    383173804 :   if ((mode == HFmode || mode == HImode || mode == V2QImode
   21171              :        || mode == BFmode)
   21172      3956887 :       && !TARGET_SSE4_1
   21173      3365984 :       && SSE_CLASS_P (rclass)
   21174       272554 :       && !in_p && MEM_P (x))
   21175              :     {
   21176       167324 :       sri->extra_cost = 1;
   21177       167324 :       return GENERAL_REGS;
   21178              :     }
   21179              : 
   21180              :   /* This condition handles the corner case where an expression involving
   21181              :      pointers gets vectorized.  We're trying to use the address of a
   21182              :      stack slot as a vector initializer.
   21183              : 
   21184              :      (set (reg:V2DI 74 [ vect_cst_.2 ])
   21185              :           (vec_duplicate:V2DI (reg/f:DI 20 frame)))
   21186              : 
   21187              :      Eventually frame gets turned into sp+offset like this:
   21188              : 
   21189              :      (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
   21190              :           (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
   21191              :                                        (const_int 392 [0x188]))))
   21192              : 
   21193              :      That later gets turned into:
   21194              : 
   21195              :      (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
   21196              :           (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
   21197              :             (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
   21198              : 
   21199              :      We'll have the following reload recorded:
   21200              : 
   21201              :      Reload 0: reload_in (DI) =
   21202              :            (plus:DI (reg/f:DI 7 sp)
   21203              :             (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
   21204              :      reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
   21205              :      SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
   21206              :      reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
   21207              :      reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
   21208              :      reload_reg_rtx: (reg:V2DI 22 xmm1)
   21209              : 
   21210              :      Which isn't going to work since SSE instructions can't handle scalar
   21211              :      additions.  Returning GENERAL_REGS forces the addition into integer
   21212              :      register and reload can handle subsequent reloads without problems.  */
   21213              : 
   21214    220290201 :   if (in_p && GET_CODE (x) == PLUS
   21215            2 :       && SSE_CLASS_P (rclass)
   21216    383006480 :       && SCALAR_INT_MODE_P (mode))
   21217              :     return GENERAL_REGS;
   21218              : 
   21219              :   return NO_REGS;
   21220              : }
   21221              : 
   21222              : /* Implement TARGET_CLASS_LIKELY_SPILLED_P.  Return true only for the
                      :    register classes listed in the switch below; every other RCLASS
                      :    yields false.  */
   21223              : 
   21224              : static bool
   21225    714463470 : ix86_class_likely_spilled_p (reg_class_t rclass)
   21226              : {
   21227    704523415 :   switch (rclass)
   21228              :     {
   21229              :       case AREG:
   21230              :       case DREG:
   21231              :       case CREG:
   21232              :       case BREG:
   21233              :       case AD_REGS:
   21234              :       case SIREG:
   21235              :       case DIREG:
   21236              :       case SSE_FIRST_REG:
   21237              :       case FP_TOP_REG:
   21238              :       case FP_SECOND_REG:
   21239              :         return true;
   21240              : 
   21241    683124542 :       default:
   21242    683124542 :         break;
   21243              :     }
   21244              : 
   21245    683124542 :   return false;
   21246              : }
   21247              : 
   21248              : /* Implement TARGET_CALLEE_SAVE_COST.  Only HARD_REGNO and MEM_COST are
                      :    used; the remaining hook arguments are unnamed and ignored.  General
                      :    registers cost MEM_COST - 2, but never less than 1, and exactly 1
                      :    when optimizing the function for size; every other register costs
                      :    MEM_COST.  */
   21249              : 
   21250              : static int
   21251     81459600 : ix86_callee_save_cost (spill_cost_type, unsigned int hard_regno, machine_mode,
   21252              :                        unsigned int, int mem_cost, const HARD_REG_SET &, bool)
   21253              : {
   21254              :   /* Account for the fact that push and pop are shorter and do their
   21255              :      own allocation and deallocation.  */
   21256     81459600 :   if (GENERAL_REGNO_P (hard_regno))
   21257              :     {
   21258              :       /* push is 1 byte while typical spill is 4-5 bytes.
   21259              :          ??? We probably should adjust size costs accordingly.
   21260              :          Costs are relative to reg-reg move that has 2 bytes for 32bit
   21261              :          and 3 bytes otherwise.  Guard against cost tables that set the
   21262              :          cost to 2 or less, so we never end up with 0 or below.  */
   21263     81449742 :       if (mem_cost <= 2 || optimize_function_for_size_p (cfun))
   21264      3576776 :         return 1;
   21265     77872966 :       return mem_cost - 2;
   21266              :     }
   21267              :   return mem_cost;
   21268              : }
   21269              : 
   21270              : /* Return true if a set of DST by the expression SRC should be allowed.
   21271              :    This prevents complex sets of likely_spilled hard regs before split1.  */
   21272              : 
   21273              : bool
   21274    633284862 : ix86_hardreg_mov_ok (rtx dst, rtx src)
   21275              : {
   21276              :   /* Avoid complex sets of likely_spilled hard registers before reload.  */
   21277    515351550 :   if (REG_P (dst) && HARD_REGISTER_P (dst)
   21278    307720254 :       && !REG_P (src) && !MEM_P (src)
   21279     95022611 :       && !(VECTOR_MODE_P (GET_MODE (dst))
   21280     95022611 :            ? standard_sse_constant_p (src, GET_MODE (dst))
   21281     47275342 :            : x86_64_immediate_operand (src, GET_MODE (dst)))
   21282      9940055 :       && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
   21283    642004431 :       && ix86_pre_reload_split ())
   21284              :     return false;
   21285              :   return true;
   21286              : }
   21287              : 
   21288              : /* If we are copying between registers from different register sets
   21289              :    (e.g. FP and integer), we may need a memory location.
   21290              : 
   21291              :    The function can't work reliably when one of the CLASSES is a class
   21292              :    containing registers from multiple sets.  We avoid this by never combining
   21293              :    different sets in a single alternative in the machine description.
   21294              :    Ensure that this constraint holds to avoid unexpected surprises.
   21295              : 
   21296              :    When STRICT is false, we are being called from REGISTER_MOVE_COST,
   21297              :    so do not enforce these sanity checks.
   21298              : 
   21299              :    To optimize register_move_cost performance, define inline variant.  */
   21300              : 
   21301              : static inline bool
   21302   5808522681 : inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
   21303              :                                 reg_class_t class2, int strict)
   21304              : {
   21305   5808522681 :   if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
   21306              :     return false;
   21307              : 
   21308   5776302054 :   if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
   21309   4922161376 :       || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
   21310   4203031161 :       || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
   21311   4010055544 :       || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
   21312   3827432583 :       || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
   21313   3827432583 :       || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
   21314   3827432583 :       || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
   21315   9429718743 :       || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
   21316              :     {
   21317   2288702893 :       gcc_assert (!strict || lra_in_progress);
   21318              :       return true;
   21319              :     }
   21320              : 
   21321   3487599161 :   if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
   21322              :     return true;
   21323              : 
   21324              :   /* ??? This is a lie.  We do have moves between mmx/general, and for
   21325              :      mmx/sse2.  But by saying we need secondary memory we discourage the
   21326              :      register allocator from using the mmx registers unless needed.  */
   21327   3335180667 :   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
   21328              :     return true;
   21329              : 
   21330              :   /* Between mask and general, we have moves no larger than word size.  */
   21331   3236812426 :   if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
   21332              :     {
   21333      2699571 :       if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
   21334      3518937 :           || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   21335       200475 :         return true;
   21336              :     }
   21337              : 
   21338   3236611951 :   if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
   21339              :     {
   21340              :       /* SSE1 doesn't have any direct moves from other classes.  */
   21341    703161876 :       if (!TARGET_SSE2)
   21342              :         return true;
   21343              : 
   21344    700498144 :       if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
   21345              :         return true;
   21346              : 
   21347              :       /* If the target says that inter-unit moves are more expensive
   21348              :          than moving through memory, then don't generate them.  */
   21349   1050273289 :       if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
   21350   1049787445 :           || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
   21351      1321114 :         return true;
   21352              : 
   21353              :       /* With SSE4.1, *mov{ti,di}_internal supports moves between
   21354              :          SSE_REGS and GENERAL_REGS using pinsr{q,d} or pextr{q,d}.  */
   21355    699177030 :       if (TARGET_SSE4_1
   21356     37999464 :           && (TARGET_64BIT ? mode == TImode : mode == DImode))
   21357              :         return false;
   21358              : 
   21359    697529813 :       int msize = GET_MODE_SIZE (mode);
   21360              : 
   21361              :       /* Between SSE and general, we have moves no larger than word size.  */
   21362    713893855 :       if (msize > UNITS_PER_WORD)
   21363              :         return true;
   21364              : 
   21365              :       /* In addition to SImode moves, HImode moves are supported for SSE2 and above,
   21366              :          Use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16.  */
   21367    603495183 :       int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);
   21368              : 
   21369    603495183 :       if (msize < minsize)
   21370              :         return true;
   21371              :     }
   21372              : 
   21373              :   return false;
   21374              : }
   21375              : 
   21376              : /* Implement TARGET_SECONDARY_MEMORY_NEEDED.  */
   21377              : 
   21378              : static bool
   21379     70950048 : ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
   21380              :                               reg_class_t class2)
   21381              : {
   21382     70950048 :   return inline_secondary_memory_needed (mode, class1, class2, true);
   21383              : }
   21384              : 
   21385              : /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
   21386              : 
   21387              :    get_secondary_mem widens integral modes to BITS_PER_WORD.
   21388              :    There is no need to emit full 64 bit move on 64 bit targets
   21389              :    for integral modes that can be moved using 32 bit move.  */
   21390              : 
   21391              : static machine_mode
   21392        13074 : ix86_secondary_memory_needed_mode (machine_mode mode)
   21393              : {
   21394        26148 :   if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
   21395           19 :     return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
   21396              :   return mode;
   21397              : }
   21398              : 
   21399              : /* Implement the TARGET_CLASS_MAX_NREGS hook.
   21400              : 
   21401              :    On the 80386, this is the size of MODE in words,
   21402              :    except in the FP regs, where a single reg is always enough.  */
   21403              : 
   21404              : static unsigned char
   21405   6061848900 : ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
   21406              : {
   21407   6061848900 :   if (MAYBE_INTEGER_CLASS_P (rclass))
   21408              :     {
   21409   4078415048 :       if (mode == XFmode)
   21410    149069411 :         return (TARGET_64BIT ? 2 : 3);
   21411   3929345637 :       else if (mode == XCmode)
   21412    149069036 :         return (TARGET_64BIT ? 4 : 6);
   21413              :       else
   21414   7666466321 :         return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
   21415              :     }
   21416              :   else
   21417              :     {
   21418   1983433852 :       if (COMPLEX_MODE_P (mode))
   21419              :         return 2;
   21420              :       else
   21421   1693839513 :         return 1;
   21422              :     }
   21423              : }
   21424              : 
   21425              : /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
   21426              : 
   21427              : static bool
   21428     40288537 : ix86_can_change_mode_class (machine_mode from, machine_mode to,
   21429              :                             reg_class_t regclass)
   21430              : {
   21431     40288537 :   if (from == to)
   21432              :     return true;
   21433              : 
   21434              :   /* x87 registers can't do subreg at all, as all values are reformatted
   21435              :      to extended precision.
   21436              : 
   21437              :      ??? middle-end queries mode changes for ALL_REGS and this makes
   21438              :      vec_series_lowpart_p to always return false.  We probably should
   21439              :      restrict this to modes supported by i387 and check if it is enabled.  */
   21440     38884506 :   if (MAYBE_FLOAT_CLASS_P (regclass))
   21441              :     return false;
   21442              : 
   21443     34231946 :   if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
   21444              :     {
   21445              :       /* Vector registers do not support QI or HImode loads.  If we don't
   21446              :          disallow a change to these modes, reload will assume it's ok to
   21447              :          drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
   21448              :          the vec_dupv4hi pattern.
   21449              :          NB: SSE2 can load 16bit data to sse register via pinsrw.  */
   21450     16507695 :       int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
   21451     16507695 :       if (GET_MODE_SIZE (from) < mov_size
   21452     33015078 :           || GET_MODE_SIZE (to) < mov_size)
   21453              :         return false;
   21454              :     }
   21455              : 
   21456              :   return true;
   21457              : }
   21458              : 
   21459              : /* Return index of MODE in the sse load/store tables.  */
   21460              : 
   21461              : static inline int
   21462    791200603 : sse_store_index (machine_mode mode)
   21463              : {
   21464              :   /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store
   21465              :      costs to processor_costs, which requires changes to all entries in
   21466              :      processor cost table.  */
   21467    791200603 :   if (mode == E_HFmode)
   21468    140218844 :     mode = E_SFmode;
   21469              : 
   21470   1582401206 :   switch (GET_MODE_SIZE (mode))
   21471              :     {
   21472              :     case 4:
   21473              :       return 0;
   21474              :     case 8:
   21475              :       return 1;
   21476              :     case 16:
   21477              :       return 2;
   21478              :     case 32:
   21479              :       return 3;
   21480              :     case 64:
   21481              :       return 4;
   21482              :     default:
   21483              :       return -1;
   21484              :     }
   21485              : }
   21486              : 
   21487              : /* Return the cost of moving data of mode M between a
   21488              :    register and memory.  A value of 2 is the default; this cost is
   21489              :    relative to those in `REGISTER_MOVE_COST'.
   21490              : 
   21491              :    This function is used extensively by register_move_cost that is used to
   21492              :    build tables at startup.  Make it inline in this case.
   21493              :    When IN is 2, return maximum of in and out move cost.
   21494              : 
   21495              :    If moving between registers and memory is more expensive than
   21496              :    between two registers, you should define this macro to express the
   21497              :    relative cost.
   21498              : 
   21499              :    Model also increased moving costs of QImode registers in non
   21500              :    Q_REGS classes.
   21501              :  */
   21502              : static inline int
   21503   7071590040 : inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
   21504              : {
   21505   7071590040 :   int cost;
   21506              : 
   21507   7071590040 :   if (FLOAT_CLASS_P (regclass))
   21508              :     {
   21509    361127475 :       int index;
   21510    361127475 :       switch (mode)
   21511              :         {
   21512              :           case E_SFmode:
   21513              :             index = 0;
   21514              :             break;
   21515              :           case E_DFmode:
   21516              :             index = 1;
   21517              :             break;
   21518              :           case E_XFmode:
   21519              :             index = 2;
   21520              :             break;
   21521              :           default:
   21522              :             return 100;
   21523              :         }
   21524    107946991 :       if (in == 2)
   21525    103919262 :         return MAX (ix86_cost->hard_register.fp_load [index],
   21526              :                     ix86_cost->hard_register.fp_store [index]);
   21527      4027729 :       return in ? ix86_cost->hard_register.fp_load [index]
   21528      4027729 :                 : ix86_cost->hard_register.fp_store [index];
   21529              :     }
   21530   6710462565 :   if (SSE_CLASS_P (regclass))
   21531              :     {
   21532    659329647 :       int index = sse_store_index (mode);
   21533    659329647 :       if (index == -1)
   21534              :         return 100;
   21535    573393452 :       if (in == 2)
   21536    406320345 :         return MAX (ix86_cost->hard_register.sse_load [index],
   21537              :                     ix86_cost->hard_register.sse_store [index]);
   21538    167073107 :       return in ? ix86_cost->hard_register.sse_load [index]
   21539    167073107 :                 : ix86_cost->hard_register.sse_store [index];
   21540              :     }
   21541   6051132918 :   if (MASK_CLASS_P (regclass))
   21542              :     {
   21543    110748435 :       int index;
   21544    221496870 :       switch (GET_MODE_SIZE (mode))
   21545              :         {
   21546              :         case 1:
   21547              :           index = 0;
   21548              :           break;
   21549      9142669 :         case 2:
   21550      9142669 :           index = 1;
   21551      9142669 :           break;
   21552              :         /* DImode loads and stores assumed to cost the same as SImode.  */
   21553     41100846 :         case 4:
   21554     41100846 :         case 8:
   21555     41100846 :           index = 2;
   21556     41100846 :           break;
   21557              :         default:
   21558              :           return 100;
   21559              :         }
   21560              : 
   21561     53904915 :       if (in == 2)
   21562       614067 :         return MAX (ix86_cost->hard_register.mask_load[index],
   21563              :                     ix86_cost->hard_register.mask_store[index]);
   21564     53290848 :       return in ? ix86_cost->hard_register.mask_load[2]
   21565     53290848 :                 : ix86_cost->hard_register.mask_store[2];
   21566              :     }
   21567   5940384483 :   if (MMX_CLASS_P (regclass))
   21568              :     {
   21569    176322170 :       int index;
   21570    352644340 :       switch (GET_MODE_SIZE (mode))
   21571              :         {
   21572              :           case 4:
   21573              :             index = 0;
   21574              :             break;
   21575    103482270 :           case 8:
   21576    103482270 :             index = 1;
   21577    103482270 :             break;
   21578              :           default:
   21579              :             return 100;
   21580              :         }
   21581    141683150 :       if (in == 2)
   21582    121254988 :         return MAX (ix86_cost->hard_register.mmx_load [index],
   21583              :                     ix86_cost->hard_register.mmx_store [index]);
   21584     20428162 :       return in ? ix86_cost->hard_register.mmx_load [index]
   21585     20428162 :                 : ix86_cost->hard_register.mmx_store [index];
   21586              :     }
   21587  11528124626 :   switch (GET_MODE_SIZE (mode))
   21588              :     {
   21589    127590891 :       case 1:
   21590    127590891 :         if (Q_CLASS_P (regclass) || TARGET_64BIT)
   21591              :           {
   21592    124963440 :             if (!in)
   21593     20044824 :               return ix86_cost->hard_register.int_store[0];
   21594    104918616 :             if (TARGET_PARTIAL_REG_DEPENDENCY
   21595    104918616 :                 && optimize_function_for_speed_p (cfun))
   21596     97974362 :               cost = ix86_cost->hard_register.movzbl_load;
   21597              :             else
   21598      6944254 :               cost = ix86_cost->hard_register.int_load[0];
   21599    104918616 :             if (in == 2)
   21600     84845520 :               return MAX (cost, ix86_cost->hard_register.int_store[0]);
   21601              :             return cost;
   21602              :           }
   21603              :         else
   21604              :           {
   21605      2627451 :            if (in == 2)
   21606      1860710 :              return MAX (ix86_cost->hard_register.movzbl_load,
   21607              :                          ix86_cost->hard_register.int_store[0] + 4);
   21608       766741 :            if (in)
   21609       383425 :              return ix86_cost->hard_register.movzbl_load;
   21610              :            else
   21611       383316 :              return ix86_cost->hard_register.int_store[0] + 4;
   21612              :           }
   21613    658592209 :         break;
   21614    658592209 :       case 2:
   21615    658592209 :         {
   21616    658592209 :           int cost;
   21617    658592209 :           if (in == 2)
   21618    556431119 :             cost = MAX (ix86_cost->hard_register.int_load[1],
   21619              :                         ix86_cost->hard_register.int_store[1]);
   21620              :           else
   21621    102161090 :             cost = in ? ix86_cost->hard_register.int_load[1]
   21622              :                       : ix86_cost->hard_register.int_store[1];
   21623              : 
   21624    658592209 :           if (mode == E_HFmode)
   21625              :             {
   21626              :               /* Prefer SSE over GPR for HFmode.  */
   21627    127618180 :               int sse_cost;
   21628    127618180 :               int index = sse_store_index (mode);
   21629    127618180 :               if (in == 2)
   21630    117404048 :                 sse_cost = MAX (ix86_cost->hard_register.sse_load[index],
   21631              :                                 ix86_cost->hard_register.sse_store[index]);
   21632              :               else
   21633     20428264 :                 sse_cost = (in
   21634     10214132 :                             ? ix86_cost->hard_register.sse_load [index]
   21635              :                             : ix86_cost->hard_register.sse_store [index]);
   21636    127618180 :               if (sse_cost >= cost)
   21637    127618180 :                 cost = sse_cost + 1;
   21638              :             }
   21639              :           return cost;
   21640              :         }
   21641   4977879213 :       default:
   21642   4977879213 :         if (in == 2)
   21643   3853955688 :           cost = MAX (ix86_cost->hard_register.int_load[2],
   21644              :                       ix86_cost->hard_register.int_store[2]);
   21645   1123923525 :         else if (in)
   21646    562149675 :           cost = ix86_cost->hard_register.int_load[2];
   21647              :         else
   21648    561773850 :           cost = ix86_cost->hard_register.int_store[2];
   21649              :         /* Multiply with the number of GPR moves needed.  */
   21650  10074654555 :         return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
   21651              :     }
   21652              : }
   21653              : 
   21654              : static int
   21655   1817858222 : ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
   21656              : {
   21657   2726466362 :   return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
   21658              : }
   21659              : 
   21660              : 
   21661              : /* Return the cost of moving data from a register in class CLASS1 to
   21662              :    one in class CLASS2.
   21663              : 
   21664              :    It is not required that the cost always equal 2 when FROM is the same as TO;
   21665              :    on some machines it is expensive to move between registers if they are not
   21666              :    general registers.  */
   21667              : 
   21668              : static int
   21669   5737572633 : ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
   21670              :                          reg_class_t class2_i)
   21671              : {
   21672   5737572633 :   enum reg_class class1 = (enum reg_class) class1_i;
   21673   5737572633 :   enum reg_class class2 = (enum reg_class) class2_i;
   21674              : 
   21675              :   /* In case we require secondary memory, compute cost of the store followed
   21676              :      by load.  In order to avoid bad register allocation choices, we need
   21677              :      for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
   21678              : 
   21679   5737572633 :   if (inline_secondary_memory_needed (mode, class1, class2, false))
   21680              :     {
   21681   2626865909 :       int cost = 1;
   21682              : 
   21683   2626865909 :       cost += inline_memory_move_cost (mode, class1, 2);
   21684   2626865909 :       cost += inline_memory_move_cost (mode, class2, 2);
   21685              : 
   21686              :       /* In case of copying from general_purpose_register we may emit multiple
   21687              :          stores followed by single load causing memory size mismatch stall.
   21688              :          Count this as arbitrarily high cost of 20.  */
   21689   5253731818 :       if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
   21690    786005540 :           && TARGET_MEMORY_MISMATCH_STALL
   21691   4198876989 :           && targetm.class_max_nregs (class1, mode)
   21692    786005540 :              > targetm.class_max_nregs (class2, mode))
   21693    149524934 :         cost += 20;
   21694              : 
   21695              :       /* In the case of FP/MMX moves, the registers actually overlap, and we
   21696              :          have to switch modes in order to treat them differently.  */
   21697     60627530 :       if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
   21698   2677945504 :           || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
   21699     19095870 :         cost += 20;
   21700              : 
   21701   2626865909 :       return cost;
   21702              :     }
   21703              : 
   21704              :   /* Moves between MMX and non-MMX units require secondary memory.  */
   21705   3110706724 :   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
   21706            0 :     gcc_unreachable ();
   21707              : 
   21708   3110706724 :   if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
   21709    595691688 :     return (SSE_CLASS_P (class1)
   21710    595691688 :             ? ix86_cost->hard_register.sse_to_integer
   21711    595691688 :             : ix86_cost->hard_register.integer_to_sse);
   21712              : 
   21713              :   /* Moves between mask register and GPR.  */
   21714   2515015036 :   if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
   21715              :     {
   21716      1106063 :       return (MASK_CLASS_P (class1)
   21717      1106063 :               ? ix86_cost->hard_register.mask_to_integer
   21718      1106063 :               : ix86_cost->hard_register.integer_to_mask);
   21719              :     }
   21720              :   /* Moving between mask registers.  */
   21721   2513908973 :   if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
   21722       106408 :     return ix86_cost->hard_register.mask_move;
   21723              : 
   21724   2513802565 :   if (MAYBE_FLOAT_CLASS_P (class1))
   21725     12066084 :     return ix86_cost->hard_register.fp_move;
   21726   2501736481 :   if (MAYBE_SSE_CLASS_P (class1))
   21727              :     {
   21728    234885228 :       if (GET_MODE_BITSIZE (mode) <= 128)
   21729    114864086 :         return ix86_cost->hard_register.xmm_move;
   21730      5157056 :       if (GET_MODE_BITSIZE (mode) <= 256)
   21731      1635081 :         return ix86_cost->hard_register.ymm_move;
   21732       943447 :       return ix86_cost->hard_register.zmm_move;
   21733              :     }
   21734   2384293867 :   if (MAYBE_MMX_CLASS_P (class1))
   21735      2220505 :     return ix86_cost->hard_register.mmx_move;
   21736              :   return 2;
   21737              : }
   21738              : 
   21739              : /* Implement TARGET_HARD_REGNO_NREGS.  This is ordinarily the length in
   21740              :    words of a value of mode MODE but can be less for certain modes in
   21741              :    special long registers.
   21742              : 
   21743              :    Actually there are no two word move instructions for consecutive
   21744              :    registers.  And only registers 0-3 may have mov byte instructions
   21745              :    applied to them.  */
   21746              : 
   21747              : static unsigned int
   21748   9008053408 : ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
   21749              : {
   21750   9008053408 :   if (GENERAL_REGNO_P (regno))
   21751              :     {
   21752   3133235968 :       if (mode == XFmode)
   21753     25746432 :         return TARGET_64BIT ? 2 : 3;
   21754   3107967936 :       if (mode == XCmode)
   21755     25746432 :         return TARGET_64BIT ? 4 : 6;
   21756   6223764608 :       return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
   21757              :     }
   21758   5874817440 :   if (COMPLEX_MODE_P (mode))
   21759              :     return 2;
   21760              :   /* Register pair for mask registers.  */
   21761   5116776480 :   if (mode == P2QImode || mode == P2HImode)
   21762     94755120 :     return 2;
   21763              : 
   21764              :   return 1;
   21765              : }
   21766              : 
   21767              : /* Implement REGMODE_NATURAL_SIZE(MODE).  */
   21768              : unsigned int
   21769    110701356 : ix86_regmode_natural_size (machine_mode mode)
   21770              : {
   21771    110701356 :   if (mode == P2HImode || mode == P2QImode)
   21772         2462 :     return GET_MODE_SIZE (mode) / 2;
   21773    110700125 :   return UNITS_PER_WORD;
   21774              : }
   21775              : 
   21776              : /* Implement TARGET_HARD_REGNO_MODE_OK.  */
   21777              : 
   21778              : static bool
   21779  55132258831 : ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
   21780              : {
   21781              :   /* Flags and only flags can only hold CCmode values.  */
   21782  55132258831 :   if (CC_REGNO_P (regno))
   21783    438239777 :     return GET_MODE_CLASS (mode) == MODE_CC;
   21784  54694019054 :   if (GET_MODE_CLASS (mode) == MODE_CC
   21785              :       || GET_MODE_CLASS (mode) == MODE_RANDOM)
   21786              :     return false;
   21787  49067883297 :   if (STACK_REGNO_P (regno))
   21788   4778884507 :     return VALID_FP_MODE_P (mode);
   21789  44288998790 :   if (MASK_REGNO_P (regno))
   21790              :     {
   21791              :       /* Register pair only starts at even register number.  */
   21792   3725023477 :       if ((mode == P2QImode || mode == P2HImode))
   21793     51932098 :         return MASK_PAIR_REGNO_P(regno);
   21794              : 
   21795   1003720843 :       return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
   21796   4656401300 :               || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
   21797              :     }
   21798              : 
   21799  40563975313 :   if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
   21800              :     return false;
   21801              : 
   21802  39568605866 :   if (SSE_REGNO_P (regno))
   21803              :     {
   21804              :       /* We implement the move patterns for all vector modes into and
   21805              :          out of SSE registers, even when no operation instructions
   21806              :          are available.  */
   21807              : 
   21808              :       /* For AVX-512 we allow, regardless of regno:
   21809              :           - XI mode
   21810              :           - any of 512-bit wide vector mode
   21811              :           - any scalar mode.  */
   21812  17110189365 :       if (TARGET_AVX512F
   21813              :           && ((VALID_AVX512F_REG_OR_XI_MODE (mode))
   21814              :               || VALID_AVX512F_SCALAR_MODE (mode)))
   21815              :         return true;
   21816              : 
   21817              :       /* TODO check for QI/HI scalars.  */
   21818              :       /* AVX512VL allows sse regs16+ for 128/256 bit modes.  */
   21819  16417921243 :       if (TARGET_AVX512VL
   21820   1752504624 :           && (VALID_AVX256_REG_OR_OI_MODE (mode)
   21821   1540093745 :               || VALID_AVX512VL_128_REG_MODE (mode)))
   21822              :         return true;
   21823              : 
   21824              :       /* xmm16-xmm31 are only available for AVX-512.  */
   21825  15971075831 :       if (EXT_REX_SSE_REGNO_P (regno))
   21826              :         return false;
   21827              : 
   21828              :       /* OImode and AVX modes are available only when AVX is enabled.  */
   21829   9246003422 :       return ((TARGET_AVX
   21830   1933732993 :                && VALID_AVX256_REG_OR_OI_MODE (mode))
   21831              :               || VALID_SSE_REG_MODE (mode)
   21832              :               || VALID_SSE2_REG_MODE (mode)
   21833              :               || VALID_MMX_REG_MODE (mode)
   21834   9246003422 :               || VALID_MMX_REG_MODE_3DNOW (mode));
   21835              :     }
   21836  22458416501 :   if (MMX_REGNO_P (regno))
   21837              :     {
   21838              :       /* We implement the move patterns for 3DNOW modes even in MMX mode,
   21839              :          so if the register is available at all, then we can move data of
   21840              :          the given mode into or out of it.  */
   21841   4002010879 :       return (VALID_MMX_REG_MODE (mode)
   21842              :               || VALID_MMX_REG_MODE_3DNOW (mode));
   21843              :     }
   21844              : 
   21845  18456405622 :   if (mode == QImode)
   21846              :     {
   21847              :       /* Take care for QImode values - they can be in non-QI regs,
   21848              :          but then they do cause partial register stalls.  */
   21849    208821652 :       if (ANY_QI_REGNO_P (regno))
   21850              :         return true;
   21851     14431639 :       if (!TARGET_PARTIAL_REG_STALL)
   21852              :         return true;
   21853              :       /* LRA checks if the hard register is OK for the given mode.
   21854              :          QImode values can live in non-QI regs, so we allow all
   21855              :          registers here.  */
   21856            0 :       if (lra_in_progress)
   21857              :        return true;
   21858            0 :       return !can_create_pseudo_p ();
   21859              :     }
   21860              :   /* We handle both integer and floats in the general purpose registers.  */
   21861  18247583970 :   else if (VALID_INT_MODE_P (mode)
   21862  13349782955 :            || VALID_FP_MODE_P (mode))
   21863              :     return true;
   21864              :   /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
   21865              :      on to use that value in smaller contexts, this can easily force a
   21866              :      pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
   21867              :      supporting DImode, allow it.  */
   21868  12274520046 :   else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
   21869              :     return true;
   21870              : 
   21871              :   return false;
   21872              : }
   21873              : 
   21874              : /* Lazily initialize the function_abis entry selected by ABI_ID.
   21875              :    Currently only ABI_VZEROUPPER is handled (enforced by the assert below).  */
   21876              : void
   21877        21870 : ix86_initialize_callee_abi (unsigned int abi_id)
   21878              : {
   21879        21870 :   gcc_assert (abi_id == ABI_VZEROUPPER);
   21880        21870 :   predefined_function_abi &vzeroupper_abi = function_abis[abi_id];
   21881        21870 :   if (!vzeroupper_abi.initialized_p ())  /* Set it up only once.  */
   21882              :     {
   21883              :       HARD_REG_SET full_reg_clobbers;
   21884         4279 :       CLEAR_HARD_REG_SET (full_reg_clobbers);  /* Empty full-clobber set.  */
   21885         4279 :       vzeroupper_abi.initialize (ABI_VZEROUPPER, full_reg_clobbers);
   21886              :     }
   21887        21870 : }
   21888              : /* Emit vzeroupper as a fake call insn whose callee ABI is ABI_VZEROUPPER.  */
   21889              : void
   21890        21870 : ix86_expand_avx_vzeroupper (void)
   21891              : {
   21892              :   /* Make sure the vzeroupper ABI entry is initialized before it is used.  */
   21893        21870 :   ix86_initialize_callee_abi (ABI_VZEROUPPER);
   21894        21870 :   rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ());
   21895        21870 :   CALL_INSN_ABI_ID (insn) = ABI_VZEROUPPER;
   21896              :   /* The EH region note makes can_nonlocal_goto return false for INSN.  */
   21897        21870 :   make_reg_eh_region_note (insn, 0, INT_MIN);
   21898              :   /* The `used' flag on a call_insn indicates that it is a fake call.  */
   21899        21870 :   RTX_FLAG (insn, used) = 1;
   21900        21870 : }
   21901              : 
   21902              : 
   21903              : /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  The only ABI that
   21904              :    saves SSE registers across calls is Win64 (thus no need to check the
   21905              :    current ABI here), and with AVX enabled Win64 only guarantees that
   21906              :    the low 16 bytes are saved.  ABI_VZEROUPPER is handled separately.  */
   21907              : 
   21908              : static bool
   21909   2070650478 : ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno,
   21910              :                                      machine_mode mode)
   21911              : {
   21912              :   /* Special ABI for vzeroupper, which only clobbers the upper part of SSE regs.  */
   21913   2070650478 :   if (abi_id == ABI_VZEROUPPER)
   21914     30934615 :       return (GET_MODE_SIZE (mode) > 16
   21915     30934615 :               && ((TARGET_64BIT && REX_SSE_REGNO_P (regno))
   21916      4732774 :                   || LEGACY_SSE_REGNO_P (regno)));
   21917              : 
   21918   2682541503 :   return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
   21919              : }
   21920              : 
   21921              : /* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   21922              :    tieable integer mode; QImode and DImode qualify only conditionally.  */
   21923              : 
   21924              : static bool
   21925     52431626 : ix86_tieable_integer_mode_p (machine_mode mode)
   21926              : {
   21927     52431626 :   switch (mode)
   21928              :     {
   21929              :     case E_HImode:
   21930              :     case E_SImode:
   21931              :       return true;
   21932              : 
   21933      5330771 :     case E_QImode:
   21934      5330771 :       return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
   21935              : 
   21936     10122448 :     case E_DImode:
   21937     10122448 :       return TARGET_64BIT;
   21938              : 
   21939              :     default:  /* No other mode is a tieable integer mode.  */
   21940              :       return false;
   21941              :     }
   21942              : }
   21943              : 
   21944              : /* Implement TARGET_MODES_TIEABLE_P.
   21945              : 
   21946              :    Return true if MODE1 is accessible in a register that can hold MODE2
   21947              :    without copying.  That is, all register classes that can hold MODE2
   21948              :    can also hold MODE1.  */
   21949              : 
   21950              : static bool
   21951     33986372 : ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
   21952              : {
   21953     33986372 :   if (mode1 == mode2)  /* A mode always ties with itself.  */
   21954              :     return true;
   21955              : 
   21956     33899914 :   if (ix86_tieable_integer_mode_p (mode1)
   21957     33899914 :       && ix86_tieable_integer_mode_p (mode2))
   21958              :     return true;  /* Both are tieable integer modes.  */
   21959              : 
   21960              :   /* MODE2 being XFmode implies fp stack or general regs, which means we
   21961              :      can tie any smaller floating point modes to it.  Note that we do not
   21962              :      tie this with TFmode.  */
   21963     24923455 :   if (mode2 == XFmode)
   21964         4314 :     return mode1 == SFmode || mode1 == DFmode;
   21965              : 
   21966              :   /* MODE2 being DFmode implies fp stack, general or sse regs, which means
   21967              :      that we can tie it with SFmode.  */
   21968     24919141 :   if (mode2 == DFmode)
   21969       249932 :     return mode1 == SFmode;
   21970              : 
   21971              :   /* If MODE2 is only appropriate for an SSE register, then tie with
   21972              :      any same-size mode, or any no-larger vector or scalar floating point
   21973              :      mode, acceptable to SSE registers, excluding scalar integer modes with SUBREG:
   21974              :         (subreg:QI (reg:TI 99) 0)
   21975              :         (subreg:HI (reg:TI 99) 0)
   21976              :         (subreg:SI (reg:TI 99) 0)
   21977              :         (subreg:DI (reg:TI 99) 0)
   21978              :      to avoid an unnecessary move from an SSE register to an integer register.
   21979              :    */
   21980     24669209 :   if (GET_MODE_SIZE (mode2) >= 16
   21981     38653244 :       && (GET_MODE_SIZE (mode1) == GET_MODE_SIZE (mode2)
   21982     13719643 :           || ((VECTOR_MODE_P (mode1) || SCALAR_FLOAT_MODE_P (mode1))
   21983       486118 :               && GET_MODE_SIZE (mode1) <= GET_MODE_SIZE (mode2)))
   21984     30518157 :       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
   21985      5406314 :     return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
   21986              : 
   21987              :   /* If MODE2 is an 8-byte mode appropriate for an MMX register, then tie
   21988              :      with any other 8-byte mode acceptable to MMX registers.  */
   21989     19262895 :   if (GET_MODE_SIZE (mode2) == 8
   21990     19262895 :       && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
   21991      3304058 :     return (GET_MODE_SIZE (mode1) == 8
   21992      3304058 :             && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
   21993              : 
   21994              :   /* SCmode and DImode can be tied, but only for TARGET_64BIT.  */
   21995     15958837 :   if ((mode1 == E_SCmode && mode2 == E_DImode)
   21996     15958837 :       || (mode1 == E_DImode && mode2 == E_SCmode))
   21997          108 :     return TARGET_64BIT;
   21998              : 
   21999              :   /* [SD]Cmode and V2[SD]Fmode modes can be tied.  */
   22000     15958729 :   if ((mode1 == E_SCmode && mode2 == E_V2SFmode)
   22001     15958729 :       || (mode1 == E_V2SFmode && mode2 == E_SCmode)
   22002     15958729 :       || (mode1 == E_DCmode && mode2 == E_V2DFmode)
   22003     15958729 :       || (mode1 == E_V2DFmode && mode2 == E_DCmode))
   22004            0 :     return true;
   22005              : 
   22006              :   return false;
   22007              : }
   22008              : 
   22009              : /* Return the cost of moving between two registers of mode MODE, expressed with COSTS_N_INSNS.  */
   22010              : 
   22011              : static int
   22012     29863581 : ix86_set_reg_reg_cost (machine_mode mode)
   22013              : {
   22014     29863581 :   unsigned int units = UNITS_PER_WORD;  /* Default piece size: one word.  */
   22015              : 
   22016     29863581 :   switch (GET_MODE_CLASS (mode))
   22017              :     {
   22018              :     default:
   22019              :       break;
   22020              : 
   22021              :     case MODE_CC:
   22022     29863581 :       units = GET_MODE_SIZE (CCmode);
   22023              :       break;
   22024              : 
   22025      1185201 :     case MODE_FLOAT:
   22026      1185201 :       if ((TARGET_SSE && mode == TFmode)
   22027       693396 :           || (TARGET_80387 && mode == XFmode)
   22028       210960 :           || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
   22029       142495 :           || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
   22030      2340486 :         units = GET_MODE_SIZE (mode);  /* Moved as a single piece.  */
   22031              :       break;
   22032              : 
   22033      1336644 :     case MODE_COMPLEX_FLOAT:
   22034      1336644 :       if ((TARGET_SSE && mode == TCmode)
   22035       896026 :           || (TARGET_80387 && mode == XCmode)
   22036       455286 :           || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
   22037        14518 :           || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
   22038      2666800 :         units = GET_MODE_SIZE (mode);  /* Moved as a single piece.  */
   22039              :       break;
   22040              : 
   22041     19125022 :     case MODE_VECTOR_INT:
   22042     19125022 :     case MODE_VECTOR_FLOAT:
   22043     19125022 :       if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
   22044     19024539 :           || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
   22045     18846155 :           || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
   22046     16159656 :           || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
   22047     14825466 :           || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
   22048     14779956 :               && VALID_MMX_REG_MODE (mode)))
   22049      8705478 :         units = GET_MODE_SIZE (mode);  /* Moved as a single piece.  */
   22050              :     }
   22051              : 
   22052              :   /* The move is assumed to be done in pieces of at most UNITS bytes; the
   22053              :      cost is the number of pieces (rounded up) converted by COSTS_N_INSNS.  */
   22054     29863581 :   return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
   22055              : }
   22056              : 
   22057              : /* Return cost of vector operation in MODE given that scalar version has
   22058              :    COST.  COST is scaled up when the matching TARGET_*_SPLIT_REGS flag is set.  */
   22059              : 
   22060              : static int
   22061   2895881329 : ix86_vec_cost (machine_mode mode, int cost)
   22062              : {
   22063   2895881329 :   if (!VECTOR_MODE_P (mode))
   22064              :     return cost;  /* Non-vector modes: COST is returned unchanged.  */
   22065              : 
   22066   2895647164 :   if (GET_MODE_BITSIZE (mode) == 128
   22067   2895647164 :       && TARGET_SSE_SPLIT_REGS)
   22068      2861918 :     return cost * GET_MODE_BITSIZE (mode) / 64;  /* Scale by the number of 64-bit pieces.  */
   22069   2894216205 :   else if (GET_MODE_BITSIZE (mode) > 128
   22070   2894216205 :       && TARGET_AVX256_SPLIT_REGS)
   22071      1674620 :     return cost * GET_MODE_BITSIZE (mode) / 128;  /* Scale by the number of 128-bit pieces.  */
   22072   2893378895 :   else if (GET_MODE_BITSIZE (mode) > 256
   22073   2893378895 :       && TARGET_AVX512_SPLIT_REGS)
   22074       265000 :     return cost * GET_MODE_BITSIZE (mode) / 256;  /* Scale by the number of 256-bit pieces.  */
   22075              :   return cost;
   22076              : }
   22077              : 
   22078              : /* Return cost of vec_widen_<s>mult_hi/lo_<mode>;
   22079              :    vec_widen_<s>mult_hi/lo_<mode> is only available for VI124_AVX2.  UNS_P selects the unsigned variant.  */
   22080              : static int
   22081         1074 : ix86_widen_mult_cost (const struct processor_costs *cost,
   22082              :                       enum machine_mode mode, bool uns_p)
   22083              : {
   22084         1074 :   gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
   22085         1074 :   int extra_cost = 0;
   22086         1074 :   int basic_cost = 0;
   22087         1074 :   switch (mode)
   22088              :     {
   22089          124 :     case V8HImode:
   22090          124 :     case V16HImode:
   22091          124 :       if (!uns_p || mode == V16HImode)
   22092           53 :         extra_cost = cost->sse_op * 2;
   22093          124 :       basic_cost = cost->mulss * 2 + cost->sse_op * 4;
   22094          124 :       break;
   22095          203 :     case V4SImode:
   22096          203 :     case V8SImode:
   22097              :       /* pmulhw/pmullw can be used.  */
   22098          203 :       basic_cost = cost->mulss * 2 + cost->sse_op * 2;
   22099          203 :       break;
   22100          679 :     case V2DImode:
   22101              :       /* pmuludq under sse2, pmuldq under sse4.1; without sse4.1, sign_extend
   22102              :          requires an extra 4 mul, 4 add, 4 cmp and 2 shift.  */
   22103          679 :       if (!TARGET_SSE4_1 && !uns_p)
   22104          401 :         extra_cost = (cost->mulss + cost->sse_op + cost->sse_op) * 4
   22105          401 :                       + cost->sse_op * 2;
   22106              :       /* Fallthru.  */
   22107          735 :     case V4DImode:
   22108          735 :       basic_cost = cost->mulss * 2 + cost->sse_op * 4;
   22109          735 :       break;
   22110              :     default:
   22111              :       /* Not implemented; return a fixed cost of 100.  */
   22112              :       return 100;
   22113              :     }
   22114         1062 :   return ix86_vec_cost (mode, basic_cost + extra_cost);
   22115              : }
   22116              : 
   22117              : /* Return cost of multiplication in MODE, using the cost table COST.  */
   22118              : 
   22119              : static int
   22120   1235871490 : ix86_multiplication_cost (const struct processor_costs *cost,
   22121              :                           enum machine_mode mode)
   22122              : {
   22123   1235871490 :   machine_mode inner_mode = mode;
   22124   1235871490 :   if (VECTOR_MODE_P (mode))
   22125   1234850538 :     inner_mode = GET_MODE_INNER (mode);
   22126              : 
   22127   1235871490 :   if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   22128       752472 :     return inner_mode == DFmode ? cost->mulsd : cost->mulss;
   22129   1235119018 :   else if (X87_FLOAT_MODE_P (mode))
   22130       162211 :     return cost->fmul;
   22131   1234956807 :   else if (FLOAT_MODE_P (mode))
   22132       230926 :     return  ix86_vec_cost (mode,
   22133       230926 :                            inner_mode == DFmode ? cost->mulsd : cost->mulss);
   22134   1234725881 :   else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   22135              :     {
   22136   1234643498 :       int nmults, nops;
   22137              :       /* Cost of reading the memory, added on top of the vector cost.  */
   22138   1234643498 :       int extra;
   22139              : 
   22140   1234643498 :       switch (mode)
   22141              :         {
   22142     19442874 :         case V4QImode:
   22143     19442874 :         case V8QImode:
   22144              :           /* Partial V*QImode is emulated with 4-6 insns.  */
   22145     19442874 :           nmults = 1;
   22146     19442874 :           nops = 3;
   22147     19442874 :           extra = 0;
   22148              : 
   22149     19442874 :           if (TARGET_AVX512BW && TARGET_AVX512VL)
   22150              :             ;
   22151     19333216 :           else if (TARGET_AVX2)
   22152              :             nops += 2;
   22153     18797356 :           else if (TARGET_XOP)
   22154        10040 :             extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
   22155              :           else
   22156              :             {
   22157     18787316 :               nops += 1;
   22158     18787316 :               extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
   22159              :             }
   22160     19442874 :           goto do_qimode;
   22161              : 
   22162      9721912 :         case V16QImode:
   22163              :           /* V*QImode is emulated with 4-11 insns.  */
   22164      9721912 :           nmults = 1;
   22165      9721912 :           nops = 3;
   22166      9721912 :           extra = 0;
   22167              : 
   22168      9721912 :           if (TARGET_AVX2 && !TARGET_PREFER_AVX128)
   22169              :             {
   22170       320723 :               if (!(TARGET_AVX512BW && TARGET_AVX512VL))
   22171       266180 :                 nops += 3;
   22172              :             }
   22173      9401189 :           else if (TARGET_XOP)
   22174              :             {
   22175         5464 :               nmults += 1;
   22176         5464 :               nops += 2;
   22177         5464 :               extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
   22178              :             }
   22179              :           else
   22180              :             {
   22181      9395725 :               nmults += 1;
   22182      9395725 :               nops += 4;
   22183      9395725 :               extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
   22184              :             }
   22185      9721912 :           goto do_qimode;
   22186              : 
   22187      9720292 :         case V32QImode:
   22188      9720292 :           nmults = 1;
   22189      9720292 :           nops = 3;
   22190      9720292 :           extra = 0;
   22191              : 
   22192      9720292 :           if (!TARGET_AVX512BW || TARGET_PREFER_AVX256)
   22193              :             {
   22194      9632588 :               nmults += 1;
   22195      9632588 :               nops += 4;
   22196              :               /* 2 loads, so no division by 2.  */
   22197      9632588 :               extra += COSTS_N_INSNS (cost->sse_load[3]);
   22198              :             }
   22199      9720292 :           goto do_qimode;
   22200              : 
   22201      9719813 :         case V64QImode:
   22202      9719813 :           nmults = 2;
   22203      9719813 :           nops = 9;
   22204              :           /* 2 loads of each size, so no division by 2.  */
   22205      9719813 :           extra = COSTS_N_INSNS (cost->sse_load[3] + cost->sse_load[4]);
   22206              : 
   22207     48604891 :         do_qimode:  /* Shared tail of all V*QImode cases.  */
   22208     48604891 :           return ix86_vec_cost (mode, cost->mulss * nmults
   22209     48604891 :                                 + cost->sse_op * nops) + extra;
   22210              : 
   22211     41568190 :         case V4SImode:
   22212              :           /* pmulld is used in this case.  No emulation is needed.  */
   22213     41568190 :           if (TARGET_SSE4_1)
   22214      2322413 :             goto do_native;
   22215              :           /* V4SImode is emulated with 7 insns.  */
   22216              :           else
   22217     39245777 :             return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
   22218              : 
   22219    168010411 :         case V2DImode:
   22220    168010411 :         case V4DImode:
   22221              :           /* vpmullq is used in this case.  No emulation is needed.  */
   22222    168010411 :           if (TARGET_AVX512DQ && TARGET_AVX512VL)
   22223       593747 :             goto do_native;
   22224              :           /* V*DImode is emulated with 6-8 insns.  */
   22225    167416664 :           else if (TARGET_XOP && mode == V2DImode)
   22226        55100 :             return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 4);
   22227              :           /* FALLTHRU */
   22228    251306326 :         case V8DImode:
   22229              :           /* vpmullq is used in this case.  No emulation is needed.  */
   22230    251306326 :           if (TARGET_AVX512DQ && mode == V8DImode)
   22231       391110 :             goto do_native;
   22232              :           else
   22233    250915216 :             return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
   22234              : 
   22235    895822514 :         default:
   22236    895822514 :         do_native:  /* A single multiply, scaled by ix86_vec_cost.  */
   22237    895822514 :           return ix86_vec_cost (mode, cost->mulss);
   22238              :         }
   22239              :     }
   22240              :   else  /* NOTE(review): the factor 7 on mult_bit looks like an assumed bit count -- confirm.  */
   22241       164758 :     return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
   22242              : }
   22243              : 
   22244              : /* Return cost of division in MODE, using the cost table COST.  */
   22245              : 
   22246              : static int
   22247     74228603 : ix86_division_cost (const struct processor_costs *cost,
   22248              :                           enum machine_mode mode)
   22249              : {
   22250     74228603 :   machine_mode inner_mode = mode;
   22251     74228603 :   if (VECTOR_MODE_P (mode))
   22252     54797957 :     inner_mode = GET_MODE_INNER (mode);
   22253              : 
   22254     74228603 :   if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   22255       248095 :     return inner_mode == DFmode ? cost->divsd : cost->divss;
   22256     73980508 :   else if (X87_FLOAT_MODE_P (mode))
   22257        44880 :     return cost->fdiv;
   22258     73935628 :   else if (FLOAT_MODE_P (mode))
   22259        17606 :     return ix86_vec_cost (mode,
   22260        17606 :                           inner_mode == DFmode ? cost->divsd : cost->divss);
   22261              :   else
   22262     82450858 :     return cost->divide[MODE_INDEX (mode)];
   22263              : }
   22264              : 
   22265              : /* Return cost of shift in MODE.
   22266              :    If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
   22267              :    AND_IN_OP1 specifies whether op1 is the result of an AND, and
   22268              :    SHIFT_AND_TRUNCATE whether op1 is the result of a subreg.
   22269              : 
   22270              :    SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored.  */
   22271              : 
   22272              : static int
   22273    793148593 : ix86_shift_rotate_cost (const struct processor_costs *cost,
   22274              :                         enum rtx_code code,
   22275              :                         enum machine_mode mode, bool constant_op1,
   22276              :                         HOST_WIDE_INT op1_val,
   22277              :                         bool and_in_op1,
   22278              :                         bool shift_and_truncate,
   22279              :                         bool *skip_op0, bool *skip_op1)
   22280              : {
   22281    793148593 :   if (skip_op0)
   22282    793076245 :     *skip_op0 = *skip_op1 = false;
   22283              : 
   22284    793148593 :   if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   22285              :     {
   22286    407540913 :       int count;
   22287              :       /* Cost of reading the memory.  */
   22288    407540913 :       int extra;
   22289              : 
   22290    407540913 :       switch (mode)
   22291              :         {
   22292      6186377 :         case V4QImode:
   22293      6186377 :         case V8QImode:
   22294      6186377 :           if (TARGET_AVX2)
   22295              :             /* Use vpbroadcast.  */
   22296       205455 :             extra = cost->sse_op;
   22297              :           else
   22298      5980922 :             extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
   22299              : 
   22300      6186377 :           if (constant_op1)
   22301              :             {
   22302      6186347 :               if (code == ASHIFTRT)
   22303              :                 {
   22304          190 :                   count = 4;
   22305          190 :                   extra *= 2;
   22306              :                 }
   22307              :               else
   22308              :                 count = 2;
   22309              :             }
   22310           30 :           else if (TARGET_AVX512BW && TARGET_AVX512VL)
   22311           30 :             return ix86_vec_cost (mode, cost->sse_op * 4);
   22312            0 :           else if (TARGET_SSE4_1)
   22313              :             count = 5;
   22314            0 :           else if (code == ASHIFTRT)
   22315              :             count = 6;
   22316              :           else
   22317            0 :             count = 5;
   22318      6186347 :           return ix86_vec_cost (mode, cost->sse_op * count) + extra;
   22319              : 
   22320      3096237 :         case V16QImode:
   22321      3096237 :           if (TARGET_XOP)
   22322              :             {
   22323              :               /* For XOP we use vpshab, which requires a broadcast of the
   22324              :                  value to the variable shift insn.  For constants this
   22325              :                  means a V16Q const in mem; even when we can perform the
   22326              :                  shift with one insn set the cost to prefer paddb.  */
   22327         3573 :               if (constant_op1)
   22328              :                 {
   22329         2614 :                   extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
   22330         2614 :                   return ix86_vec_cost (mode, cost->sse_op) + extra;
   22331              :                 }
   22332              :               else
   22333              :                 {
   22334          959 :                   count = (code == ASHIFT) ? 3 : 4;
   22335          959 :                   return ix86_vec_cost (mode, cost->sse_op * count);
   22336              :                 }
   22337              :             }
   22338              :           /* FALLTHRU */
   22339      6185547 :         case V32QImode:
   22340      6185547 :           if (TARGET_GFNI && constant_op1)
   22341              :             {
   22342              :               /* Use vgf2p8affine.  One extra load for the mask, but in a loop
   22343              :                  with enough registers it will be moved out.  So for now don't
   22344              :                  account the constant mask load.  This is not quite right
   22345              :                  for non loop vectorization.  */
   22346        11990 :               extra = 0;
   22347        11990 :               return ix86_vec_cost (mode, cost->sse_op) + extra;
   22348              :             }
   22349      6173557 :           if (TARGET_AVX2)
   22350              :             /* Use vpbroadcast.  */
   22351       198290 :             extra = cost->sse_op;
   22352              :           else
   22353      5975267 :             extra = COSTS_N_INSNS (mode == V16QImode
   22354              :                                    ? cost->sse_load[2]
   22355      5975267 :                                    : cost->sse_load[3]) / 2;
   22356              : 
   22357      6173557 :           if (constant_op1)
   22358              :             {
   22359      6173369 :               if (code == ASHIFTRT)
   22360              :                 {
   22361          198 :                   count = 4;
   22362          198 :                   extra *= 2;
   22363              :                 }
   22364              :               else
   22365              :                 count = 2;
   22366              :             }
   22367          188 :           else if (TARGET_AVX512BW
   22368           76 :                    && ((mode == V32QImode && !TARGET_PREFER_AVX256)
   22369           38 :                        || (mode == V16QImode && TARGET_AVX512VL
   22370           38 :                            && !TARGET_PREFER_AVX128)))
   22371           76 :             return ix86_vec_cost (mode, cost->sse_op * 4);
   22372          112 :           else if (TARGET_AVX2
   22373            0 :                    && mode == V16QImode && !TARGET_PREFER_AVX128)
   22374              :             count = 6;
   22375          112 :           else if (TARGET_SSE4_1)
   22376              :             count = 9;
   22377          112 :           else if (code == ASHIFTRT)
   22378              :             count = 10;
   22379              :           else
   22380           76 :             count = 9;
   22381      6173481 :           return ix86_vec_cost (mode, cost->sse_op * count) + extra;
   22382              : 
   22383      3093132 :         case V64QImode:
   22384              :           /* Ignore the mask load for GF2P8AFFINEQB.  */
   22385      3093132 :           extra = 0;
   22386      3093132 :           return ix86_vec_cost (mode, cost->sse_op) + extra;
   22387              : 
   22388     55758041 :         case V2DImode:
   22389     55758041 :         case V4DImode:
   22390              :           /* V*DImode arithmetic right shift is emulated.  */
   22391     55758041 :           if (code == ASHIFTRT && !TARGET_AVX512VL)
   22392              :             {
   22393         1387 :               if (constant_op1)
   22394              :                 {
   22395          648 :                   if (op1_val == 63)
   22396          438 :                     count = TARGET_SSE4_2 ? 1 : 2;
   22397          509 :                   else if (TARGET_XOP)
   22398              :                     count = 2;
   22399          210 :                   else if (TARGET_SSE4_1)
   22400              :                     count = 3;
   22401              :                   else
   22402          230 :                     count = 4;
   22403              :                 }
   22404          739 :               else if (TARGET_XOP)
   22405              :                 count = 3;
   22406           74 :               else if (TARGET_SSE4_2)
   22407              :                 count = 4;
   22408              :               else
   22409         1387 :                 count = 5;
   22410              : 
   22411         1387 :               return ix86_vec_cost (mode, cost->sse_op * count);
   22412              :             }
   22413              :           /* FALLTHRU */
   22414    392070897 :         default:
   22415    392070897 :           return ix86_vec_cost (mode, cost->sse_op);
   22416              :         }
   22417              :     }
   22418              : 
   22419    779907676 :   if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22420              :     {
   22421    197449456 :       if (constant_op1)
   22422              :         {
   22423    197414500 :           if (op1_val > 32)
   22424    140272857 :             return cost->shift_const + COSTS_N_INSNS (2);
   22425              :           else
   22426     57141643 :             return cost->shift_const * 2;
   22427              :         }
   22428              :       else
   22429              :         {
   22430        34956 :           if (and_in_op1)
   22431           63 :             return cost->shift_var * 2;
   22432              :           else
   22433        34893 :             return cost->shift_var * 6 + COSTS_N_INSNS (2);
   22434              :         }
   22435              :     }
   22436              :   else
   22437              :     {
   22438    188158224 :       if (constant_op1)
   22439    187420514 :         return cost->shift_const;
   22440       737710 :       else if (shift_and_truncate)
   22441              :         {
   22442        22906 :           if (skip_op0)
   22443        22906 :             *skip_op0 = *skip_op1 = true;
   22444              :           /* Return the cost after shift-and truncation.  */
   22445        22906 :           return cost->shift_var;
   22446              :         }
   22447              :       else
   22448       714804 :         return cost->shift_var;
   22449              :     }
   22450              : }
   22451              : 
    22452              : static int
    22453    146260970 : ix86_insn_cost (rtx_insn *insn, bool speed)
    22454              : {
    22455    146260970 :   int insn_cost = 0;
    22456              :   /* Add extra cost so that post_reload late_combine does not revert
    22457              :      the optimization done in pass_rpad.  */
    22458    146260970 :   if (reload_completed
    22459      4547960 :       && ix86_rpad_gate ()
    22460       253318 :       && recog_memoized (insn) >= 0
    22461    146514026 :       && get_attr_avx_partial_xmm_update (insn)
    22462              :       == AVX_PARTIAL_XMM_UPDATE_TRUE)
    22463              :     insn_cost += COSTS_N_INSNS (3);
    22464              : 
    22465    146260970 :   rtx pat = PATTERN (insn);
    22466              :   /* A USE of a MEM is more expensive than a USE of a REG, for
    22467              :      example *<absneg>mode2_1's use of a signbit mask.  */
    22468    146260970 :   if (GET_CODE (pat) == PARALLEL)
    22469              :     {
    22470     46444687 :       for (int i = 0; i < XVECLEN (pat, 0); i++)
    22471              :         {
    22472     31192467 :           rtx x = XVECEXP (pat, 0, i);
    22473     31192467 :           if (GET_CODE (x) == USE && MEM_P (XEXP (x, 0)))
    22474        57173 :             insn_cost += !speed ? COSTS_N_BYTES (4)
    22475        26911 :                                 : TARGET_64BIT ? COSTS_N_INSNS (1) + 1
    22476              :                                                : COSTS_N_INSNS (3) + 1;
    22477              :         }
    22478              :     }
    22479              : 
    22480    146260970 :   return insn_cost + pattern_cost (pat, speed);
    22481              : }
   22482              : 
    22483              : /* Return the cost of an SSE/AVX FP->FP conversion (extension or truncation) for SIZE (presumably a width in bits; TODO confirm).  */
    22484              : 
    22485              : static int
    22486       757435 : vec_fp_conversion_cost (const struct processor_costs *cost, int size)
    22487              : {
    22488       757435 :   if (size < 128)
    22489       752298 :     return cost->cvtss2sd;
    22490         5137 :   else if (size < 256)
    22491              :     {
    22492         2352 :       if (TARGET_SSE_SPLIT_REGS)
    22493            0 :         return cost->cvtss2sd * size / 64;
    22494         2352 :       return cost->cvtss2sd;
    22495              :     }
    22496         2785 :   if (size < 512)
    22497         1483 :     return cost->vcvtps2pd256;
    22498              :   else
    22499         1302 :     return cost->vcvtps2pd512;
    22500              : }
   22501              : 
    22502              : /* Return true if X is an UNSPEC with UNSPEC_PCMP or UNSPEC_UNSIGNED_PCMP.  */
    22503              : 
    22504              : static bool
    22505       272908 : unspec_pcmp_p (rtx x)
    22506              : {
    22507       272908 :   return GET_CODE (x) == UNSPEC
    22508       272908 :          && (XINT (x, 1) == UNSPEC_PCMP || XINT (x, 1) == UNSPEC_UNSIGNED_PCMP);
    22509              : }
   22510              : 
   22511              : /* Compute a (partial) cost for rtx X.  Return true if the complete
   22512              :    cost has been computed, and false if subexpressions should be
   22513              :    scanned.  In either case, *TOTAL contains the cost result.  */
   22514              : 
   22515              : static bool
   22516   7858874154 : ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
   22517              :                 int *total, bool speed)
   22518              : {
   22519   7858874154 :   rtx mask;
   22520   7858874154 :   enum rtx_code code = GET_CODE (x);
   22521   7858874154 :   enum rtx_code outer_code = (enum rtx_code) outer_code_i;
   22522   4200151359 :   const struct processor_costs *cost
   22523   7858874154 :     = speed ? ix86_tune_cost : &ix86_size_cost;
   22524   7858874154 :   int src_cost;
   22525              : 
   22526              :   /* Handling different vternlog variants.  */
   22527   7858874154 :   if ((GET_MODE_SIZE (mode) == 64
   22528   7858874154 :        ? TARGET_AVX512F
   22529   6647169655 :        : (TARGET_AVX512VL
   22530   6584577895 :           || (TARGET_AVX512F && !TARGET_PREFER_AVX256)))
   22531    186350006 :       && GET_MODE_SIZE (mode) >= 16
   22532    126846639 :       && outer_code_i == SET
   22533   7907813036 :       && ternlog_operand (x, mode))
   22534              :     {
   22535        33644 :       rtx args[3];
   22536              : 
   22537        33644 :       args[0] = NULL_RTX;
   22538        33644 :       args[1] = NULL_RTX;
   22539        33644 :       args[2] = NULL_RTX;
   22540        33644 :       int idx = ix86_ternlog_idx (x, args);
   22541        33644 :       gcc_assert (idx >= 0);
   22542              : 
   22543        33644 :       *total = cost->sse_op;
   22544       134576 :       for (int i = 0; i != 3; i++)
   22545       100932 :         if (args[i])
   22546        71162 :           *total += rtx_cost (args[i], GET_MODE (args[i]), UNSPEC, i, speed);
   22547        33644 :       return true;
   22548              :     }
   22549              : 
   22550              : 
   22551   7858840510 :   switch (code)
   22552              :     {
   22553     48281420 :     case SET:
   22554     48281420 :       if (register_operand (SET_DEST (x), VOIDmode)
   22555     48281420 :           && register_operand (SET_SRC (x), VOIDmode))
   22556              :         {
   22557     29863581 :           *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
   22558     29863581 :           return true;
   22559              :         }
   22560              : 
   22561     18417839 :       if (register_operand (SET_SRC (x), VOIDmode))
   22562              :         /* Avoid potentially incorrect high cost from rtx_costs
   22563              :            for non-tieable SUBREGs.  */
   22564              :         src_cost = 0;
   22565              :       else
   22566              :         {
   22567     15610730 :           src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
   22568              : 
   22569     15610730 :           if (CONSTANT_P (SET_SRC (x)))
   22570              :             /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
   22571              :                a small value, possibly zero for cheap constants.  */
   22572      6978898 :             src_cost += COSTS_N_INSNS (1);
   22573              :         }
   22574              : 
   22575     18417839 :       *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
   22576     18417839 :       return true;
   22577              : 
   22578   2893220731 :     case CONST_INT:
   22579   2893220731 :     case CONST:
   22580   2893220731 :     case LABEL_REF:
   22581   2893220731 :     case SYMBOL_REF:
   22582   2893220731 :       if (x86_64_immediate_operand (x, VOIDmode))
   22583   2273630377 :         *total = 0;
   22584    619590354 :       else if (TARGET_64BIT && x86_64_zext_immediate_operand (x, VOIDmode))
   22585              :         /* Consider the zext constants slightly more expensive, as they
   22586              :            can't appear in most instructions.  */
   22587     28688549 :         *total = 1;
   22588              :       else
   22589              :         /* movabsq is slightly more expensive than a simple instruction. */
   22590    590901805 :         *total = COSTS_N_INSNS (1) + 1;
   22591              :       return true;
   22592              : 
   22593      7534402 :     case CONST_DOUBLE:
   22594      7534402 :       if (IS_STACK_MODE (mode))
   22595      1300462 :         switch (standard_80387_constant_p (x))
   22596              :           {
   22597              :           case -1:
   22598              :           case 0:
   22599              :             break;
   22600       279974 :           case 1: /* 0.0 */
   22601       279974 :             *total = 1;
   22602       279974 :             return true;
   22603       485555 :           default: /* Other constants */
   22604       485555 :             *total = 2;
   22605       485555 :             return true;
   22606              :           }
   22607              :       /* FALLTHRU */
   22608              : 
   22609     14508842 :     case CONST_VECTOR:
   22610     14508842 :       switch (standard_sse_constant_p (x, mode))
   22611              :         {
   22612              :         case 0:
   22613              :           break;
   22614      4206217 :         case 1:  /* 0: xor eliminates false dependency */
   22615      4206217 :           *total = 0;
   22616      4206217 :           return true;
   22617       192833 :         default: /* -1: cmp contains false dependency */
   22618       192833 :           *total = 1;
   22619       192833 :           return true;
   22620              :         }
   22621              :       /* FALLTHRU */
   22622              : 
   22623     11106217 :     case CONST_WIDE_INT:
   22624              :       /* Fall back to (MEM (SYMBOL_REF)), since that's where
   22625              :          it'll probably end up.  Add a penalty for size.  */
   22626     22212434 :       *total = (COSTS_N_INSNS (1)
   22627     21986408 :                 + (!TARGET_64BIT && flag_pic)
   22628     22212434 :                 + (GET_MODE_SIZE (mode) <= 4
   22629     19441198 :                    ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
   22630     11106217 :       return true;
   22631              : 
   22632     22864899 :     case ZERO_EXTEND:
    22633              :       /* Zero extension is often completely free on x86_64, so make
   22634              :          it as cheap as possible.  */
   22635     22864899 :       if (TARGET_64BIT && mode == DImode
   22636      4886760 :           && GET_MODE (XEXP (x, 0)) == SImode)
   22637      2969411 :         *total = 1;
   22638     19895488 :       else if (TARGET_ZERO_EXTEND_WITH_AND)
   22639            0 :         *total = cost->add;
   22640              :       else
   22641     19895488 :         *total = cost->movzx;
   22642              :       return false;
   22643              : 
   22644      2714271 :     case SIGN_EXTEND:
   22645      2714271 :       *total = cost->movsx;
   22646      2714271 :       return false;
   22647              : 
   22648    652569832 :     case ASHIFT:
   22649    652569832 :       if (SCALAR_INT_MODE_P (mode)
   22650    251964801 :           && GET_MODE_SIZE (mode) < UNITS_PER_WORD
   22651    696679927 :           && CONST_INT_P (XEXP (x, 1)))
   22652              :         {
   22653     43931421 :           HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
   22654     43931421 :           if (value == 1)
   22655              :             {
   22656      2528988 :               *total = cost->add;
   22657      2528988 :               return false;
   22658              :             }
   22659     41402433 :           if ((value == 2 || value == 3)
   22660      4650050 :               && cost->lea <= cost->shift_const)
   22661              :             {
   22662      2197374 :               *total = cost->lea;
   22663      2197374 :               return false;
   22664              :             }
   22665              :         }
   22666              :       /* FALLTHRU */
   22667              : 
   22668    793076245 :     case ROTATE:
   22669    793076245 :     case ASHIFTRT:
   22670    793076245 :     case LSHIFTRT:
   22671    793076245 :     case ROTATERT:
   22672    793076245 :       bool skip_op0, skip_op1;
   22673    793076245 :       *total = ix86_shift_rotate_cost (cost, code, mode,
   22674    793076245 :                                        CONSTANT_P (XEXP (x, 1)),
   22675              :                                        CONST_INT_P (XEXP (x, 1))
   22676              :                                          ? INTVAL (XEXP (x, 1)) : -1,
   22677              :                                        GET_CODE (XEXP (x, 1)) == AND,
   22678    793076245 :                                        SUBREG_P (XEXP (x, 1))
   22679    793076245 :                                        && GET_CODE (XEXP (XEXP (x, 1),
   22680              :                                                           0)) == AND,
   22681              :                                        &skip_op0, &skip_op1);
   22682    793076245 :       if (skip_op0 || skip_op1)
   22683              :         {
   22684        22906 :           if (!skip_op0)
   22685            0 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   22686        22906 :           if (!skip_op1)
   22687            0 :             *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
   22688        22906 :           return true;
   22689              :         }
   22690              :       return false;
   22691              : 
   22692       232120 :     case FMA:
   22693       232120 :       {
   22694       232120 :         rtx sub;
   22695              : 
   22696       232120 :         gcc_assert (FLOAT_MODE_P (mode));
   22697       232120 :         gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
   22698              : 
   22699       464240 :         *total = ix86_vec_cost (mode,
   22700       232120 :                                 GET_MODE_INNER (mode) == SFmode
   22701              :                                 ? cost->fmass : cost->fmasd);
   22702       232120 :         *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
   22703              : 
   22704              :         /* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
   22705       232120 :         sub = XEXP (x, 0);
   22706       232120 :         if (GET_CODE (sub) == NEG)
   22707        51516 :           sub = XEXP (sub, 0);
   22708       232120 :         *total += rtx_cost (sub, mode, FMA, 0, speed);
   22709              : 
   22710       232120 :         sub = XEXP (x, 2);
   22711       232120 :         if (GET_CODE (sub) == NEG)
   22712        40543 :           sub = XEXP (sub, 0);
   22713       232120 :         *total += rtx_cost (sub, mode, FMA, 2, speed);
   22714       232120 :         return true;
   22715              :       }
   22716              : 
   22717   1800223609 :     case MULT:
   22718   1800223609 :       if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
   22719              :         {
   22720    564604377 :           rtx op0 = XEXP (x, 0);
   22721    564604377 :           rtx op1 = XEXP (x, 1);
   22722    564604377 :           int nbits;
   22723    564604377 :           if (CONST_INT_P (XEXP (x, 1)))
   22724              :             {
   22725    545862554 :               unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
   22726   1107512381 :               for (nbits = 0; value != 0; value &= value - 1)
   22727    561649827 :                 nbits++;
   22728              :             }
   22729              :           else
   22730              :             /* This is arbitrary.  */
   22731              :             nbits = 7;
   22732              : 
   22733              :           /* Compute costs correctly for widening multiplication.  */
   22734    564604377 :           if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
   22735    570260615 :               && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
   22736      5656238 :                  == GET_MODE_SIZE (mode))
   22737              :             {
   22738      5642313 :               int is_mulwiden = 0;
   22739      5642313 :               machine_mode inner_mode = GET_MODE (op0);
   22740              : 
   22741      5642313 :               if (GET_CODE (op0) == GET_CODE (op1))
   22742      5541712 :                 is_mulwiden = 1, op1 = XEXP (op1, 0);
   22743       100601 :               else if (CONST_INT_P (op1))
   22744              :                 {
   22745        90620 :                   if (GET_CODE (op0) == SIGN_EXTEND)
   22746        40529 :                     is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
   22747        40529 :                                   == INTVAL (op1);
   22748              :                   else
   22749        50091 :                     is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
   22750              :                 }
   22751              : 
   22752      5632332 :               if (is_mulwiden)
   22753      5632332 :                 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
   22754              :             }
   22755              : 
   22756    564604377 :           int mult_init;
   22757              :           // Double word multiplication requires 3 mults and 2 adds.
   22758   1144980966 :           if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22759              :             {
   22760    339732672 :               mult_init = 3 * cost->mult_init[MODE_INDEX (word_mode)]
   22761    339732672 :                           + 2 * cost->add;
   22762    339732672 :               nbits *= 3;
   22763              :             }
   22764    387892810 :           else mult_init = cost->mult_init[MODE_INDEX (mode)];
   22765              : 
   22766   1129208754 :           *total = (mult_init
   22767    564604377 :                     + nbits * cost->mult_bit
   22768    564604377 :                     + rtx_cost (op0, mode, outer_code, opno, speed)
   22769    564604377 :                     + rtx_cost (op1, mode, outer_code, opno, speed));
   22770              : 
   22771    564604377 :           return true;
   22772              :         }
   22773   1235619232 :       *total = ix86_multiplication_cost (cost, mode);
   22774   1235619232 :       return false;
   22775              : 
   22776     74214512 :     case DIV:
   22777     74214512 :     case UDIV:
   22778     74214512 :     case MOD:
   22779     74214512 :     case UMOD:
   22780     74214512 :       *total = ix86_division_cost (cost, mode);
   22781     74214512 :       return false;
   22782              : 
   22783    702596783 :     case PLUS:
   22784    702596783 :       if (GET_MODE_CLASS (mode) == MODE_INT
   22785    961106604 :           && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
   22786              :         {
   22787    142965899 :           if (GET_CODE (XEXP (x, 0)) == PLUS
   22788      3696317 :               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
   22789       832882 :               && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
   22790       832857 :               && CONSTANT_P (XEXP (x, 1)))
   22791              :             {
   22792       832800 :               HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
   22793       832800 :               if (val == 2 || val == 4 || val == 8)
   22794              :                 {
   22795       832696 :                   *total = cost->lea;
   22796       832696 :                   *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
   22797              :                                       outer_code, opno, speed);
   22798       832696 :                   *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
   22799              :                                       outer_code, opno, speed);
   22800       832696 :                   *total += rtx_cost (XEXP (x, 1), mode,
   22801              :                                       outer_code, opno, speed);
   22802       832696 :                   return true;
   22803              :                 }
   22804              :             }
   22805    142133099 :           else if (GET_CODE (XEXP (x, 0)) == MULT
   22806     53559212 :                    && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
   22807              :             {
   22808     53497875 :               HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
   22809     53497875 :               if (val == 2 || val == 4 || val == 8)
   22810              :                 {
   22811      8129942 :                   *total = cost->lea;
   22812      8129942 :                   *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   22813              :                                       outer_code, opno, speed);
   22814      8129942 :                   *total += rtx_cost (XEXP (x, 1), mode,
   22815              :                                       outer_code, opno, speed);
   22816      8129942 :                   return true;
   22817              :                 }
   22818              :             }
   22819     88635224 :           else if (GET_CODE (XEXP (x, 0)) == PLUS)
   22820              :             {
   22821      2863517 :               rtx op = XEXP (XEXP (x, 0), 0);
   22822              : 
   22823              :               /* Add with carry, ignore the cost of adding a carry flag.  */
   22824      2863517 :               if (ix86_carry_flag_operator (op, mode)
   22825      2863517 :                   || ix86_carry_flag_unset_operator (op, mode))
   22826        70510 :                 *total = cost->add;
   22827              :               else
   22828              :                 {
   22829      2793007 :                   *total = cost->lea;
   22830      2793007 :                   *total += rtx_cost (op, mode,
   22831              :                                       outer_code, opno, speed);
   22832              :                 }
   22833              : 
   22834      2863517 :               *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
   22835              :                                   outer_code, opno, speed);
   22836      2863517 :               *total += rtx_cost (XEXP (x, 1), mode,
   22837              :                                   outer_code, opno, speed);
   22838      2863517 :               return true;
   22839              :             }
   22840              :         }
   22841              :       /* FALLTHRU */
   22842              : 
   22843   1876578748 :     case MINUS:
   22844              :       /* Subtract with borrow, ignore the cost of subtracting a carry flag.  */
   22845   1876578748 :       if (GET_MODE_CLASS (mode) == MODE_INT
   22846    527896868 :           && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
   22847    237520167 :           && GET_CODE (XEXP (x, 0)) == MINUS
   22848   1876619183 :           && (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode)
   22849        15506 :               || ix86_carry_flag_unset_operator (XEXP (XEXP (x, 0), 1), mode)))
   22850              :         {
   22851        24929 :           *total = cost->add;
   22852        24929 :           *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   22853              :                               outer_code, opno, speed);
   22854        24929 :           *total += rtx_cost (XEXP (x, 1), mode,
   22855              :                               outer_code, opno, speed);
   22856        24929 :           return true;
   22857              :         }
   22858              : 
   22859   1876553819 :       if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   22860      2411533 :         *total = cost->addss;
   22861   1874142286 :       else if (X87_FLOAT_MODE_P (mode))
   22862       220127 :         *total = cost->fadd;
   22863   1873922159 :       else if (FLOAT_MODE_P (mode))
   22864       448648 :         *total = ix86_vec_cost (mode, cost->addss);
   22865   1873473511 :       else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   22866   1235437463 :         *total = ix86_vec_cost (mode, cost->sse_op);
   22867   1315437049 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22868    338690210 :         *total = cost->add * 2;
   22869              :       else
   22870    299345838 :         *total = cost->add;
   22871              :       return false;
   22872              : 
   22873      3940081 :     case IOR:
   22874      3940081 :       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   22875      3693395 :           || SSE_FLOAT_MODE_P (mode))
   22876              :         {
   22877              :           /* (ior (not ...) ...) can be a single insn in AVX512.  */
   22878          480 :           if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
   22879       256277 :               && (GET_MODE_SIZE (mode) == 64
   22880            0 :                   || (TARGET_AVX512VL
   22881            0 :                       && (GET_MODE_SIZE (mode) == 32
   22882            0 :                           || GET_MODE_SIZE (mode) == 16))))
   22883              :             {
   22884            0 :               rtx right = GET_CODE (XEXP (x, 1)) != NOT
   22885            0 :                           ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0);
   22886              : 
   22887            0 :               *total = ix86_vec_cost (mode, cost->sse_op)
   22888            0 :                        + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   22889              :                                    outer_code, opno, speed)
   22890            0 :                        + rtx_cost (right, mode, outer_code, opno, speed);
   22891            0 :               return true;
   22892              :             }
   22893       256277 :           *total = ix86_vec_cost (mode, cost->sse_op);
   22894       256277 :         }
   22895      3683804 :       else if (TARGET_64BIT
   22896      3391663 :                && mode == TImode
   22897      1690196 :                && GET_CODE (XEXP (x, 0)) == ASHIFT
   22898       252796 :                && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
   22899       250800 :                && GET_MODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == DImode
   22900       250800 :                && CONST_INT_P (XEXP (XEXP (x, 0), 1))
   22901       250800 :                && INTVAL (XEXP (XEXP (x, 0), 1)) == 64
   22902       250800 :                && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
   22903       228698 :                && GET_MODE (XEXP (XEXP (x, 1), 0)) == DImode)
   22904              :         {
   22905              :           /* *concatditi3 is cheap.  */
   22906       228698 :           rtx op0 = XEXP (XEXP (XEXP (x, 0), 0), 0);
   22907       228698 :           rtx op1 = XEXP (XEXP (x, 1), 0);
   22908         1386 :           *total = (SUBREG_P (op0) && GET_MODE (SUBREG_REG (op0)) == DFmode)
   22909       228698 :                    ? COSTS_N_INSNS (1)    /* movq.  */
   22910       227312 :                    : set_src_cost (op0, DImode, speed);
   22911         2348 :           *total += (SUBREG_P (op1) && GET_MODE (SUBREG_REG (op1)) == DFmode)
   22912       228698 :                     ? COSTS_N_INSNS (1)    /* movq.  */
   22913       226363 :                     : set_src_cost (op1, DImode, speed);
   22914       228698 :           return true;
   22915              :         }
   22916      3455106 :       else if (TARGET_64BIT
   22917      3162965 :                && mode == TImode
   22918      1461498 :                && GET_CODE (XEXP (x, 0)) == AND
   22919      1401561 :                && REG_P (XEXP (XEXP (x, 0), 0))
   22920      1396419 :                && CONST_WIDE_INT_P (XEXP (XEXP (x, 0), 1))
   22921      1393709 :                && CONST_WIDE_INT_NUNITS (XEXP (XEXP (x, 0), 1)) == 2
   22922      1393709 :                && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 0) == -1
   22923       909659 :                && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 1) == 0
   22924       909659 :                && GET_CODE (XEXP (x, 1)) == ASHIFT
   22925       907497 :                && GET_CODE (XEXP (XEXP (x, 1), 0)) == ZERO_EXTEND
   22926       907497 :                && GET_MODE (XEXP (XEXP (XEXP (x, 1), 0), 0)) == DImode
   22927       907497 :                && CONST_INT_P (XEXP (XEXP (x, 1), 1))
   22928      4362603 :                && INTVAL (XEXP (XEXP (x, 1), 1)) == 64)
   22929              :         {
   22930              :           /* *insvti_highpart is cheap.  */
   22931       907497 :           rtx op = XEXP (XEXP (XEXP (x, 1), 0), 0);
   22932       907497 :           *total = COSTS_N_INSNS (1) + 1;
   22933         1389 :           *total += (SUBREG_P (op) && GET_MODE (SUBREG_REG (op)) == DFmode)
   22934       907497 :                     ? COSTS_N_INSNS (1)    /* movq.  */
   22935       906594 :                     : set_src_cost (op, DImode, speed);
   22936       907497 :           return true;
   22937              :         }
   22938      5387359 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22939       747323 :         *total = cost->add * 2;
   22940              :       else
   22941      1800286 :         *total = cost->add;
   22942              :       return false;
   22943              : 
   22944       569361 :     case XOR:
   22945       569361 :       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   22946       436106 :           || SSE_FLOAT_MODE_P (mode))
   22947       133255 :         *total = ix86_vec_cost (mode, cost->sse_op);
   22948       931750 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22949        16513 :         *total = cost->add * 2;
   22950              :       else
   22951       419593 :         *total = cost->add;
   22952              :       return false;
   22953              : 
   22954      6947691 :     case AND:
   22955      6947691 :       if (address_no_seg_operand (x, mode))
   22956              :         {
   22957        15792 :           *total = cost->lea;
   22958        15792 :           return true;
   22959              :         }
   22960      6931899 :       else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   22961      6526443 :                || SSE_FLOAT_MODE_P (mode))
   22962              :         {
   22963              :           /* pandn is a single instruction.  */
   22964       438937 :           if (GET_CODE (XEXP (x, 0)) == NOT)
   22965              :             {
   22966        56519 :               rtx right = XEXP (x, 1);
   22967              : 
   22968              :               /* (and (not ...) (not ...)) can be a single insn in AVX512.  */
   22969          362 :               if (GET_CODE (right) == NOT && TARGET_AVX512F
   22970        56519 :                   && (GET_MODE_SIZE (mode) == 64
   22971            0 :                       || (TARGET_AVX512VL
   22972            0 :                           && (GET_MODE_SIZE (mode) == 32
   22973            0 :                               || GET_MODE_SIZE (mode) == 16))))
   22974            0 :                 right = XEXP (right, 0);
   22975              : 
   22976        56519 :               *total = ix86_vec_cost (mode, cost->sse_op)
   22977        56519 :                        + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   22978              :                                    outer_code, opno, speed)
   22979        56519 :                        + rtx_cost (right, mode, outer_code, opno, speed);
   22980        56519 :               return true;
   22981              :             }
   22982       382418 :           else if (GET_CODE (XEXP (x, 1)) == NOT)
   22983              :             {
   22984          740 :               *total = ix86_vec_cost (mode, cost->sse_op)
   22985          740 :                        + rtx_cost (XEXP (x, 0), mode,
   22986              :                                    outer_code, opno, speed)
   22987          740 :                        + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
   22988              :                                    outer_code, opno, speed);
   22989          740 :               return true;
   22990              :             }
   22991       381678 :           *total = ix86_vec_cost (mode, cost->sse_op);
   22992       381678 :         }
   22993     13685615 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22994              :         {
   22995      1134095 :           if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
   22996              :             {
   22997         1670 :               *total = cost->add * 2
   22998          835 :                        + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   22999              :                                    outer_code, opno, speed)
   23000          835 :                        + rtx_cost (XEXP (x, 1), mode,
   23001              :                                    outer_code, opno, speed);
   23002          835 :               return true;
   23003              :             }
   23004      1133260 :           else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT)
   23005              :             {
   23006            0 :               *total = cost->add * 2
   23007            0 :                        + rtx_cost (XEXP (x, 0), mode,
   23008              :                                    outer_code, opno, speed)
   23009            0 :                        + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
   23010              :                                    outer_code, opno, speed);
   23011            0 :               return true;
   23012              :             }
   23013      1133260 :           *total = cost->add * 2;
   23014              :         }
   23015      5358867 :       else if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
   23016              :         {
   23017         7578 :           *total = cost->add
   23018         3789 :                    + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   23019              :                                outer_code, opno, speed)
   23020         3789 :                    + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
   23021         3789 :           return true;
   23022              :         }
   23023      5355078 :       else if (TARGET_BMI && GET_CODE (XEXP (x,1)) == NOT)
   23024              :         {
   23025          112 :           *total = cost->add
   23026           56 :                    + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
   23027           56 :                    + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
   23028              :                                outer_code, opno, speed);
   23029           56 :           return true;
   23030              :         }
   23031              :       else
   23032      5355022 :         *total = cost->add;
   23033              :       return false;
   23034              : 
   23035       518018 :     case NOT:
   23036       518018 :       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   23037              :         {
   23038              :           /* (not (xor ...)) can be a single insn in AVX512.  */
   23039            0 :           if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
   23040        10968 :               && (GET_MODE_SIZE (mode) == 64
   23041            0 :                   || (TARGET_AVX512VL
   23042            0 :                       && (GET_MODE_SIZE (mode) == 32
   23043            0 :                           || GET_MODE_SIZE (mode) == 16))))
   23044              :             {
   23045            0 :               *total = ix86_vec_cost (mode, cost->sse_op)
   23046            0 :                        + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   23047              :                                    outer_code, opno, speed)
   23048            0 :                        + rtx_cost (XEXP (XEXP (x, 0), 1), mode,
   23049              :                                    outer_code, opno, speed);
   23050            0 :               return true;
   23051              :             }
   23052              : 
   23053              :           // vnot is pxor -1.
   23054        10968 :           *total = ix86_vec_cost (mode, cost->sse_op) + 1;
   23055              :         }
   23056      1160013 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   23057        45968 :         *total = cost->add * 2;
   23058              :       else
   23059       461082 :         *total = cost->add;
   23060              :       return false;
   23061              : 
   23062     18618208 :     case NEG:
   23063     18618208 :       if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23064        52068 :         *total = cost->sse_op;
   23065     18566140 :       else if (X87_FLOAT_MODE_P (mode))
   23066        15087 :         *total = cost->fchs;
   23067     18551053 :       else if (FLOAT_MODE_P (mode))
   23068        27070 :         *total = ix86_vec_cost (mode, cost->sse_op);
   23069     18523983 :       else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   23070     13708129 :         *total = ix86_vec_cost (mode, cost->sse_op);
   23071      9783093 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   23072      1804335 :         *total = cost->add * 3;
   23073              :       else
   23074      3011519 :         *total = cost->add;
   23075              :       return false;
   23076              : 
   23077     53381026 :     case COMPARE:
   23078     53381026 :       rtx op0, op1;
   23079     53381026 :       op0 = XEXP (x, 0);
   23080     53381026 :       op1 = XEXP (x, 1);
   23081     53381026 :       if (GET_CODE (op0) == ZERO_EXTRACT
   23082       163826 :           && XEXP (op0, 1) == const1_rtx
   23083       146630 :           && CONST_INT_P (XEXP (op0, 2))
   23084       146594 :           && op1 == const0_rtx)
   23085              :         {
   23086              :           /* This kind of construct is implemented using test[bwl].
   23087              :              Treat it as if we had an AND.  */
   23088       146594 :           mode = GET_MODE (XEXP (op0, 0));
   23089       293188 :           *total = (cost->add
   23090       146594 :                     + rtx_cost (XEXP (op0, 0), mode, outer_code,
   23091              :                                 opno, speed)
   23092       146594 :                     + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
   23093       146594 :           return true;
   23094              :         }
   23095              : 
   23096     53234432 :       if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1))
   23097              :         {
   23098              :           /* This is an overflow detection, count it as a normal compare.  */
   23099       143477 :           *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed);
   23100       143477 :           return true;
   23101              :         }
   23102              : 
   23103     53090955 :       rtx geu;
   23104              :       /* Match x
   23105              :          (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
   23106              :                       (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))  */
   23107     53090955 :       if (mode == CCCmode
   23108       291442 :           && GET_CODE (op0) == NEG
   23109         7878 :           && GET_CODE (geu = XEXP (op0, 0)) == GEU
   23110         7875 :           && REG_P (XEXP (geu, 0))
   23111         7875 :           && (GET_MODE (XEXP (geu, 0)) == CCCmode
   23112          759 :               || GET_MODE (XEXP (geu, 0)) == CCmode)
   23113         7875 :           && REGNO (XEXP (geu, 0)) == FLAGS_REG
   23114         7875 :           && XEXP (geu, 1) == const0_rtx
   23115         7875 :           && GET_CODE (op1) == LTU
   23116         7875 :           && REG_P (XEXP (op1, 0))
   23117         7875 :           && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
   23118         7875 :           && REGNO (XEXP (op1, 0)) == FLAGS_REG
   23119     53098830 :           && XEXP (op1, 1) == const0_rtx)
   23120              :         {
   23121              :           /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop.  */
   23122         7875 :           *total = 0;
   23123         7875 :           return true;
   23124              :         }
   23125              :       /* Match x
   23126              :          (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
   23127              :                       (geu:QI (reg:CCC FLAGS_REG) (const_int 0)))  */
   23128     53083080 :       if (mode == CCCmode
   23129       283567 :           && GET_CODE (op0) == NEG
   23130            3 :           && GET_CODE (XEXP (op0, 0)) == LTU
   23131            3 :           && REG_P (XEXP (XEXP (op0, 0), 0))
   23132            3 :           && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
   23133            3 :           && REGNO (XEXP (XEXP (op0, 0), 0)) == FLAGS_REG
   23134            3 :           && XEXP (XEXP (op0, 0), 1) == const0_rtx
   23135            3 :           && GET_CODE (op1) == GEU
   23136            3 :           && REG_P (XEXP (op1, 0))
   23137            3 :           && GET_MODE (XEXP (op1, 0)) == CCCmode
   23138            3 :           && REGNO (XEXP (op1, 0)) == FLAGS_REG
   23139     53083083 :           && XEXP (op1, 1) == const0_rtx)
   23140              :         {
   23141              :           /* This is *x86_cmc.  */
   23142            3 :           if (!speed)
   23143            0 :             *total = COSTS_N_BYTES (1);
   23144            3 :           else if (TARGET_SLOW_STC)
   23145            0 :             *total = COSTS_N_INSNS (2);
   23146              :           else
   23147            3 :             *total = COSTS_N_INSNS (1);
   23148            3 :           return true;
   23149              :         }
   23150              : 
   23151     53083077 :       if (SCALAR_INT_MODE_P (GET_MODE (op0))
   23152    110625641 :           && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
   23153              :         {
   23154       756338 :           if (op1 == const0_rtx)
   23155       217984 :             *total = cost->add
   23156       108992 :                      + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed);
   23157              :           else
   23158      1294692 :             *total = 3*cost->add
   23159       647346 :                      + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed)
   23160       647346 :                      + rtx_cost (op1, GET_MODE (op0), outer_code, opno, speed);
   23161       756338 :           return true;
   23162              :         }
   23163              : 
   23164              :       /* The embedded comparison operand is completely free.  */
   23165     52326739 :       if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx)
   23166       372337 :         *total = 0;
   23167              : 
   23168              :       return false;
   23169              : 
   23170      1369760 :     case FLOAT_EXTEND:
   23171              :       /* x87 represents all values extended to 80bit.  */
   23172      1369760 :       if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23173       664905 :         *total = 0;
   23174              :       else
   23175      1409710 :         *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
   23176              :       return false;
   23177              : 
   23178        83682 :     case FLOAT_TRUNCATE:
   23179        83682 :       if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23180        58083 :         *total = cost->fadd;
   23181              :       else
   23182        51198 :         *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
   23183              :       return false;
   23184       683164 :     case FLOAT:
   23185       683164 :     case UNSIGNED_FLOAT:
   23186       683164 :       if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23187              :         /* TODO: We do not have cost tables for x87.  */
   23188        93902 :         *total = cost->fadd;
   23189       589262 :       else if (VECTOR_MODE_P (mode))
   23190            0 :         *total = ix86_vec_cost (mode, cost->cvtpi2ps);
   23191              :       else
   23192       589262 :         *total = cost->cvtsi2ss;
   23193              :       return false;
   23194              : 
   23195       284974 :     case FIX:
   23196       284974 :     case UNSIGNED_FIX:
   23197       284974 :       if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23198              :         /* TODO: We do not have cost tables for x87.  */
   23199       284974 :         *total = cost->fadd;
   23200            0 :       else if (VECTOR_MODE_P (mode))
   23201            0 :         *total = ix86_vec_cost (mode, cost->cvtps2pi);
   23202              :       else
   23203            0 :         *total = cost->cvtss2si;
   23204              :       return false;
   23205              : 
   23206       387534 :     case ABS:
   23207              :       /* SSE requires memory load for the constant operand. It may make
   23208              :          sense to account for this.  Of course the constant operand may or
   23209              :          may not be reused. */
   23210       387534 :       if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23211       280477 :         *total = cost->sse_op;
   23212       107057 :       else if (X87_FLOAT_MODE_P (mode))
   23213        31493 :         *total = cost->fabs;
   23214        75564 :       else if (FLOAT_MODE_P (mode))
   23215        25869 :         *total = ix86_vec_cost (mode, cost->sse_op);
   23216        49695 :       else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   23217         6354 :         *total = cost->sse_op;
   23218              :       return false;
   23219              : 
   23220        28683 :     case SQRT:
   23221        28683 :       if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23222        18326 :         *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
   23223        10357 :       else if (X87_FLOAT_MODE_P (mode))
   23224         4315 :         *total = cost->fsqrt;
   23225         6042 :       else if (FLOAT_MODE_P (mode))
   23226         6042 :         *total = ix86_vec_cost (mode,
   23227              :                                 mode == SFmode ? cost->sqrtss : cost->sqrtsd);
   23228              :       return false;
   23229              : 
   23230      3948714 :     case UNSPEC:
   23231      3948714 :       switch (XINT (x, 1))
   23232              :         {
   23233       126118 :         case UNSPEC_TP:
   23234       126118 :           *total = 0;
   23235       126118 :           break;
   23236              : 
   23237         5210 :         case UNSPEC_VTERNLOG:
   23238         5210 :           *total = cost->sse_op;
   23239         5210 :           if (!REG_P (XVECEXP (x, 0, 0)))
   23240          720 :             *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
   23241         5210 :           if (!REG_P (XVECEXP (x, 0, 1)))
   23242          694 :             *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
   23243         5210 :           if (!REG_P (XVECEXP (x, 0, 2)))
   23244          733 :             *total += rtx_cost (XVECEXP (x, 0, 2), mode, code, 2, speed);
   23245              :           return true;
   23246              : 
   23247        95241 :         case UNSPEC_PTEST:
   23248        95241 :           {
   23249        95241 :             *total = cost->sse_op;
   23250        95241 :             rtx test_op0 = XVECEXP (x, 0, 0);
   23251        95241 :             if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
   23252              :               return false;
   23253        94593 :             if (GET_CODE (test_op0) == AND)
   23254              :               {
   23255           23 :                 rtx and_op0 = XEXP (test_op0, 0);
   23256           23 :                 if (GET_CODE (and_op0) == NOT)
   23257            0 :                   and_op0 = XEXP (and_op0, 0);
   23258           23 :                 *total += rtx_cost (and_op0, GET_MODE (and_op0),
   23259              :                                     AND, 0, speed)
   23260           23 :                           + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
   23261              :                                       AND, 1, speed);
   23262              :              }
   23263              :             else
   23264        94570 :               *total = rtx_cost (test_op0, GET_MODE (test_op0),
   23265              :                                  UNSPEC, 0, speed);
   23266              :           }
   23267              :           return true;
   23268              : 
   23269        20846 :         case UNSPEC_BLENDV:
   23270        20846 :           *total = cost->sse_op;
   23271        20846 :           if (!REG_P (XVECEXP (x, 0, 0)))
   23272         8683 :             *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
   23273        20846 :           if (!REG_P (XVECEXP (x, 0, 1)))
   23274        10262 :             *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
   23275        20846 :           if (!REG_P (XVECEXP (x, 0, 2)))
   23276              :             {
   23277        12984 :               rtx cond = XVECEXP (x, 0, 2);
   23278        12984 :               if ((GET_CODE (cond) == LT || GET_CODE (cond) == GT)
   23279          773 :                   && CONST_VECTOR_P (XEXP (cond, 1)))
   23280              :                 {
   23281              :                   /* avx2_blendvpd256_gt and friends.  */
   23282          153 :                   if (!REG_P (XEXP (cond, 0)))
   23283           70 :                     *total += rtx_cost (XEXP (cond, 0), mode, code, 2, speed);
   23284              :                 }
   23285              :               else
   23286        12831 :                 *total += rtx_cost (cond, mode, code, 2, speed);
   23287              :             }
   23288              :           return true;
   23289              : 
   23290        28353 :         case UNSPEC_MOVMSK:
   23291        28353 :           *total = cost->sse_op;
   23292        28353 :           return true;
   23293              : 
   23294              :         default:
   23295              :           break;
   23296              :         }
   23297              :       return false;
   23298              : 
   23299      2020951 :     case VEC_CONCAT:
   23300              :       /* ??? Assume all of these vector manipulation patterns are
   23301              :          recognizable.  In which case they all pretty much have the
   23302              :          same cost.
   23303              :          ??? We should still recurse when computing cost.  */
   23304      2020951 :      *total = cost->sse_op;
   23305      2020951 :      return true;
   23306              : 
   23307      2449813 :     case VEC_SELECT:
   23308              :      /* Special case extracting lower part from the vector.
   23309              :         This by itself needs no code and most of SSE/AVX instructions have
   23310              :         packed and single forms where the single form may be represented
   23311              :         by such VEC_SELECT.
   23312              : 
   23313              :         Use cost 1 (despite the fact that functionally equivalent SUBREG has
   23314              :         cost 0).  Making VEC_SELECT completely free, for example instructs CSE
   23315              :         to forward propagate VEC_SELECT into
   23316              : 
   23317              :            (set (reg eax) (reg src))
   23318              : 
   23319              :         which then prevents fwprop and combining. See i.e.
   23320              :         gcc.target/i386/pr91103-1.c.
   23321              : 
   23322              :         ??? rtvec_series_p test should be, for valid patterns, equivalent to
   23323              :         vec_series_lowpart_p but is not, since the latter calls
   23324              :         can_change_mode_class on ALL_REGS and this returns false since x87 does
   23325              :         not support subregs at all.  */
   23326      2449813 :      if (rtvec_series_p (XVEC (XEXP (x, 1), 0), 0))
   23327       762248 :        *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
   23328       762248 :                           outer_code, opno, speed) + 1;
   23329              :      else
   23330              :        /* ??? We should still recurse when computing cost.  */
   23331      1687565 :        *total = cost->sse_op;
   23332              :      return true;
   23333              : 
   23334      1224343 :     case VEC_DUPLICATE:
   23335      2448686 :       *total = rtx_cost (XEXP (x, 0),
   23336      1224343 :                          GET_MODE (XEXP (x, 0)),
   23337              :                          VEC_DUPLICATE, 0, speed);
   23338              :       /* It's broadcast instruction, not embedded broadcasting.  */
   23339      1224343 :       if (outer_code == SET)
   23340      1176056 :         *total += cost->sse_op;
   23341              : 
   23342              :      return true;
   23343              : 
   23344       725714 :     case VEC_MERGE:
   23345       725714 :       mask = XEXP (x, 2);
   23346              :       /* Scalar versions of SSE instructions may be represented as:
   23347              : 
   23348              :          (vec_merge (vec_duplicate (operation ....))
   23349              :                      (register or memory)
   23350              :                      (const_int 1))
   23351              : 
   23352              :          In this case vec_merge and vec_duplicate is for free.
   23353              :          Just recurse into operation and second operand.  */
   23354       725714 :       if (mask == const1_rtx
   23355       214994 :           && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE)
   23356              :         {
   23357        75554 :           *total = rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   23358              :                              outer_code, opno, speed)
   23359        75554 :                    + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
   23360        75554 :           return true;
   23361              :         }
   23362              :       /* This is masked instruction, assume the same cost,
   23363              :          as nonmasked variant.  */
   23364       650160 :       else if (TARGET_AVX512F
   23365       650160 :                && (register_operand (mask, GET_MODE (mask))
   23366              :                    /* Redundant clean up of high bits for kmask with VL=2/4
   23367              :                       i.e. (vec_merge op0, op1, (and op3 15)).  */
   23368       121973 :                    || (GET_CODE (mask) == AND
   23369          372 :                        && register_operand (XEXP (mask, 0), GET_MODE (mask))
   23370          372 :                        && CONST_INT_P (XEXP (mask, 1))
   23371          372 :                        && ((INTVAL (XEXP (mask, 1)) == 3
   23372          131 :                             && GET_MODE_NUNITS (mode) == 2)
   23373          241 :                            || (INTVAL (XEXP (mask, 1)) == 15
   23374          241 :                                && GET_MODE_NUNITS (mode) == 4)))))
   23375              :         {
   23376       375002 :           *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
   23377       375002 :                    + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
   23378       375002 :           return true;
   23379              :         }
   23380              :       /* Combination of the two above:
   23381              : 
   23382              :          (vec_merge (vec_merge (vec_duplicate (operation ...))
   23383              :                        (register or memory)
   23384              :                        (reg:QI mask))
   23385              :                     (register or memory)
   23386              :                     (const_int 1))
   23387              : 
   23388              :          i.e. avx512fp16_vcvtss2sh_mask.  */
   23389       275158 :       else if (TARGET_AVX512F
   23390       121601 :                && mask == const1_rtx
   23391        48667 :                && GET_CODE (XEXP (x, 0)) == VEC_MERGE
   23392        27158 :                && GET_CODE (XEXP (XEXP (x, 0), 0)) == VEC_DUPLICATE
   23393       277420 :                && register_operand (XEXP (XEXP (x, 0), 2),
   23394         2262 :                                     GET_MODE (XEXP (XEXP (x, 0), 2))))
   23395              :         {
   23396         2250 :           *total = rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
   23397              :                              mode, outer_code, opno, speed)
   23398         2250 :                    + rtx_cost (XEXP (XEXP (x, 0), 1),
   23399              :                                mode, outer_code, opno, speed)
   23400         2250 :                    + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
   23401         2250 :           return true;
   23402              :         }
   23403              :       /* vcmp.  */
   23404       272908 :       else if (unspec_pcmp_p (mask)
   23405       272908 :                || (GET_CODE (mask) == NOT
   23406            0 :                    && unspec_pcmp_p (XEXP (mask, 0))))
   23407              :         {
   23408         1950 :           rtx uns = GET_CODE (mask) == NOT ? XEXP (mask, 0) : mask;
   23409         1950 :           rtx unsop0 = XVECEXP (uns, 0, 0);
   23410              :           /* Make (subreg:V4SI (not:V16QI (reg:V16QI ..)) 0)
   23411              :              cost the same as register.
   23412              :              This is used by avx_cmp<mode>3_ltint_not.  */
   23413         1950 :           if (SUBREG_P (unsop0))
   23414          417 :             unsop0 = XEXP (unsop0, 0);
   23415         1950 :           if (GET_CODE (unsop0) == NOT)
   23416           18 :             unsop0 = XEXP (unsop0, 0);
   23417         1950 :           *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
   23418         1950 :                    + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
   23419         1950 :                    + rtx_cost (unsop0, mode, UNSPEC, opno, speed)
   23420         1950 :                    + rtx_cost (XVECEXP (uns, 0, 1), mode, UNSPEC, opno, speed)
   23421         1950 :                    + cost->sse_op;
   23422         1950 :           return true;
   23423              :         }
   23424              :       else
   23425       270958 :         *total = cost->sse_op;
   23426       270958 :       return false;
   23427              : 
   23428    105951522 :     case MEM:
   23429              :       /* CONST_VECTOR_DUPLICATE_P in constant_pool is just broadcast.
   23430              :          or variants in ix86_vector_duplicate_simode_const.  */
   23431              : 
   23432    105951522 :       if (GET_MODE_SIZE (mode) >= 16
   23433     17958449 :           && VECTOR_MODE_P (mode)
   23434     11978443 :           && SYMBOL_REF_P (XEXP (x, 0))
   23435      2207390 :           && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))
   23436    107929794 :           && ix86_broadcast_from_constant (mode, x))
   23437              :         {
   23438       488504 :           *total = COSTS_N_INSNS (2) + speed;
   23439       488504 :           return true;
   23440              :         }
   23441              : 
   23442              :       /* An insn that accesses memory is slightly more expensive
   23443              :          than one that does not.  */
   23444    105463018 :       if (speed)
   23445              :         {
   23446     94332895 :           *total += 1;
   23447     94332895 :           rtx addr = XEXP (x, 0);
   23448              :           /* For MEM, rtx_cost iterates each subrtx, and adds up the costs,
   23449              :              so for MEM (reg) and MEM (reg + 4), the former costs 5,
   23450              :              the latter costs 9, it is not accurate for x86. Ideally
   23451              :              address_cost should be used, but it reduces cost too much.
   23452              :              So the current solution is to make constant disp as cheap as possible.  */
   23453     94332895 :           if (GET_CODE (addr) == PLUS
   23454     76986370 :               && x86_64_immediate_operand (XEXP (addr, 1), Pmode)
   23455              :               /* Only handle (reg + disp) since other forms of addr are mostly LEA,
   23456              :                  there's no additional cost for the plus of disp.  */
   23457    165708507 :               && register_operand (XEXP (addr, 0), Pmode))
   23458              :             {
   23459     55367816 :               *total += 1;
   23460     68203950 :               *total += rtx_cost (XEXP (addr, 0), Pmode, PLUS, 0, speed);
   23461     55367816 :               return true;
   23462              :             }
   23463              :         }
   23464              : 
   23465              :       return false;
   23466              : 
   23467        52720 :     case ZERO_EXTRACT:
   23468        52720 :       if (XEXP (x, 1) == const1_rtx
   23469        11575 :           && GET_CODE (XEXP (x, 2)) == ZERO_EXTEND
   23470            0 :           && GET_MODE (XEXP (x, 2)) == SImode
   23471            0 :           && GET_MODE (XEXP (XEXP (x, 2), 0)) == QImode)
   23472              :         {
   23473              :           /* Ignore cost of zero extension and masking of last argument.  */
   23474            0 :           *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23475            0 :           *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23476            0 :           *total += rtx_cost (XEXP (XEXP (x, 2), 0), mode, code, 2, speed);
   23477            0 :           return true;
   23478              :         }
   23479              :       return false;
   23480              : 
   23481     28813631 :     case IF_THEN_ELSE:
   23482     28813631 :       if (TARGET_XOP
   23483        25097 :           && VECTOR_MODE_P (mode)
   23484     28819022 :           && (GET_MODE_SIZE (mode) == 16 || GET_MODE_SIZE (mode) == 32))
   23485              :         {
   23486              :           /* vpcmov.  */
   23487         4823 :           *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (6);
   23488         4823 :           if (!REG_P (XEXP (x, 0)))
   23489         4663 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23490         4823 :           if (!REG_P (XEXP (x, 1)))
   23491         4630 :             *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23492         4823 :           if (!REG_P (XEXP (x, 2)))
   23493         4632 :             *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
   23494         4823 :           return true;
   23495              :         }
   23496            0 :       else if (TARGET_CMOVE
   23497     28808808 :                && SCALAR_INT_MODE_P (mode)
   23498     31199832 :                && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
   23499              :         {
   23500              :           /* cmov.  */
   23501      2196060 :           *total = COSTS_N_INSNS (1);
   23502      2196060 :           if (!COMPARISON_P (XEXP (x, 0)) && !REG_P (XEXP (x, 0)))
   23503            0 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23504      2196060 :           if (!REG_P (XEXP (x, 1)))
   23505       115421 :             *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23506      2196060 :           if (!REG_P (XEXP (x, 2)))
   23507       707482 :             *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
   23508      2196060 :           return true;
   23509              :         }
   23510              :       return false;
   23511              : 
   23512     17884883 :     case EQ:
   23513     17884883 :     case GT:
   23514     17884883 :     case GTU:
   23515     17884883 :     case LT:
   23516     17884883 :     case LTU:
   23517     17884883 :       if (TARGET_SSE2
   23518     17881685 :           && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   23519     18222427 :           && GET_MODE_SIZE (mode) >= 8)
   23520              :         {
   23521              :           /* vpcmpeq */
   23522       332979 :           *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (4);
   23523       332979 :           if (!REG_P (XEXP (x, 0)))
   23524        63809 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23525       332979 :           if (!REG_P (XEXP (x, 1)))
   23526       127592 :             *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23527       332979 :           return true;
   23528              :         }
   23529     17551904 :       if (TARGET_XOP
   23530        12206 :           && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   23531     17552012 :           && GET_MODE_SIZE (mode) <= 16)
   23532              :         {
   23533              :           /* vpcomeq */
   23534          108 :           *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (6);
   23535          108 :           if (!REG_P (XEXP (x, 0)))
   23536            0 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23537          108 :           if (!REG_P (XEXP (x, 1)))
   23538            0 :             *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23539          108 :           return true;
   23540              :         }
   23541              :       return false;
   23542              : 
   23543     15872815 :     case NE:
   23544     15872815 :     case GE:
   23545     15872815 :     case GEU:
   23546     15872815 :       if (TARGET_XOP
   23547        21900 :           && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   23548     15879569 :           && GET_MODE_SIZE (mode) <= 16)
   23549              :         {
   23550              :           /* vpcomneq */
   23551         6754 :           *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (6);
   23552         6754 :           if (!REG_P (XEXP (x, 0)))
   23553         1401 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23554         6754 :           if (!REG_P (XEXP (x, 1)))
   23555         5734 :             *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23556         6754 :           return true;
   23557              :         }
   23558     15866061 :       if (TARGET_SSE2
   23559     15863954 :           && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   23560     15867367 :           && GET_MODE_SIZE (mode) >= 8)
   23561              :         {
   23562         1330 :           if (TARGET_AVX512F && GET_MODE_SIZE (mode) >= 16)
   23563              :             /* vpcmpeq + vpternlog */
   23564           40 :             *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (11);
   23565              :           else
   23566              :             /* vpcmpeq + pxor + vpcmpeq */
   23567         1264 :             *total = speed ? COSTS_N_INSNS (3) : COSTS_N_BYTES (12);
   23568         1282 :           if (!REG_P (XEXP (x, 0)))
   23569           28 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23570         1282 :           if (!REG_P (XEXP (x, 1)))
   23571           28 :             *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23572         1282 :           return true;
   23573              :         }
   23574              :       return false;
   23575              : 
   23576              :     default:
   23577              :       return false;
   23578              :     }
   23579              : }
   23580              : 
   23581              : #if TARGET_MACHO
   23582              : 
   23583              : static int current_machopic_label_num;
   23584              : 
   23585              : /* Given a symbol name and its associated stub, write out the
   23586              :    definition of the stub.  */
   23587              : 
   23588              : void
   23589              : machopic_output_stub (FILE *file, const char *symb, const char *stub)
   23590              : {
   23591              :   unsigned int length;
   23592              :   char *binder_name, *symbol_name, lazy_ptr_name[32];
   23593              :   int label = ++current_machopic_label_num;
   23594              : 
   23595              :   /* For 64-bit we shouldn't get here.  */
   23596              :   gcc_assert (!TARGET_64BIT);
   23597              : 
   23598              :   /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
   23599              :   symb = targetm.strip_name_encoding (symb);
   23600              : 
   23601              :   length = strlen (stub);
   23602              :   binder_name = XALLOCAVEC (char, length + 32);
   23603              :   GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
   23604              : 
   23605              :   length = strlen (symb);
   23606              :   symbol_name = XALLOCAVEC (char, length + 32);
   23607              :   GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
   23608              : 
   23609              :   sprintf (lazy_ptr_name, "L%d$lz", label);
   23610              : 
   23611              :   if (MACHOPIC_ATT_STUB)
   23612              :     switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
   23613              :   else if (MACHOPIC_PURE)
   23614              :     switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
   23615              :   else
   23616              :     switch_to_section (darwin_sections[machopic_symbol_stub_section]);
   23617              : 
   23618              :   fprintf (file, "%s:\n", stub);
   23619              :   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
   23620              : 
   23621              :   if (MACHOPIC_ATT_STUB)
   23622              :     {
   23623              :       fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
   23624              :     }
   23625              :   else if (MACHOPIC_PURE)
   23626              :     {
   23627              :       /* PIC stub.  */
   23628              :       /* 25-byte PIC stub using "CALL get_pc_thunk".  */
   23629              :       rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
   23630              :       output_set_got (tmp, NULL_RTX);   /* "CALL ___<cpu>.get_pc_thunk.cx".  */
   23631              :       fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
   23632              :                label, lazy_ptr_name, label);
   23633              :       fprintf (file, "\tjmp\t*%%ecx\n");
   23634              :     }
   23635              :   else
   23636              :     fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
   23637              : 
   23638              :   /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
   23639              :      it needs no stub-binding-helper.  */
   23640              :   if (MACHOPIC_ATT_STUB)
   23641              :     return;
   23642              : 
   23643              :   fprintf (file, "%s:\n", binder_name);
   23644              : 
   23645              :   if (MACHOPIC_PURE)
   23646              :     {
   23647              :       fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
   23648              :       fprintf (file, "\tpushl\t%%ecx\n");
   23649              :     }
   23650              :   else
   23651              :     fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
   23652              : 
   23653              :   fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
   23654              : 
   23655              :   /* N.B. Keep the correspondence of these
   23656              :      'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
   23657              :      old-pic/new-pic/non-pic stubs; altering this will break
   23658              :      compatibility with existing dylibs.  */
   23659              :   if (MACHOPIC_PURE)
   23660              :     {
   23661              :       /* 25-byte PIC stub using "CALL get_pc_thunk".  */
   23662              :       switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
   23663              :     }
   23664              :   else
   23665              :     /* 16-byte -mdynamic-no-pic stub.  */
   23666              :     switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);
   23667              : 
   23668              :   fprintf (file, "%s:\n", lazy_ptr_name);
   23669              :   fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
   23670              :   fprintf (file, ASM_LONG "%s\n", binder_name);
   23671              : }
   23672              : #endif /* TARGET_MACHO */
   23673              : 
   23674              : /* Order the registers for register allocator.  */
   23675              : 
   23676              : void
   23677       222045 : x86_order_regs_for_local_alloc (void)
   23678              : {
   23679       222045 :    int pos = 0;
   23680       222045 :    int i;
   23681              : 
   23682              :    /* First allocate the local general purpose registers.  */
   23683     20650185 :    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
   23684     27533580 :      if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
   23685      5782146 :         reg_alloc_order [pos++] = i;
   23686              : 
   23687              :    /* Global general purpose registers.  */
   23688     20650185 :    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
   23689     23715027 :      if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
   23690      1323294 :         reg_alloc_order [pos++] = i;
   23691              : 
   23692              :    /* x87 registers come first in case we are doing FP math
   23693              :       using them.  */
   23694       222045 :    if (!TARGET_SSE_MATH)
   23695        57663 :      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
   23696        51256 :        reg_alloc_order [pos++] = i;
   23697              : 
   23698              :    /* SSE registers.  */
   23699      1998405 :    for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
   23700      1776360 :      reg_alloc_order [pos++] = i;
   23701      1998405 :    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
   23702      1776360 :      reg_alloc_order [pos++] = i;
   23703              : 
   23704              :    /* Extended REX SSE registers.  */
   23705      3774765 :    for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
   23706      3552720 :      reg_alloc_order [pos++] = i;
   23707              : 
   23708              :    /* Mask register.  */
   23709      1998405 :    for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
   23710      1776360 :      reg_alloc_order [pos++] = i;
   23711              : 
   23712              :    /* x87 registers.  */
   23713       222045 :    if (TARGET_SSE_MATH)
   23714      1940742 :      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
   23715      1725104 :        reg_alloc_order [pos++] = i;
   23716              : 
   23717      1998405 :    for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
   23718      1776360 :      reg_alloc_order [pos++] = i;
   23719              : 
   23720              :    /* Initialize the rest of array as we do not allocate some registers
   23721              :       at all.  */
   23722      1110225 :    while (pos < FIRST_PSEUDO_REGISTER)
   23723       888180 :      reg_alloc_order [pos++] = 0;
   23724       222045 : }
   23725              : 
   23726              : static bool
   23727    246046844 : ix86_ms_bitfield_layout_p (const_tree record_type)
   23728              : {
   23729    246046844 :   return ((TARGET_MS_BITFIELD_LAYOUT
   23730          215 :            && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
   23731    246046844 :           || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
   23732              : }
   23733              : 
   23734              : /* Returns an expression indicating where the this parameter is
   23735              :    located on entry to the FUNCTION.  */
   23736              : 
   23737              : static rtx
   23738         1767 : x86_this_parameter (tree function)
   23739              : {
   23740         1767 :   tree type = TREE_TYPE (function);
   23741         1767 :   bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
   23742         1767 :   int nregs;
   23743              : 
   23744         1767 :   if (TARGET_64BIT)
   23745              :     {
   23746         1765 :       const int *parm_regs;
   23747              : 
   23748         1765 :       if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type)))
   23749              :         parm_regs = x86_64_preserve_none_int_parameter_registers;
   23750         1765 :       else if (ix86_function_type_abi (type) == MS_ABI)
   23751              :         parm_regs = x86_64_ms_abi_int_parameter_registers;
   23752              :       else
   23753         1765 :         parm_regs = x86_64_int_parameter_registers;
   23754         1765 :       return gen_rtx_REG (Pmode, parm_regs[aggr]);
   23755              :     }
   23756              : 
   23757            2 :   nregs = ix86_function_regparm (type, function);
   23758              : 
   23759            2 :   if (nregs > 0 && !stdarg_p (type))
   23760              :     {
   23761            0 :       int regno;
   23762            0 :       unsigned int ccvt = ix86_get_callcvt (type);
   23763              : 
   23764            0 :       if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
   23765            0 :         regno = aggr ? DX_REG : CX_REG;
   23766            0 :       else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
   23767              :         {
   23768            0 :           regno = CX_REG;
   23769            0 :           if (aggr)
   23770            0 :             return gen_rtx_MEM (SImode,
   23771            0 :                                 plus_constant (Pmode, stack_pointer_rtx, 4));
   23772              :         }
   23773              :       else
   23774              :         {
   23775            0 :           regno = AX_REG;
   23776            0 :           if (aggr)
   23777              :             {
   23778            0 :               regno = DX_REG;
   23779            0 :               if (nregs == 1)
   23780            0 :                 return gen_rtx_MEM (SImode,
   23781            0 :                                     plus_constant (Pmode,
   23782              :                                                    stack_pointer_rtx, 4));
   23783              :             }
   23784              :         }
   23785            0 :       return gen_rtx_REG (SImode, regno);
   23786              :     }
   23787              : 
   23788            4 :   return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
   23789            4 :                                              aggr ? 8 : 4));
   23790              : }
   23791              : 
   23792              : /* Determine whether x86_output_mi_thunk can succeed.  */
   23793              : 
   23794              : static bool
   23795         4919 : x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
   23796              :                          const_tree function)
   23797              : {
   23798              :   /* 64-bit can handle anything.  */
   23799         4919 :   if (TARGET_64BIT)
   23800              :     return true;
   23801              : 
   23802              :   /* For 32-bit, everything's fine if we have one free register.  */
   23803           76 :   if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
   23804              :     return true;
   23805              : 
   23806              :   /* Need a free register for vcall_offset.  */
   23807            0 :   if (vcall_offset)
   23808              :     return false;
   23809              : 
   23810              :   /* Need a free register for GOT references.  */
   23811            0 :   if (flag_pic && !targetm.binds_local_p (function))
   23812              :     return false;
   23813              : 
   23814              :   /* Otherwise ok.  */
   23815              :   return true;
   23816              : }
   23817              : 
   23818              : /* Output the assembler code for a thunk function.  THUNK_DECL is the
   23819              :    declaration for the thunk function itself, FUNCTION is the decl for
   23820              :    the target function.  DELTA is an immediate constant offset to be
   23821              :    added to THIS.  If VCALL_OFFSET is nonzero, the word at
   23822              :    *(*this + vcall_offset) should be added to THIS.  */
   23823              : 
   23824              : static void
   23825         1767 : x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
   23826              :                      HOST_WIDE_INT vcall_offset, tree function)
   23827              : {
   23828         1767 :   const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
   23829         1767 :   rtx this_param = x86_this_parameter (function);
   23830         1767 :   rtx this_reg, tmp, fnaddr;
   23831         1767 :   unsigned int tmp_regno;
   23832         1767 :   rtx_insn *insn;
   23833         1767 :   int saved_flag_force_indirect_call = flag_force_indirect_call;
   23834              : 
   23835         1767 :   if (TARGET_64BIT)
   23836              :     tmp_regno = R10_REG;
   23837              :   else
   23838              :     {
   23839            2 :       unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
   23840            2 :       if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
   23841              :         tmp_regno = AX_REG;
   23842            2 :       else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
   23843              :         tmp_regno = DX_REG;
   23844              :       else
   23845            2 :         tmp_regno = CX_REG;
   23846              : 
   23847            2 :       if (flag_pic)
   23848            2 :   flag_force_indirect_call = 0;
   23849              :     }
   23850              : 
   23851         1767 :   emit_note (NOTE_INSN_PROLOGUE_END);
   23852              : 
   23853              :   /* CET is enabled, insert EB instruction.  */
   23854         1767 :   if ((flag_cf_protection & CF_BRANCH))
   23855           20 :     emit_insn (gen_nop_endbr ());
   23856              : 
   23857              :   /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
   23858              :      pull it in now and let DELTA benefit.  */
   23859         1767 :   if (REG_P (this_param))
   23860              :     this_reg = this_param;
   23861            2 :   else if (vcall_offset)
   23862              :     {
   23863              :       /* Put the this parameter into %eax.  */
   23864            2 :       this_reg = gen_rtx_REG (Pmode, AX_REG);
   23865            1 :       emit_move_insn (this_reg, this_param);
   23866              :     }
   23867              :   else
   23868              :     this_reg = NULL_RTX;
   23869              : 
   23870              :   /* Adjust the this parameter by a fixed constant.  */
   23871         1767 :   if (delta)
   23872              :     {
   23873          828 :       rtx delta_rtx = GEN_INT (delta);
   23874          828 :       rtx delta_dst = this_reg ? this_reg : this_param;
   23875              : 
   23876          828 :       if (TARGET_64BIT)
   23877              :         {
   23878          827 :           if (!x86_64_general_operand (delta_rtx, Pmode))
   23879              :             {
   23880            0 :               tmp = gen_rtx_REG (Pmode, tmp_regno);
   23881            0 :               emit_move_insn (tmp, delta_rtx);
   23882            0 :               delta_rtx = tmp;
   23883              :             }
   23884              :         }
   23885              : 
   23886          829 :       ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
   23887              :     }
   23888              : 
   23889              :   /* Adjust the this parameter by a value stored in the vtable.  */
   23890         1767 :   if (vcall_offset)
   23891              :     {
   23892          990 :       rtx vcall_addr, vcall_mem, this_mem;
   23893              : 
   23894          991 :       tmp = gen_rtx_REG (Pmode, tmp_regno);
   23895              : 
   23896          990 :       this_mem = gen_rtx_MEM (ptr_mode, this_reg);
   23897          991 :       if (Pmode != ptr_mode)
   23898            0 :         this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
   23899          990 :       emit_move_insn (tmp, this_mem);
   23900              : 
   23901              :       /* Adjust the this parameter.  */
   23902          991 :       vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
   23903          990 :       if (TARGET_64BIT
   23904          990 :           && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
   23905              :         {
   23906            0 :           rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
   23907            0 :           emit_move_insn (tmp2, GEN_INT (vcall_offset));
   23908            0 :           vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
   23909              :         }
   23910              : 
   23911          990 :       vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
   23912          991 :       if (Pmode != ptr_mode)
   23913            0 :         emit_insn (gen_addsi_1_zext (this_reg,
   23914              :                                      gen_rtx_REG (ptr_mode,
   23915              :                                                   REGNO (this_reg)),
   23916              :                                      vcall_mem));
   23917              :       else
   23918          990 :         ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
   23919              :     }
   23920              : 
   23921              :   /* If necessary, drop THIS back to its stack slot.  */
   23922         1767 :   if (this_reg && this_reg != this_param)
   23923            1 :     emit_move_insn (this_param, this_reg);
   23924              : 
   23925         1767 :   fnaddr = XEXP (DECL_RTL (function), 0);
   23926         1767 :   if (TARGET_64BIT)
   23927              :     {
   23928           25 :       if (!flag_pic || targetm.binds_local_p (function)
   23929         1790 :           || TARGET_PECOFF)
   23930              :         ;
   23931              :       else
   23932              :         {
   23933            0 :           tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
   23934            0 :           tmp = gen_rtx_CONST (Pmode, tmp);
   23935            0 :           fnaddr = gen_const_mem (Pmode, tmp);
   23936              :         }
   23937              :     }
   23938              :   else
   23939              :     {
   23940            2 :       if (!flag_pic || targetm.binds_local_p (function))
   23941              :         ;
   23942              : #if TARGET_MACHO
   23943              :       else if (TARGET_MACHO)
   23944              :         {
   23945              :           fnaddr = machopic_indirect_call_target (DECL_RTL (function));
   23946              :           fnaddr = XEXP (fnaddr, 0);
   23947              :         }
   23948              : #endif /* TARGET_MACHO */
   23949              :       else
   23950              :         {
   23951            0 :           tmp = gen_rtx_REG (Pmode, CX_REG);
   23952            0 :           output_set_got (tmp, NULL_RTX);
   23953              : 
   23954            0 :           fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
   23955            0 :           fnaddr = gen_rtx_CONST (Pmode, fnaddr);
   23956            0 :           fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
   23957            0 :           fnaddr = gen_const_mem (Pmode, fnaddr);
   23958              :         }
   23959              :     }
   23960              : 
   23961              :   /* Our sibling call patterns do not allow memories, because we have no
   23962              :      predicate that can distinguish between frame and non-frame memory.
   23963              :      For our purposes here, we can get away with (ab)using a jump pattern,
   23964              :      because we're going to do no optimization.  */
   23965         1767 :   if (MEM_P (fnaddr))
   23966              :     {
   23967            0 :       if (sibcall_insn_operand (fnaddr, word_mode))
   23968              :         {
   23969            0 :           fnaddr = XEXP (DECL_RTL (function), 0);
   23970            0 :           tmp = gen_rtx_MEM (QImode, fnaddr);
   23971            0 :           tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
   23972            0 :           tmp = emit_call_insn (tmp);
   23973            0 :           SIBLING_CALL_P (tmp) = 1;
   23974              :         }
   23975              :       else
   23976            0 :         emit_jump_insn (gen_indirect_jump (fnaddr));
   23977              :     }
   23978              :   else
   23979              :     {
   23980         1767 :       if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
   23981              :         {
   23982              :           // CM_LARGE_PIC always uses pseudo PIC register which is
   23983              :           // uninitialized.  Since FUNCTION is local and calling it
   23984              :           // doesn't go through PLT, we use scratch register %r11 as
   23985              :           // PIC register and initialize it here.
   23986            3 :           pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
   23987            3 :           ix86_init_large_pic_reg (tmp_regno);
   23988            3 :           fnaddr = legitimize_pic_address (fnaddr,
   23989            3 :                                            gen_rtx_REG (Pmode, tmp_regno));
   23990              :         }
   23991              : 
   23992         1767 :       if (!sibcall_insn_operand (fnaddr, word_mode))
   23993              :         {
   23994            9 :           tmp = gen_rtx_REG (word_mode, tmp_regno);
   23995            9 :           if (GET_MODE (fnaddr) != word_mode)
   23996            0 :             fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
   23997            9 :           emit_move_insn (tmp, fnaddr);
   23998            9 :           fnaddr = tmp;
   23999              :         }
   24000              : 
   24001         1767 :       tmp = gen_rtx_MEM (QImode, fnaddr);
   24002         1767 :       tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
   24003         1767 :       tmp = emit_call_insn (tmp);
   24004         1767 :       SIBLING_CALL_P (tmp) = 1;
   24005              :     }
   24006         1767 :   emit_barrier ();
   24007              : 
   24008              :   /* Emit just enough of rest_of_compilation to get the insns emitted.  */
   24009         1767 :   insn = get_insns ();
   24010         1767 :   shorten_branches (insn);
   24011         1767 :   assemble_start_function (thunk_fndecl, fnname);
   24012         1767 :   final_start_function (insn, file, 1);
   24013         1767 :   final (insn, file, 1);
   24014         1767 :   final_end_function ();
   24015         1767 :   assemble_end_function (thunk_fndecl, fnname);
   24016              : 
   24017         1767 :   flag_force_indirect_call = saved_flag_force_indirect_call;
   24018         1767 : }
   24019              : 
                        /* Write the x86-specific directives that open the assembly output.
                           After the generic default_file_start, this emits .code16gcc for
                           TARGET_16BIT, calls darwin_file_start when TARGET_MACHO, and adds
                           the .version, __fltused and .intel_syntax lines when the
                           corresponding macro or dialect setting is enabled.  */
   24020              : static void
   24021       281763 : x86_file_start (void)
   24022              : {
   24023       281763 :   default_file_start ();
   24024       281763 :   if (TARGET_16BIT)
   24025            6 :     fputs ("\t.code16gcc\n", asm_out_file);
   24026              : #if TARGET_MACHO
   24027              :   darwin_file_start ();
   24028              : #endif
   24029       281763 :   if (X86_FILE_START_VERSION_DIRECTIVE)
   24030              :     fputs ("\t.version\t\"01.01\"\n", asm_out_file);
   24031       281763 :   if (X86_FILE_START_FLTUSED)
   24032              :     fputs ("\t.global\t__fltused\n", asm_out_file);
   24033       281763 :   if (ix86_asm_dialect == ASM_INTEL)
   24034           73 :     fputs ("\t.intel_syntax noprefix\n", asm_out_file);
   24035       281763 : }
   24036              : 
                        /* Return the alignment to use for a field of type TYPE, given the
                           alignment COMPUTED by the caller (presumably in bits -- the unit
                           is not visible here; confirm against the caller).

                           COMPUTED is returned unchanged for TARGET_64BIT and
                           TARGET_ALIGN_DOUBLE.  TARGET_IAMCU defers to iamcu_alignment.
                           Otherwise, after stripping array types, a type whose mode is
                           DFmode, DCmode, or of class MODE_INT or MODE_COMPLEX_INT is capped
                           at 32, except for _Atomic types with COMPUTED above 32, which keep
                           COMPUTED and may trigger a one-time psABI note.  */
   24037              : int
   24038    102215919 : x86_field_alignment (tree type, int computed)
   24039              : {
   24040    102215919 :   machine_mode mode;
   24041              : 
   24042    102215919 :   if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
   24043              :     return computed;
   24044      9121371 :   if (TARGET_IAMCU)
   24045            0 :     return iamcu_alignment (type, computed);
   24046      9121371 :   type = strip_array_types (type);
   24047      9121371 :   mode = TYPE_MODE (type);
   24048      9121371 :   if (mode == DFmode || mode == DCmode
   24049      9015594 :       || GET_MODE_CLASS (mode) == MODE_INT
   24050      3017747 :       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
   24051              :     {
   24052      6103624 :       if (TYPE_ATOMIC (type) && computed > 32)
   24053              :         {
                                  /* The note is issued at most once: WARNED is static and is
                                     set the first time the note is emitted.  This branch has
                                     no return, so control reaches the final
                                     "return computed" and the alignment is not capped.  */
   24054            0 :           static bool warned;
   24055              : 
   24056            0 :           if (!warned && warn_psabi)
   24057              :             {
   24058            0 :               const char *url
   24059              :                 = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic";
   24060              : 
   24061            0 :               warned = true;
   24062            0 :               inform (input_location, "the alignment of %<_Atomic %T%> "
   24063              :                                       "fields changed in %{GCC 11.1%}",
   24064            0 :                       TYPE_MAIN_VARIANT (type), url);
   24065              :             }
   24066              :         }
   24067              :       else
   24068      6103624 :       return MIN (32, computed);
   24069              :     }
   24070              :   return computed;
   24071              : }
   24072              : 
   24073              : /* Print call to TARGET to FILE.  LABEL is written verbatim in front of
                           the emitted instruction; callers in this file pass either "1:"
                           or the empty string.

                           When flag_nop_mcount is set or TARGET is the string "nop", a
                           no-op byte sequence is emitted instead of the call (3 bytes for
                           TARGET_16BIT, 5 bytes otherwise).  For PIC code on non-PECOFF
                           targets the call goes through the PLT, and flag_plt is asserted.  */
   24074              : 
   24075              : static void
   24076          389 : x86_print_call_or_nop (FILE *file, const char *target,
   24077              :                        const char *label)
   24078              : {
   24079          389 :   if (flag_nop_mcount || !strcmp (target, "nop"))
   24080              :     {
   24081            9 :       if (TARGET_16BIT)
   24082              :         /* 3 byte no-op: lea 0(%si), %si */
   24083            1 :         fprintf (file, "%s" ASM_BYTE "0x8d, 0x74, 0x00\n", label);
   24084              :       else
   24085              :         /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
   24086            8 :         fprintf (file, "%s" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n",
   24087              :                  label);
   24088              :     }
   24089          380 :   else if (!TARGET_PECOFF && flag_pic)
   24090              :     {
   24091            8 :       gcc_assert (flag_plt);
   24092              : 
   24093            8 :       fprintf (file, "%s\tcall\t%s@PLT\n", label, target);
   24094              :     }
   24095              :   else
   24096          372 :     fprintf (file, "%s\tcall\t%s\n", label, target);
   24097          389 : }
   24098              : 
                        /* If the current function carries a "fentry_name" attribute, store
                           the string of its first argument in *NAME and return true.
                           Otherwise return false and leave *NAME untouched.  */
   24099              : static bool
   24100          409 : current_fentry_name (const char **name)
   24101              : {
   24102          409 :   tree attr = lookup_attribute ("fentry_name",
   24103          409 :                                 DECL_ATTRIBUTES (current_function_decl));
   24104          409 :   if (!attr)
   24105              :     return false;
   24106            2 :   *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
   24107            2 :   return true;
   24108              : }
   24109              : 
                        /* If the current function carries a "fentry_section" attribute,
                           store the string of its first argument in *NAME and return true.
                           Otherwise return false and leave *NAME untouched.  */
   24110              : static bool
   24111           16 : current_fentry_section (const char **name)
   24112              : {
   24113           16 :   tree attr = lookup_attribute ("fentry_section",
   24114           16 :                                 DECL_ATTRIBUTES (current_function_decl));
   24115           16 :   if (!attr)
   24116              :     return false;
   24117            2 :   *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
   24118            2 :   return true;
   24119              : }
   24120              : 
   24121              : /* Return a caller-saved register which isn't live or a callee-saved
   24122              :    register which has been saved on stack in the prologue at entry for
   24123              :    profile.
                           R11_OK says whether %r11 may be chosen; it is only consulted when
                           NO_PROFILE_COUNTERS is defined, otherwise %r11 is never selected.
                           If no suitable register exists, a "sorry" diagnostic is issued
                           and R10_REG is returned anyway.  */
   24124              : 
   24125              : static int
   24126           17 : x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
   24127              : {
   24128              :   /* Use %r10 if the profiler is emitted before the prologue or it isn't
   24129              :      used by DRAP.  */
   24130           17 :   if (ix86_profile_before_prologue ()
   24131            4 :       || !crtl->drap_reg
   24132           17 :       || REGNO (crtl->drap_reg) != R10_REG)
   24133              :     return R10_REG;
   24134              : 
   24135              :   /* The profiler is emitted after the prologue.  If there is a
   24136              :      caller-saved register which isn't live or a callee-saved
   24137              :      register saved on stack in the prologue, use it.  */
   24138              : 
   24139            0 :   bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
   24140              : 
                          /* Scan the hard registers in increasing number and return the
                             first general register that is accessible, is neither %r10 nor
                             an excluded %r11, and is either saved by the prologue or is a
                             non-fixed call-used register not live out of the entry block.  */
   24141            0 :   int i;
   24142            0 :   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
   24143            0 :     if (GENERAL_REGNO_P (i)
   24144            0 :         && i != R10_REG
   24145              : #ifdef NO_PROFILE_COUNTERS
   24146            0 :         && (r11_ok || i != R11_REG)
   24147              : #else
   24148              :         && i != R11_REG
   24149              : #endif
   24150            0 :         && TEST_HARD_REG_BIT (accessible_reg_set, i)
   24151            0 :         && (ix86_save_reg (i, true, true)
   24152            0 :             || (call_used_regs[i]
   24153            0 :                 && !fixed_regs[i]
   24154            0 :                 && !REGNO_REG_SET_P (reg_live, i))))
   24155            0 :       return i;
   24156              : 
   24157            0 :   sorry ("no register available for profiling %<-mcmodel=large%s%>",
   24158            0 :          ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");
   24159              : 
   24160            0 :   return R10_REG;
   24161              : }
   24162              : 
   24163              : /* Output assembler code to FILE to increment profiler label # LABELNO
   24164              :    for profiling a function entry.
                           Besides the call to the profiling routine, this also emits any
                           instruction queued for the function entrance and, when
                           requested, a record of the call site in a separate section.
                           LABELNO is only used when NO_PROFILE_COUNTERS is not defined.  */
   24165              : void
   24166          409 : x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
   24167              : {
                          /* Emit what was queued for the very start of the function: an
                             ENDBR instruction if that is the queued type, followed by the
                             part of the patchable area that lies after the entry point.  */
   24168          409 :   if (cfun->machine->insn_queued_at_entrance)
   24169              :     {
   24170            7 :       if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
   24171            6 :         fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
   24172            7 :       unsigned int patch_area_size
   24173            7 :         = crtl->patch_area_size - crtl->patch_area_entry;
   24174            7 :       if (patch_area_size)
   24175            2 :         ix86_output_patchable_area (patch_area_size,
   24176              :                                     crtl->patch_area_entry == 0);
   24177              :     }
   24178              : 
   24179          409 :   const char *mcount_name = MCOUNT_NAME;
   24180              : 
                          /* A call-site record is wanted when flag_record_mcount is set or
                             the function has the "fentry_section" attribute.  */
   24181          409 :   bool fentry_section_p
   24182          409 :     = (flag_record_mcount
   24183          803 :        || lookup_attribute ("fentry_section",
   24184          394 :                             DECL_ATTRIBUTES (current_function_decl)));
   24185              : 
                          /* The record emitted at the end refers to the call as "1b", so
                             local label 1 is placed in front of the call only when the
                             record is wanted.  */
   24186              :   const char *label = fentry_section_p ? "1:" : "";
   24187              : 
                          /* Choose the routine to call.  The "fentry_name" attribute wins,
                             then the global fentry_name (presumably set from a command-line
                             option -- not visible here), then the before-prologue name when
                             flag_fentry is set.  Otherwise MCOUNT_NAME is kept.  */
   24188          409 :   if (current_fentry_name (&mcount_name))
   24189              :     ;
   24190          407 :   else if (fentry_name)
   24191            1 :     mcount_name = fentry_name;
   24192          406 :   else if (flag_fentry)
   24193          394 :     mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
   24194              : 
   24195          409 :   if (TARGET_64BIT)
   24196              :     {
   24197              : #ifndef NO_PROFILE_COUNTERS
   24198              :       if (ASSEMBLER_DIALECT == ASM_INTEL)
   24199              :         fprintf (file, "\tlea\tr11, %sP%d[rip]\n", LPREFIX, labelno);
   24200              :       else
   24201              :         fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
   24202              : #endif
   24203              : 
   24204          408 :       int scratch;
   24205          408 :       const char *reg;
   24206          408 :       char legacy_reg[4] = { 0 };
   24207              : 
   24208          408 :       if (!TARGET_PECOFF)
   24209              :         {
   24210          408 :           switch (ix86_cmodel)
   24211              :             {
   24212            7 :             case CM_LARGE:
                                      /* Load the absolute address of the routine into a
                                         scratch register and call through that register.  */
   24213            7 :               scratch = x86_64_select_profile_regnum (true);
   24214            7 :               reg = hi_reg_name[scratch];
                                      /* For a legacy integer register, build the name used in
                                         the output as 'r' followed by the first two characters
                                         of the hi_reg_name entry (presumably that table holds
                                         the name without the 64-bit prefix -- confirm).  */
   24215            7 :               if (LEGACY_INT_REGNO_P (scratch))
   24216              :                 {
   24217            0 :                   legacy_reg[0] = 'r';
   24218            0 :                   legacy_reg[1] = reg[0];
   24219            0 :                   legacy_reg[2] = reg[1];
   24220            0 :                   reg = legacy_reg;
   24221              :                 }
   24222            7 :               if (ASSEMBLER_DIALECT == ASM_INTEL)
   24223            1 :                 fprintf (file, "%s\tmovabs\t%s, OFFSET FLAT:%s\n"
   24224              :                                "\tcall\t%s\n", label, reg, mcount_name,
   24225              :                                reg);
   24226              :               else
   24227            6 :                 fprintf (file, "%s\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n",
   24228              :                          label, mcount_name, reg, reg);
   24229              :               break;
   24230           10 :             case CM_LARGE_PIC:
   24231              : #ifdef NO_PROFILE_COUNTERS
                                      /* Form the GOT address in REG from local label 1, using
                                         %r11 as a temporary, then add the PLTOFF of the routine
                                         and call through REG.  Label 1 is emitted here
                                         unconditionally, not through LABEL, because the sequence
                                         itself refers to "1b".  %r11 is excluded from the
                                         scratch choice by passing false.  */
   24232           10 :               scratch = x86_64_select_profile_regnum (false);
   24233           10 :               reg = hi_reg_name[scratch];
   24234           10 :               if (LEGACY_INT_REGNO_P (scratch))
   24235              :                 {
   24236            0 :                   legacy_reg[0] = 'r';
   24237            0 :                   legacy_reg[1] = reg[0];
   24238            0 :                   legacy_reg[2] = reg[1];
   24239            0 :                   reg = legacy_reg;
   24240              :                 }
   24241           10 :               if (ASSEMBLER_DIALECT == ASM_INTEL)
   24242              :                 {
   24243            1 :                   fprintf (file, "1:movabs\tr11, "
   24244              :                                  "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
   24245            1 :                   fprintf (file, "\tlea\t%s, 1b[rip]\n", reg);
   24246            1 :                   fprintf (file, "\tadd\t%s, r11\n", reg);
   24247            1 :                   fprintf (file, "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
   24248              :                            mcount_name);
   24249            1 :                   fprintf (file, "\tadd\t%s, r11\n", reg);
   24250            1 :                   fprintf (file, "\tcall\t%s\n", reg);
   24251            1 :                   break;
   24252              :                 }
   24253            9 :               fprintf (file,
   24254              :                        "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
   24255            9 :               fprintf (file, "\tleaq\t1b(%%rip), %%%s\n", reg);
   24256            9 :               fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
   24257            9 :               fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
   24258            9 :               fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
   24259            9 :               fprintf (file, "\tcall\t*%%%s\n", reg);
   24260              : #else
   24261              :               sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
   24262              : #endif
   24263            9 :               break;
   24264           12 :             case CM_SMALL_PIC:
   24265           12 :             case CM_MEDIUM_PIC:
                                      /* Without a PLT, call indirectly through the GOT entry.
                                         With a PLT, use the common path below.  */
   24266           12 :               if (!flag_plt)
   24267              :                 {
   24268            3 :                   if (ASSEMBLER_DIALECT == ASM_INTEL)
   24269            0 :                     fprintf (file, "%s\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n",
   24270              :                              label, mcount_name);
   24271              :                   else
   24272            3 :                     fprintf (file, "%s\tcall\t*%s@GOTPCREL(%%rip)\n",
   24273              :                              label, mcount_name);
   24274              :                   break;
   24275              :                 }
   24276              :               /* fall through */
   24277          388 :             default:
   24278          388 :               x86_print_call_or_nop (file, mcount_name, label);
   24279          388 :               break;
   24280              :             }
   24281              :         }
   24282              :       else
   24283              :         x86_print_call_or_nop (file, mcount_name, label);
   24284              :     }
   24285            1 :   else if (flag_pic)
   24286              :     {
   24287              : #ifndef NO_PROFILE_COUNTERS
   24288              :       if (ASSEMBLER_DIALECT == ASM_INTEL)
   24289              :         fprintf (file,
   24290              :                  "\tlea\t" PROFILE_COUNT_REGISTER ", %sP%d@GOTOFF[ebx]\n",
   24291              :                  LPREFIX, labelno);
   24292              :       else
   24293              :         fprintf (file,
   24294              :                  "\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n",
   24295              :                  LPREFIX, labelno);
   24296              : #endif
   24297            0 :       if (flag_plt)
   24298            0 :         x86_print_call_or_nop (file, mcount_name, label);
   24299            0 :       else if (ASSEMBLER_DIALECT == ASM_INTEL)
   24300            0 :         fprintf (file, "%s\tcall\t[DWORD PTR %s@GOT[ebx]]\n",
   24301              :                  label, mcount_name);
   24302              :       else
   24303            0 :         fprintf (file, "%s\tcall\t*%s@GOT(%%ebx)\n",
   24304              :                  label, mcount_name);
   24305              :     }
   24306              :   else
   24307              :     {
   24308              : #ifndef NO_PROFILE_COUNTERS
   24309              :       if (ASSEMBLER_DIALECT == ASM_INTEL)
   24310              :         fprintf (file,
   24311              :                  "\tmov\t" PROFILE_COUNT_REGISTER ", OFFSET FLAT:%sP%d\n",
   24312              :                  LPREFIX, labelno);
   24313              :       else
   24314              :         fprintf (file, "\tmovl\t$%sP%d, %%" PROFILE_COUNT_REGISTER "\n",
   24315              :                  LPREFIX, labelno);
   24316              : #endif
   24317            1 :       x86_print_call_or_nop (file, mcount_name, label);
   24318              :     }
   24319              : 
                          /* Record the address of local label 1 in a separate section.  The
                             section defaults to __mcount_loc and can be overridden by the
                             "fentry_section" attribute or, failing that, by the global
                             fentry_section.  */
   24320          409 :   if (fentry_section_p)
   24321              :     {
   24322           16 :       const char *sname = "__mcount_loc";
   24323              : 
   24324           16 :       if (current_fentry_section (&sname))
   24325              :         ;
   24326           14 :       else if (fentry_section)
   24327            1 :         sname = fentry_section;
   24328              : 
   24329           16 :       fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
   24330           16 :       fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
   24331           16 :       fprintf (file, "\t.previous\n");
   24332              :     }
   24333          409 : }
   24334              : 
   24335              : /* We don't have exact information about the insn sizes, but we may assume
   24336              :    quite safely that we are informed about all 1 byte insns and memory
   24337              :    address sizes.  This is enough to eliminate unnecessary padding in
   24338              :    99% of cases.
                           Return a lower bound on the size of INSN in bytes.  The result is
                           0 for anything that is not an active insn, for the alignment
                           unspec emitted by this file, and for inline asm.  */
   24339              : 
   24340              : int
   24341    382252554 : ix86_min_insn_size (rtx_insn *insn)
   24342              : {
   24343    382252554 :   int l = 0, len;
   24344              : 
   24345    382252554 :   if (!INSN_P (insn) || !active_insn_p (insn))
   24346       500406 :     return 0;
   24347              : 
   24348              :   /* Discard alignments we've emitted and jump instructions.  */
   24349    381752148 :   if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
   24350    381752148 :       && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
   24351              :     return 0;
   24352              : 
   24353              :   /* Important case - calls are always 5 bytes.
   24354              :      It is common to have many calls in a row.  */
   24355    381752142 :   if (CALL_P (insn)
   24356      9051494 :       && symbolic_reference_mentioned_p (PATTERN (insn))
   24357    390476245 :       && !SIBLING_CALL_P (insn))
   24358              :     return 5;
   24359    373260108 :   len = get_attr_length (insn);
   24360    373260108 :   if (len <= 1)
   24361              :     return 1;
   24362              : 
   24363              :   /* For normal instructions we rely on get_attr_length being exact,
   24364              :      with a few exceptions.  */
   24365    364658192 :   if (!JUMP_P (insn))
   24366              :     {
   24367    359355062 :       enum attr_type type = get_attr_type (insn);
   24368              : 
   24369    359355062 :       switch (type)
   24370              :         {
   24371        95502 :         case TYPE_MULTI:
                                  /* Inline asm gets a minimum size of 0.  */
   24372        95502 :           if (GET_CODE (PATTERN (insn)) == ASM_INPUT
   24373        95502 :               || asm_noperands (PATTERN (insn)) >= 0)
   24374          527 :             return 0;
   24375              :           break;
   24376              :         case TYPE_OTHER:
   24377              :         case TYPE_FCMP:
   24378              :           break;
   24379              :         default:
   24380              :           /* Otherwise trust get_attr_length.  */
   24381              :           return len;
   24382              :         }
   24383              : 
                          /* Treat a symbolic reference as at least 4 address bytes.  */
   24384       474866 :       l = get_attr_length_address (insn);
   24385       474866 :       if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
   24386              :         l = 4;
   24387              :     }
                          /* L is nonzero only for the non-jump insns handled above: count one
                             more byte on top of the address bytes.  Everything else that
                             reaches this point is estimated at 2 bytes.  */
   24388       384328 :   if (l)
   24389        90538 :     return 1+l;
   24390              :   else
   24391      5687458 :     return 2;
   24392              : }
   24393              : 
   24394              : #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
   24395              : 
   24396              : /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   24397              :    window.
                           Walk the insn stream of the current function and emit a
                           max-skip alignment in front of any jump or call that could
                           otherwise become the fourth one in such a window.  */
   24398              : 
   24399              : static void
   24400        45424 : ix86_avoid_jump_mispredicts (void)
   24401              : {
   24402        45424 :   rtx_insn *insn, *start = get_insns ();
   24403        45424 :   int nbytes = 0, njumps = 0;
   24404        45424 :   bool isjump = false;
   24405              : 
   24406              :   /* Look for all minimal intervals of instructions containing 4 jumps.
   24407              :      The intervals are bounded by START and INSN.  NBYTES is the total
   24408              :      size of instructions in the interval including INSN and not including
   24409              :      START.  When the NBYTES is smaller than 16 bytes, it is possible
   24410              :      that the end of START and INSN ends up in the same 16byte page.
   24411              : 
   24412              :      The smallest offset in the page INSN can start is the case where START
   24413              :      ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
   24414              :      We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
   24415              : 
   24416              :      Don't consider asm goto as jump, while it can contain a jump, it doesn't
   24417              :      have to, control transfer to label(s) can be performed through other
   24418              :      means, and also we estimate minimum length of all asm stmts as 0.  */
   24419       700828 :   for (insn = start; insn; insn = NEXT_INSN (insn))
   24420              :     {
   24421       655404 :       int min_size;
   24422              : 
   24423       655404 :       if (LABEL_P (insn))
   24424              :         {
   24425          961 :           align_flags alignment = label_to_alignment (insn);
   24426          961 :           int align = alignment.levels[0].log;
   24427          961 :           int max_skip = alignment.levels[0].maxskip;
   24428              : 
   24429          961 :           if (max_skip > 15)
   24430              :             max_skip = 15;
   24431              :           /* If align > 3, only up to 16 - max_skip - 1 bytes can be
   24432              :              already in the current 16 byte page, because otherwise
   24433              :              ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
   24434              :              bytes to reach 16 byte boundary.  */
   24435          961 :           if (align <= 0
   24436          328 :               || (align <= 3 && max_skip != (1 << align) - 1))
   24437          961 :             max_skip = 0;
   24438          961 :           if (dump_file)
   24439            0 :             fprintf (dump_file, "Label %i with max_skip %i\n",
   24440            0 :                      INSN_UID (insn), max_skip);
   24441          961 :           if (max_skip)
   24442              :             {
                                      /* Drop insns from the front of the interval until its
                                         size plus the possible skip is below 16.  ISJUMP
                                         records whether the insn dropped last was a jump or
                                         call.  */
   24443         6293 :               while (nbytes + max_skip >= 16)
   24444              :                 {
   24445         5965 :                   start = NEXT_INSN (start);
   24446          310 :                   if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
   24447         5982 :                       || CALL_P (start))
   24448          350 :                     njumps--, isjump = true;
   24449              :                   else
   24450              :                     isjump = false;
   24451         5965 :                   nbytes -= ix86_min_insn_size (start);
   24452              :                 }
   24453              :             }
   24454          961 :           continue;
   24455          961 :         }
   24456              : 
   24457       654443 :       min_size = ix86_min_insn_size (insn);
   24458       654443 :       nbytes += min_size;
   24459       654443 :       if (dump_file)
   24460            0 :         fprintf (dump_file, "Insn %i estimated to %i bytes\n",
   24461            0 :                  INSN_UID (insn), min_size);
                              /* Only jumps (other than asm goto) and calls extend the jump
                                 count; any other insn just adds to NBYTES.  */
   24462        46586 :       if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
   24463       654463 :           || CALL_P (insn))
   24464        47601 :         njumps++;
   24465              :       else
   24466       606842 :         continue;
   24467              : 
                              /* Shrink the interval from the front until at most 3 jumps
                                 remain in it.  */
   24468        55996 :       while (njumps > 3)
   24469              :         {
   24470         8395 :           start = NEXT_INSN (start);
   24471          549 :           if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
   24472         8395 :               || CALL_P (start))
   24473         1251 :             njumps--, isjump = true;
   24474              :           else
   24475              :             isjump = false;
   24476         8395 :           nbytes -= ix86_min_insn_size (start);
   24477              :         }
   24478        47601 :       gcc_assert (njumps >= 0);
   24479        47601 :       if (dump_file)
   24480            0 :         fprintf (dump_file, "Interval %i to %i has %i bytes\n",
   24481            0 :                  INSN_UID (start), INSN_UID (insn), nbytes);
   24482              : 
                              /* START itself is a jump and three more follow within fewer
                                 than 16 bytes: emit a 16-byte alignment (log 4) before INSN,
                                 allowed to skip at most PADSIZE bytes.  */
   24483        47601 :       if (njumps == 3 && isjump && nbytes < 16)
   24484              :         {
   24485           40 :           int padsize = 15 - nbytes + ix86_min_insn_size (insn);
   24486              : 
   24487           40 :           if (dump_file)
   24488            0 :             fprintf (dump_file, "Padding insn %i by %i bytes!\n",
   24489            0 :                      INSN_UID (insn), padsize);
   24490           40 :           emit_insn_before (gen_max_skip_align (GEN_INT (4), GEN_INT (padsize)), insn);
   24491              :         }
   24492              :     }
   24493        45424 : }
   24494              : #endif
   24495              : 
   24496              : /* AMD Athlon works faster
   24497              :    when RET is not destination of conditional jump or directly preceded
   24498              :    by other jump instruction.  We avoid the penalty by inserting NOP just
   24499              :    before the RET instructions in such cases.
                           NOTE(review): the code below does not emit a separate NOP insn.
                           It replaces the return with the insn produced by
                           gen_simple_return_internal_long -- presumably a longer return
                           encoding with the same effect; confirm against the pattern in
                           the machine description.  */
   24500              : static void
   24501        45144 : ix86_pad_returns (void)
   24502              : {
   24503        45144 :   edge e;
   24504        45144 :   edge_iterator ei;
   24505              : 
   24506        90312 :   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
   24507              :     {
   24508        45168 :       basic_block bb = e->src;
   24509        45168 :       rtx_insn *ret = BB_END (bb);
   24510        45168 :       rtx_insn *prev;
   24511        45168 :       bool replace = false;
   24512              : 
                              /* Skip blocks that do not end in a return jump and blocks that
                                 are optimized for size.  */
   24513        45158 :       if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
   24514        90326 :           || optimize_bb_for_size_p (bb))
   24515           23 :         continue;
                              /* Find the nearest preceding active insn or label.  */
   24516       179724 :       for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
   24517       134161 :         if (active_insn_p (prev) || LABEL_P (prev))
   24518              :           break;
                              /* The return directly follows a label: replace it if BB has an
                                 incoming non-fallthru edge with nonzero frequency whose
                                 source block has a non-negative index.  */
   24519        45145 :       if (prev && LABEL_P (prev))
   24520              :         {
   24521           43 :           edge e;
   24522           43 :           edge_iterator ei;
   24523              : 
   24524           56 :           FOR_EACH_EDGE (e, ei, bb->preds)
   24525          146 :             if (EDGE_FREQUENCY (e) && e->src->index >= 0
   24526           97 :                 && !(e->flags & EDGE_FALLTHRU))
   24527              :               {
   24528              :                 replace = true;
   24529              :                 break;
   24530              :               }
   24531              :         }
                              /* Otherwise replace the return when the previous active insn
                                 is a conditional jump or a call.  */
   24532           43 :       if (!replace)
   24533              :         {
   24534        45109 :           prev = prev_active_insn (ret);
   24535        45109 :           if (prev
   24536        45109 :               && ((JUMP_P (prev) && any_condjump_p (prev))
   24537        44673 :                   || CALL_P (prev)))
   24538              :             replace = true;
   24539              :           /* Empty functions get branch mispredict even when
   24540              :              the jump destination is not visible to us.  */
   24541        45109 :           if (!prev && !optimize_function_for_size_p (cfun))
   24542              :             replace = true;
   24543              :         }
   24544        44691 :       if (replace)
   24545              :         {
   24546          489 :           emit_jump_insn_before (gen_simple_return_internal_long (), ret);
   24547          489 :           delete_insn (ret);
   24548              :         }
   24549              :     }
   24550        45144 : }
   24551              : 
   24552              : /* Count the minimum number of instructions in BB.  Return 4 if the
   24553              :    number of instructions >= 4.
                           Counting stops at a return jump.  Insns rejected by
                           NONDEBUG_INSN_P and insns whose pattern is a bare USE or CLOBBER
                           are not counted.  */
   24554              : 
   24555              : static int
   24556           42 : ix86_count_insn_bb (basic_block bb)
   24557              : {
   24558           42 :   rtx_insn *insn;
   24559           42 :   int insn_count = 0;
   24560              : 
   24561              :   /* Count number of instructions in this block.  Return 4 if the number
   24562              :      of instructions >= 4.  */
   24563          297 :   FOR_BB_INSNS (bb, insn)
   24564              :     {
   24565              :       /* Only happens in exit blocks.  */
   24566          291 :       if (JUMP_P (insn)
   24567          291 :           && ANY_RETURN_P (PATTERN (insn)))
   24568              :         break;
   24569              : 
   24570          267 :       if (NONDEBUG_INSN_P (insn)
   24571          102 :           && GET_CODE (PATTERN (insn)) != USE
   24572          351 :           && GET_CODE (PATTERN (insn)) != CLOBBER)
   24573              :         {
   24574           84 :           insn_count++;
                                  /* Saturate: the count is never needed beyond 4.  */
   24575           84 :           if (insn_count >= 4)
   24576              :             return insn_count;
   24577              :         }
   24578              :     }
   24579              : 
   24580              :   return insn_count;
   24581              : }
   24582              : 
   24583              : 
   24584              : /* Count the minimum number of instructions in code path in BB.
   24585              :    Return 4 if the number of instructions >= 4.
                           NOTE: the result is the predecessor minimum (at most 3 when BB is
                           counted at all) plus the count for BB (at most 4), so it can
                           exceed 4.  The caller in this file only tests whether it is
                           below 4.  */
   24586              : 
   24587              : static int
   24588           62 : ix86_count_insn (basic_block bb)
   24589              : {
   24590           62 :   edge e;
   24591           62 :   edge_iterator ei;
   24592           62 :   int min_prev_count;
   24593              : 
   24594              :   /* Only bother counting instructions along paths with no
   24595              :      more than 2 basic blocks between entry and exit.  Given
   24596              :      that BB has an edge to exit, determine if a predecessor
   24597              :      of BB has an edge from entry.  If so, compute the number
   24598              :      of instructions in the predecessor block.  If there
   24599              :      happen to be multiple such blocks, compute the minimum.  */
   24600           62 :   min_prev_count = 4;
   24601          145 :   FOR_EACH_EDGE (e, ei, bb->preds)
   24602              :     {
   24603          109 :       edge prev_e;
   24604          109 :       edge_iterator prev_ei;
   24605              : 
                              /* BB is entered directly from the entry block: nothing
                                 precedes it, so no smaller count is possible.  */
   24606          109 :       if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
   24607              :         {
   24608           26 :           min_prev_count = 0;
   24609           26 :           break;
   24610              :         }
   24611          182 :       FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
   24612              :         {
   24613          109 :           if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
   24614              :             {
   24615           10 :               int count = ix86_count_insn_bb (e->src);
   24616           10 :               if (count < min_prev_count)
   24617           83 :                 min_prev_count = count;
   24618              :               break;
   24619              :             }
   24620              :         }
   24621              :     }
   24622              : 
                          /* Add BB's own insns unless the count is already 4, which happens
                             when no short path from the entry block was found.  */
   24623           62 :   if (min_prev_count < 4)
   24624           32 :     min_prev_count += ix86_count_insn_bb (bb);
   24625              : 
   24626           62 :   return min_prev_count;
   24627              : }
   24628              : 
   24629              : /* Pad short function to 4 instructions.   */
   24630              : 
   24631              : static void
   24632           63 : ix86_pad_short_function (void)
   24633              : {
   24634           63 :   edge e;
   24635           63 :   edge_iterator ei;
   24636              : 
   24637          128 :   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
   24638              :     {
   24639           65 :       rtx_insn *ret = BB_END (e->src);
   24640           65 :       if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
   24641              :         {
   24642           62 :           int insn_count = ix86_count_insn (e->src);
   24643              : 
   24644              :           /* Pad short function.  */
   24645           62 :           if (insn_count < 4)
   24646              :             {
   24647              :               rtx_insn *insn = ret;
   24648              : 
   24649              :               /* Find epilogue.  */
   24650              :               while (insn
   24651           60 :                      && (!NOTE_P (insn)
   24652           26 :                          || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
   24653           37 :                 insn = PREV_INSN (insn);
   24654              : 
   24655           23 :               if (!insn)
   24656            0 :                 insn = ret;
   24657              : 
   24658              :               /* Two NOPs count as one instruction.  */
   24659           23 :               insn_count = 2 * (4 - insn_count);
   24660           23 :               emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
   24661              :             }
   24662              :         }
   24663              :     }
   24664           63 : }
   24665              : 
   24666              : /* Fix up a Windows system unwinder issue.  If an EH region falls through into
   24667              :    the epilogue, the Windows system unwinder will apply epilogue logic and
   24668              :    produce incorrect offsets.  This can be avoided by adding a nop between
   24669              :    the last insn that can throw and the first insn of the epilogue.  */
   24670              : 
   24671              : static void
   24672            0 : ix86_seh_fixup_eh_fallthru (void)
   24673              : {
   24674            0 :   edge e;
   24675            0 :   edge_iterator ei;
   24676              : 
   24677            0 :   FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
   24678              :     {
   24679            0 :       rtx_insn *insn, *next;
   24680              : 
   24681              :       /* Find the beginning of the epilogue.  */
   24682            0 :       for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
   24683            0 :         if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
   24684              :           break;
   24685            0 :       if (insn == NULL)
   24686            0 :         continue;
   24687              : 
   24688              :       /* We only care about preceding insns that can throw.  */
   24689            0 :       insn = prev_active_insn (insn);
   24690            0 :       if (insn == NULL || !can_throw_internal (insn))
   24691            0 :         continue;
   24692              : 
   24693              :       /* Do not separate calls from their debug information.  */
   24694            0 :       for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
   24695            0 :         if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
   24696            0 :           insn = next;
   24697              :         else
   24698              :           break;
   24699              : 
   24700            0 :       emit_insn_after (gen_nops (const1_rtx), insn);
   24701              :     }
   24702            0 : }
   24703              : /* Split vector load from parm_decl to elemental loads to avoid STLF
   24704              :    stalls.  */
   24705              : static void
   24706       974351 : ix86_split_stlf_stall_load ()
   24707              : {
   24708       974351 :   rtx_insn* insn, *start = get_insns ();
   24709       974351 :   unsigned window = 0;
   24710              : 
   24711     26587434 :   for (insn = start; insn; insn = NEXT_INSN (insn))
   24712              :     {
   24713     26586584 :       if (!NONDEBUG_INSN_P (insn))
   24714     14970398 :         continue;
   24715     11616186 :       window++;
   24716              :       /* Insert 64 vaddps %xmm18, %xmm19, %xmm20(no dependence between each
   24717              :          other, just emulate for pipeline) before stalled load, stlf stall
   24718              :          case is as fast as no stall cases on CLX.
   24719              :          Since CFG is freed before machine_reorg, just do a rough
   24720              :          calculation of the window according to the layout.  */
   24721     11616186 :       if (window > (unsigned) x86_stlf_window_ninsns)
   24722              :         return;
   24723              : 
   24724     11598180 :       if (any_uncondjump_p (insn)
   24725     11562839 :           || ANY_RETURN_P (PATTERN (insn))
   24726     22784619 :           || CALL_P (insn))
   24727              :         return;
   24728              : 
   24729     10642685 :       rtx set = single_set (insn);
   24730     10642685 :       if (!set)
   24731       435608 :         continue;
   24732     10207077 :       rtx src = SET_SRC (set);
   24733     20413802 :       if (!MEM_P (src)
   24734              :           /* Only handle V2DFmode load since it doesn't need any scratch
   24735              :              register.  */
   24736      1458926 :           || GET_MODE (src) != E_V2DFmode
   24737         5495 :           || !MEM_EXPR (src)
   24738     10211059 :           || TREE_CODE (get_base_address (MEM_EXPR (src))) != PARM_DECL)
   24739     10206725 :         continue;
   24740              : 
   24741          352 :       rtx zero = CONST0_RTX (V2DFmode);
   24742          352 :       rtx dest = SET_DEST (set);
   24743          352 :       rtx m = adjust_address (src, DFmode, 0);
   24744          352 :       rtx loadlpd = gen_sse2_loadlpd (dest, zero, m);
   24745          352 :       emit_insn_before (loadlpd, insn);
   24746          352 :       m = adjust_address (src, DFmode, 8);
   24747          352 :       rtx loadhpd = gen_sse2_loadhpd (dest, dest, m);
   24748          352 :       if (dump_file && (dump_flags & TDF_DETAILS))
   24749              :         {
   24750            0 :           fputs ("Due to potential STLF stall, split instruction:\n",
   24751              :                  dump_file);
   24752            0 :           print_rtl_single (dump_file, insn);
   24753            0 :           fputs ("To:\n", dump_file);
   24754            0 :           print_rtl_single (dump_file, loadlpd);
   24755            0 :           print_rtl_single (dump_file, loadhpd);
   24756              :         }
   24757          352 :       PATTERN (insn) = loadhpd;
   24758          352 :       INSN_CODE (insn) = -1;
   24759          352 :       gcc_assert (recog_memoized (insn) != -1);
   24760              :     }
   24761              : }
   24762              : 
   24763              : /* Implement machine specific optimizations.  We implement padding of returns
   24764              :    for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
   24765              : static void
   24766      1488371 : ix86_reorg (void)
   24767              : {
   24768              :   /* We are freeing block_for_insn in the toplev to keep compatibility
   24769              :      with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
   24770      1488371 :   compute_bb_for_insn ();
   24771              : 
   24772      1488371 :   if (TARGET_SEH && current_function_has_exception_handlers ())
   24773              :     ix86_seh_fixup_eh_fallthru ();
   24774              : 
   24775      1488371 :   if (optimize && optimize_function_for_speed_p (cfun))
   24776              :     {
   24777       976653 :       if (TARGET_SSE2)
   24778       974351 :         ix86_split_stlf_stall_load ();
   24779       976653 :       if (TARGET_PAD_SHORT_FUNCTION)
   24780           63 :         ix86_pad_short_function ();
   24781       976590 :       else if (TARGET_PAD_RETURNS)
   24782        45144 :         ix86_pad_returns ();
   24783              : #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
   24784       976653 :       if (TARGET_FOUR_JUMP_LIMIT)
   24785        45424 :         ix86_avoid_jump_mispredicts ();
   24786              : #endif
   24787              :     }
   24788      1488371 : }
   24789              : 
   24790              : /* Return nonzero when QImode register that must be represented via REX prefix
   24791              :    is used.  */
   24792              : bool
   24793      8549852 : x86_extended_QIreg_mentioned_p (rtx_insn *insn)
   24794              : {
   24795      8549852 :   int i;
   24796      8549852 :   extract_insn_cached (insn);
   24797     32314763 :   for (i = 0; i < recog_data.n_operands; i++)
   24798      4617400 :     if (GENERAL_REG_P (recog_data.operand[i])
   24799     21400426 :         && !QI_REGNO_P (REGNO (recog_data.operand[i])))
   24800              :        return true;
   24801              :   return false;
   24802              : }
   24803              : 
   24804              : /* Return true when INSN mentions register that must be encoded using REX
   24805              :    prefix.  */
   24806              : bool
   24807    195589151 : x86_extended_reg_mentioned_p (rtx insn)
   24808              : {
   24809    195589151 :   subrtx_iterator::array_type array;
   24810   1024858984 :   FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
   24811              :     {
   24812    877282300 :       const_rtx x = *iter;
   24813    877282300 :       if (REG_P (x)
   24814    877282300 :           && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))
   24815    252743673 :               || REX2_INT_REGNO_P (REGNO (x))))
   24816     48012467 :         return true;
   24817              :     }
   24818    147576684 :   return false;
   24819    195589151 : }
   24820              : 
   24821              : /* Return true when INSN mentions register that must be encoded using REX2
   24822              :    prefix.  */
   24823              : bool
   24824      2046937 : x86_extended_rex2reg_mentioned_p (rtx insn)
   24825              : {
   24826      2046937 :   subrtx_iterator::array_type array;
   24827      9532685 :   FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
   24828              :     {
   24829      7486404 :       const_rtx x = *iter;
   24830      7486404 :       if (REG_P (x) && REX2_INT_REGNO_P (REGNO (x)))
   24831          656 :         return true;
   24832              :     }
   24833      2046281 :   return false;
   24834      2046937 : }
   24835              : 
   24836              : /* Return true when rtx operands mentions register that must be encoded using
   24837              :    evex prefix.  */
   24838              : bool
   24839           10 : x86_evex_reg_mentioned_p (rtx operands[], int nops)
   24840              : {
   24841           10 :   int i;
   24842           28 :   for (i = 0; i < nops; i++)
   24843           22 :     if (EXT_REX_SSE_REG_P (operands[i])
   24844           40 :         || x86_extended_rex2reg_mentioned_p (operands[i]))
   24845            4 :       return true;
   24846              :   return false;
   24847              : }
   24848              : 
   24849              : /* If profitable, negate (without causing overflow) integer constant
   24850              :    of mode MODE at location LOC.  Return true in this case.  */
   24851              : bool
   24852      5902150 : x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
   24853              : {
   24854      5902150 :   HOST_WIDE_INT val;
   24855              : 
   24856      5902150 :   if (!CONST_INT_P (*loc))
   24857              :     return false;
   24858              : 
   24859      4974992 :   switch (mode)
   24860              :     {
   24861      2823994 :     case E_DImode:
   24862              :       /* DImode x86_64 constants must fit in 32 bits.  */
   24863      2823994 :       gcc_assert (x86_64_immediate_operand (*loc, mode));
   24864              : 
   24865              :       mode = SImode;
   24866              :       break;
   24867              : 
   24868              :     case E_SImode:
   24869              :     case E_HImode:
   24870              :     case E_QImode:
   24871              :       break;
   24872              : 
   24873            0 :     default:
   24874            0 :       gcc_unreachable ();
   24875              :     }
   24876              : 
   24877              :   /* Avoid overflows.  */
   24878      4974992 :   if (mode_signbit_p (mode, *loc))
   24879              :     return false;
   24880              : 
   24881      4974472 :   val = INTVAL (*loc);
   24882              : 
   24883              :   /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
   24884              :      Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
   24885      4974472 :   if ((val < 0 && val != -128)
   24886      3267294 :       || val == 128)
   24887              :     {
   24888      1718440 :       *loc = GEN_INT (-val);
   24889      1718440 :       return true;
   24890              :     }
   24891              : 
   24892              :   return false;
   24893              : }
   24894              : 
   24895              : /* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   24896              :    optabs would emit if we didn't have TFmode patterns.  */
   24897              : 
   24898              : void
   24899         4518 : x86_emit_floatuns (rtx operands[2])
   24900              : {
   24901         4518 :   rtx_code_label *neglab, *donelab;
   24902         4518 :   rtx i0, i1, f0, in, out;
   24903         4518 :   machine_mode mode, inmode;
   24904              : 
   24905         4518 :   inmode = GET_MODE (operands[1]);
   24906         4518 :   gcc_assert (inmode == SImode || inmode == DImode);
   24907              : 
   24908         4518 :   out = operands[0];
   24909         4518 :   in = force_reg (inmode, operands[1]);
   24910         4518 :   mode = GET_MODE (out);
   24911         4518 :   neglab = gen_label_rtx ();
   24912         4518 :   donelab = gen_label_rtx ();
   24913         4518 :   f0 = gen_reg_rtx (mode);
   24914              : 
   24915         4518 :   emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
   24916              : 
   24917         4518 :   expand_float (out, in, 0);
   24918              : 
   24919         4518 :   emit_jump_insn (gen_jump (donelab));
   24920         4518 :   emit_barrier ();
   24921              : 
   24922         4518 :   emit_label (neglab);
   24923              : 
   24924         4518 :   i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
   24925              :                             1, OPTAB_DIRECT);
   24926         4518 :   i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
   24927              :                             1, OPTAB_DIRECT);
   24928         4518 :   i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
   24929              : 
   24930         4518 :   expand_float (f0, i0, 0);
   24931              : 
   24932         4518 :   emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
   24933              : 
   24934         4518 :   emit_label (donelab);
   24935         4518 : }
   24936              : 
   24937              : /* Return the diagnostic message string if conversion from FROMTYPE to
   24938              :    TOTYPE is not allowed, NULL otherwise.  */
   24939              : 
   24940              : static const char *
   24941   1083307915 : ix86_invalid_conversion (const_tree fromtype, const_tree totype)
   24942              : {
   24943   1083307915 :   machine_mode from_mode = element_mode (fromtype);
   24944   1083307915 :   machine_mode to_mode = element_mode (totype);
   24945              : 
   24946   1083307915 :   if (!TARGET_SSE2 && from_mode != to_mode)
   24947              :     {
   24948              :       /* Do no allow conversions to/from BFmode/HFmode scalar types
   24949              :          when TARGET_SSE2 is not available.  */
   24950       468009 :       if (from_mode == BFmode)
   24951              :         return N_("invalid conversion from type %<__bf16%> "
   24952              :                   "without option %<-msse2%>");
   24953       468008 :       if (from_mode == HFmode)
   24954              :         return N_("invalid conversion from type %<_Float16%> "
   24955              :                   "without option %<-msse2%>");
   24956       468008 :       if (to_mode == BFmode)
   24957              :         return N_("invalid conversion to type %<__bf16%> "
   24958              :                   "without option %<-msse2%>");
   24959       468008 :       if (to_mode == HFmode)
   24960              :         return N_("invalid conversion to type %<_Float16%> "
   24961              :                   "without option %<-msse2%>");
   24962              :     }
   24963              : 
   24964              :   /* Warn for silent implicit conversion between __bf16 and short,
   24965              :      since __bfloat16 is refined as real __bf16 instead of short
   24966              :      since GCC13.  */
   24967   1083307913 :   if (element_mode (fromtype) != element_mode (totype)
   24968   1083307913 :       && (TARGET_AVX512BF16 || TARGET_AVXNECONVERT))
   24969              :     {
   24970              :       /* Warn for silent implicit conversion where user may expect
   24971              :          a bitcast.  */
   24972      7766538 :       if ((TYPE_MODE (fromtype) == BFmode
   24973          279 :            && TYPE_MODE (totype) == HImode)
   24974      7766816 :           || (TYPE_MODE (totype) == BFmode
   24975          423 :               && TYPE_MODE (fromtype) == HImode))
   24976            1 :         warning (0, "%<__bfloat16%> is redefined from typedef %<short%> "
   24977              :                 "to real %<__bf16%> since GCC 13.1, be careful of "
   24978              :                  "implicit conversion between %<__bf16%> and %<short%>; "
   24979              :                  "an explicit bitcast may be needed here");
   24980              :     }
   24981              : 
   24982              :   /* Conversion allowed.  */
   24983              :   return NULL;
   24984              : }
   24985              : 
   24986              : /* Return the diagnostic message string if the unary operation OP is
   24987              :    not permitted on TYPE, NULL otherwise.  */
   24988              : 
   24989              : static const char *
   24990     90885078 : ix86_invalid_unary_op (int op, const_tree type)
   24991              : {
   24992     90885078 :   machine_mode mmode = element_mode (type);
   24993              :   /* Reject all single-operand operations on BFmode/HFmode except for &
   24994              :      when TARGET_SSE2 is not available.  */
   24995     90885078 :   if (!TARGET_SSE2 && op != ADDR_EXPR)
   24996              :     {
   24997       111098 :       if (mmode == BFmode)
   24998              :         return N_("operation not permitted on type %<__bf16%> "
   24999              :                   "without option %<-msse2%>");
   25000       111098 :       if (mmode == HFmode)
   25001            0 :         return N_("operation not permitted on type %<_Float16%> "
   25002              :                   "without option %<-msse2%>");
   25003              :     }
   25004              : 
   25005              :   /* Operation allowed.  */
   25006              :   return NULL;
   25007              : }
   25008              : 
   25009              : /* Return the diagnostic message string if the binary operation OP is
   25010              :    not permitted on TYPE1 and TYPE2, NULL otherwise.  */
   25011              : 
   25012              : static const char *
   25013    161405897 : ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
   25014              :                         const_tree type2)
   25015              : {
   25016    161405897 :   machine_mode type1_mode = element_mode (type1);
   25017    161405897 :   machine_mode type2_mode = element_mode (type2);
   25018              :   /* Reject all 2-operand operations on BFmode or HFmode
   25019              :      when TARGET_SSE2 is not available.  */
   25020    161405897 :   if (!TARGET_SSE2)
   25021              :     {
   25022      1008820 :       if (type1_mode == BFmode || type2_mode == BFmode)
   25023              :         return N_("operation not permitted on type %<__bf16%> "
   25024              :                   "without option %<-msse2%>");
   25025              : 
   25026      1008820 :       if (type1_mode == HFmode || type2_mode == HFmode)
   25027            0 :         return N_("operation not permitted on type %<_Float16%> "
   25028              :                   "without option %<-msse2%>");
   25029              :     }
   25030              : 
   25031              :   /* Operation allowed.  */
   25032              :   return NULL;
   25033              : }
   25034              : 
   25035              : 
   25036              : /* Target hook for scalar_mode_supported_p.  */
   25037              : static bool
   25038      4715408 : ix86_scalar_mode_supported_p (scalar_mode mode)
   25039              : {
   25040      4715408 :   if (DECIMAL_FLOAT_MODE_P (mode))
   25041       650178 :     return default_decimal_float_supported_p ();
   25042      4065230 :   else if (mode == TFmode)
   25043              :     return true;
   25044      3732517 :   else if (mode == HFmode || mode == BFmode)
   25045              :     return true;
   25046              :   else
   25047      3069074 :     return default_scalar_mode_supported_p (mode);
   25048              : }
   25049              : 
   25050              : /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
   25051              :    if MODE is HFmode, and punt to the generic implementation otherwise.  */
   25052              : 
   25053              : static bool
   25054      2285955 : ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
   25055              : {
   25056              :   /* NB: Always return TRUE for HFmode so that the _Float16 type will
   25057              :      be defined by the C front-end for AVX512FP16 intrinsics.  We will
   25058              :      issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
   25059              :      enabled.  */
   25060      1954709 :   return ((mode == HFmode || mode == BFmode)
   25061      3909418 :           ? true
   25062      1623463 :           : default_libgcc_floating_mode_supported_p (mode));
   25063              : }
   25064              : 
   25065              : /* Implements target hook vector_mode_supported_p.  */
   25066              : static bool
   25067   1345161298 : ix86_vector_mode_supported_p (machine_mode mode)
   25068              : {
   25069              :   /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be
   25070              :      either.  */
   25071   1482099567 :   if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode)
   25072              :     return false;
   25073   1345160888 :   if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
   25074              :     return true;
   25075   1130710187 :   if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
   25076              :     return true;
   25077    508323464 :   if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
   25078              :     return true;
   25079    367748299 :   if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
   25080              :     return true;
   25081    233494915 :   if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
   25082    233438545 :       && VALID_MMX_REG_MODE (mode))
   25083              :     return true;
   25084     34323974 :   if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
   25085     33687914 :       && VALID_MMX_REG_MODE_3DNOW (mode))
   25086              :     return true;
   25087     22864604 :   if (mode == V2QImode)
   25088        24808 :     return true;
   25089              :   return false;
   25090              : }
   25091              : 
   25092              : /* Target hook for c_mode_for_suffix.  */
   25093              : static machine_mode
   25094       191824 : ix86_c_mode_for_suffix (char suffix)
   25095              : {
   25096       191824 :   if (suffix == 'q')
   25097              :     return TFmode;
   25098           37 :   if (suffix == 'w')
   25099              :     return XFmode;
   25100              : 
   25101            0 :   return VOIDmode;
   25102              : }
   25103              : 
   25104              : /* Helper function to map common constraints to non-EGPR ones.
   25105              :    All related constraints have h prefix, and h plus Upper letter
   25106              :    means the constraint is strictly EGPR enabled, while h plus
   25107              :    lower letter indicates the constraint is strictly gpr16 only.
   25108              : 
   25109              :    Specially for "g" constraint, split it to rmi as there is
   25110              :    no corresponding general constraint define for backend.
   25111              : 
   25112              :    Here is the full list to map constraints that may involve
   25113              :    gpr to h prefixed.
   25114              : 
   25115              :    "g" -> "jrjmi"
   25116              :    "r" -> "jr"
   25117              :    "m" -> "jm"
   25118              :    "<" -> "j<"
   25119              :    ">" -> "j>"
   25120              :    "o" -> "jo"
   25121              :    "V" -> "jV"
   25122              :    "p" -> "jp"
   25123              :    "Bm" -> "ja"
   25124              : */
   25125              : 
   25126           57 : static void map_egpr_constraints (vec<const char *> &constraints)
   25127              : {
   25128           67 :   for (size_t i = 0; i < constraints.length(); i++)
   25129              :     {
   25130           10 :       const char *cur = constraints[i];
   25131              : 
   25132           10 :       if (startswith (cur, "=@cc"))
   25133            0 :         continue;
   25134              : 
   25135           10 :       int len = strlen (cur);
   25136           10 :       auto_vec<char> buf;
   25137              : 
   25138           24 :       for (int j = 0; j < len; j++)
   25139              :         {
   25140           14 :           switch (cur[j])
   25141              :             {
   25142            2 :             case 'g':
   25143            2 :               buf.safe_push ('j');
   25144            2 :               buf.safe_push ('r');
   25145            2 :               buf.safe_push ('j');
   25146            2 :               buf.safe_push ('m');
   25147            2 :               buf.safe_push ('i');
   25148            2 :               break;
   25149            8 :             case 'r':
   25150            8 :             case 'm':
   25151            8 :             case '<':
   25152            8 :             case '>':
   25153            8 :             case 'o':
   25154            8 :             case 'V':
   25155            8 :             case 'p':
   25156            8 :               buf.safe_push ('j');
   25157            8 :               buf.safe_push (cur[j]);
   25158            8 :               break;
   25159            0 :             case 'B':
   25160            0 :               if (cur[j + 1] == 'm')
   25161              :                 {
   25162            0 :                   buf.safe_push ('j');
   25163            0 :                   buf.safe_push ('a');
   25164            0 :                   j++;
   25165              :                 }
   25166              :               else
   25167              :                 {
   25168            0 :                   buf.safe_push (cur[j]);
   25169            0 :                   buf.safe_push (cur[j + 1]);
   25170            0 :                   j++;
   25171              :                 }
   25172              :               break;
   25173            0 :             case 'T':
   25174            0 :             case 'Y':
   25175            0 :             case 'W':
   25176            0 :             case 'j':
   25177            0 :               buf.safe_push (cur[j]);
   25178            0 :               buf.safe_push (cur[j + 1]);
   25179            0 :               j++;
   25180            0 :               break;
   25181            0 :             case '{':
   25182            0 :               do
   25183              :                 {
   25184            0 :                   buf.safe_push (cur[j]);
   25185            0 :                 } while (cur[j++] != '}');
   25186              :               break;
   25187            4 :             default:
   25188            4 :               buf.safe_push (cur[j]);
   25189            4 :               break;
   25190              :             }
   25191              :         }
   25192           10 :       buf.safe_push ('\0');
   25193           20 :       constraints[i] = xstrdup (buf.address ());
   25194           10 :     }
   25195           57 : }
   25196              : 
   25197              : /* Worker function for TARGET_MD_ASM_ADJUST.
   25198              : 
   25199              :    We implement asm flag outputs, and maintain source compatibility
   25200              :    with the old cc0-based compiler.

                           An output whose constraint starts with "=@cc" is an asm flag
                           output.  The rest of the constraint is an optional 'n' (negate)
                           followed by a condition name: a, ae, b, be, c, e, g, ge, l, le,
                           o, p, s or z.  The first flag output is replaced in OUTPUTS by the
                           flags register with constraint "=Bf"; any further one becomes a
                           scratch with constraint "=X".  Insns that store the requested
                           condition into the original destination are emitted into a
                           sequence of their own.

                           Return that sequence if at least one flag output was recognized.
                           Otherwise add the flags register to CLOBBERS and CLOBBERED_REGS
                           and return NULL.  Errors are reported at LOC.  */
   25201              : 
   25202              : static rtx_insn *
   25203       108774 : ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
   25204              :                     vec<machine_mode> & /*input_modes*/,
   25205              :                     vec<const char *> &constraints, vec<rtx> &/*uses*/,
   25206              :                     vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
   25207              :                     location_t loc)
   25208              : {
   25209       108774 :   bool saw_asm_flag = false;
   25210              : 
                          /* Everything emitted below is collected and handed back (or
                             dropped) as one sequence.  */
   25211       108774 :   start_sequence ();
   25212              : 
                          /* With APX EGPR enabled but gpr32 not allowed in inline asm, let
                             map_egpr_constraints rewrite CONSTRAINTS first.  */
   25213       108774 :   if (TARGET_APX_EGPR && !ix86_apx_inline_asm_use_gpr32)
   25214           57 :     map_egpr_constraints (constraints);
   25215              : 
   25216       294259 :   for (unsigned i = 0, n = outputs.length (); i < n; ++i)
   25217              :     {
                              /* Only outputs of the form "=@cc<cond>" need work here.  */
   25218        77747 :       const char *con = constraints[i];
   25219        77747 :       if (!startswith (con, "=@cc"))
   25220        77659 :         continue;
                              /* Step over the "=@cc" prefix.  */
   25221           88 :       con += 4;
   25222           88 :       if (strchr (con, ',') != NULL)
   25223              :         {
   25224            1 :           error_at (loc, "alternatives not allowed in %<asm%> flag output");
   25225            1 :           continue;
   25226              :         }
   25227              : 
                              /* A leading 'n' asks for the negated condition.  */
   25228           87 :       bool invert = false;
   25229           87 :       if (con[0] == 'n')
   25230           19 :         invert = true, con++;
   25231              : 
   25232           87 :       machine_mode mode = CCmode;
   25233           87 :       rtx_code code = UNKNOWN;
   25234              : 
                              /* Map the condition name to a flags mode and a comparison
                                 code.  CODE stays UNKNOWN for a name that is not matched
                                 exactly, which is diagnosed after the switch.  */
   25235           87 :       switch (con[0])
   25236              :         {
   25237           15 :         case 'a':
   25238           15 :           if (con[1] == 0)
   25239              :             mode = CCAmode, code = EQ;
   25240            4 :           else if (con[1] == 'e' && con[2] == 0)
   25241              :             mode = CCCmode, code = NE;
   25242              :           break;
   25243           11 :         case 'b':
   25244           11 :           if (con[1] == 0)
   25245              :             mode = CCCmode, code = EQ;
   25246            6 :           else if (con[1] == 'e' && con[2] == 0)
   25247              :             mode = CCAmode, code = NE;
   25248              :           break;
   25249           14 :         case 'c':
   25250           14 :           if (con[1] == 0)
   25251              :             mode = CCCmode, code = EQ;
   25252              :           break;
   25253            8 :         case 'e':
   25254            8 :           if (con[1] == 0)
   25255              :             mode = CCZmode, code = EQ;
   25256              :           break;
   25257           11 :         case 'g':
   25258           11 :           if (con[1] == 0)
   25259              :             mode = CCGCmode, code = GT;
   25260            5 :           else if (con[1] == 'e' && con[2] == 0)
   25261              :             mode = CCGCmode, code = GE;
   25262              :           break;
   25263           10 :         case 'l':
   25264           10 :           if (con[1] == 0)
   25265              :             mode = CCGCmode, code = LT;
   25266            5 :           else if (con[1] == 'e' && con[2] == 0)
   25267              :             mode = CCGCmode, code = LE;
   25268              :           break;
   25269            4 :         case 'o':
   25270            4 :           if (con[1] == 0)
   25271              :             mode = CCOmode, code = EQ;
   25272              :           break;
   25273            4 :         case 'p':
   25274            4 :           if (con[1] == 0)
   25275              :             mode = CCPmode, code = EQ;
   25276              :           break;
   25277            4 :         case 's':
   25278            4 :           if (con[1] == 0)
   25279              :             mode = CCSmode, code = EQ;
   25280              :           break;
   25281            6 :         case 'z':
   25282            6 :           if (con[1] == 0)
   25283              :             mode = CCZmode, code = EQ;
   25284              :           break;
   25285              :         }
   25286            1 :       if (code == UNKNOWN)
   25287              :         {
   25288            1 :           error_at (loc, "unknown %<asm%> flag output %qs", constraints[i]);
   25289            1 :           continue;
   25290              :         }
   25291           86 :       if (invert)
   25292           19 :         code = reverse_condition (code);
   25293              : 
                              /* Remember the user's destination before OUTPUTS[I] is
                                 overwritten below.  */
   25294           86 :       rtx dest = outputs[i];
   25295           86 :       if (!saw_asm_flag)
   25296              :         {
   25297              :           /* This is the first asm flag output.  Here we put the flags
   25298              :              register in as the real output and adjust the condition to
   25299              :              allow it.  */
   25300           75 :           constraints[i] = "=Bf";
   25301           75 :           outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
   25302           75 :           saw_asm_flag = true;
   25303              :         }
   25304              :       else
   25305              :         {
   25306              :           /* We don't need the flags register as output twice.  */
   25307           11 :           constraints[i] = "=X";
   25308           11 :           outputs[i] = gen_rtx_SCRATCH (SImode);
   25309              :         }
   25310              : 
                              /* The flag value itself: CODE applied to the flags register
                                 viewed in MODE, compared against zero, as a QImode rtx.  */
   25311           86 :       rtx x = gen_rtx_REG (mode, FLAGS_REG);
   25312           86 :       x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
   25313              : 
                              /* Note that CONSTRAINTS[I] and OUTPUTS[I] have already been
                                 replaced (and SAW_ASM_FLAG possibly set) when this error is
                                 reported.  */
   25314           86 :       machine_mode dest_mode = GET_MODE (dest);
   25315           86 :       if (!SCALAR_INT_MODE_P (dest_mode))
   25316              :         {
   25317            3 :           error_at (loc, "invalid type for %<asm%> flag output");
   25318            3 :           continue;
   25319              :         }
   25320              : 
   25321           83 :       if (dest_mode == QImode)
   25322           73 :         emit_insn (gen_rtx_SET (dest, x));
   25323              :       else
   25324              :         {
                                  /* Compute the flag in a QImode temporary, then convert it
                                     to DEST_MODE (the final argument 1 is presumably
                                     UNSIGNEDP, i.e. zero extension -- confirm against
                                     convert_to_mode) and copy it to DEST.  */
   25325           10 :           rtx reg = gen_reg_rtx (QImode);
   25326           10 :           emit_insn (gen_rtx_SET (reg, x));
   25327              : 
   25328           10 :           reg = convert_to_mode (dest_mode, reg, 1);
   25329           10 :           emit_move_insn (dest, reg);
   25330              :         }
   25331              :     }
   25332              : 
   25333       108774 :   rtx_insn *seq = end_sequence ();
   25334              : 
   25335       108774 :   if (saw_asm_flag)
   25336              :     return seq;
   25337              :   else
   25338              :     {
   25339              :       /* If we had no asm flag outputs, clobber the flags.  */
   25340       108699 :       clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
   25341       108699 :       SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
   25342       108699 :       return NULL;
   25343              :     }
   25344              : }
   25345              : 
   25346              : /* Implements target vector targetm.asm.encode_section_info.

                           Runs the default encoding for DECL/RTL/FIRST, then sets
                           SYMBOL_FLAG_FAR_ADDR on the symbol in RTL when
                           ix86_in_large_data_p holds for DECL.  */
   25347              : 
   25348              : static void ATTRIBUTE_UNUSED
   25349     10007564 : ix86_encode_section_info (tree decl, rtx rtl, int first)
   25350              : {
   25351     10007564 :   default_encode_section_info (decl, rtl, first);
   25352              : 
                          /* XEXP (rtl, 0) is the operand of RTL that carries the symbol
                             flags updated here.  */
   25353     10007564 :   if (ix86_in_large_data_p (decl))
   25354           32 :     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
   25355     10007564 : }
   25356              : 
   25357              : /* Worker function for REVERSE_CONDITION.

                           Return the reverse of comparison CODE for a comparison done in
                           MODE.  CCFPmode comparisons go through
                           reverse_condition_maybe_unordered; every other mode uses
                           reverse_condition.  */
   25358              : 
   25359              : enum rtx_code
   25360     31387716 : ix86_reverse_condition (enum rtx_code code, machine_mode mode)
   25361              : {
   25362     31387716 :   return (mode == CCFPmode
   25363     31387716 :           ? reverse_condition_maybe_unordered (code)
   25364     27025750 :           : reverse_condition (code));
   25365              : }
   25366              : 
   25367              : /* Output code to perform an x87 FP register move, from OPERANDS[1]
   25368              :    to OPERANDS[0].

                           INSN is the move itself; its REG_DEAD notes decide whether a
                           popping form can be used.  OPERANDS[0] must be a register or a
                           memory reference.  Return the assembler template to emit.  */
   25369              : 
   25370              : const char *
   25371       649136 : output_387_reg_move (rtx_insn *insn, rtx *operands)
   25372              : {
   25373       649136 :   if (REG_P (operands[0]))
   25374              :     {
                              /* Register destination.  If the source register dies in INSN,
                                 a popping form is used: ffreep handling when the
                                 destination is FIRST_STACK_REG, fstp otherwise.  */
   25375       544019 :       if (REG_P (operands[1])
   25376       544019 :           && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
   25377              :         {
   25378       295736 :           if (REGNO (operands[0]) == FIRST_STACK_REG)
   25379       275148 :             return output_387_ffreep (operands, 0);
   25380              :           return "fstp\t%y0";
   25381              :         }
                              /* Source stays live: load when the destination is the stack
                                 top, otherwise a non-popping store.  */
   25382       248283 :       if (STACK_TOP_P (operands[0]))
   25383       248283 :         return "fld%Z1\t%y1";
   25384              :       return "fst\t%y0";
   25385              :     }
   25386       105117 :   else if (MEM_P (operands[0]))
   25387              :     {
                              /* Memory destination: the source has to be a register.  Pop
                                 it if it dies in INSN.  */
   25388       105117 :       gcc_assert (REG_P (operands[1]));
   25389       105117 :       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
   25390              :         return "fstp%Z0\t%y0";
   25391              :       else
   25392              :         {
   25393              :           /* There is no non-popping store to memory for XFmode.
   25394              :              So if we need one, follow the store with a load.  */
   25395         6219 :           if (GET_MODE (operands[0]) == XFmode)
   25396              :             return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
   25397              :           else
   25398         1888 :             return "fst%Z0\t%y0";
   25399              :         }
   25400              :     }
   25401              :   else
   25402            0 :     gcc_unreachable();
   25403              : }
   25404              : #ifdef TARGET_SOLARIS
   25405              : /* Solaris implementation of TARGET_ASM_NAMED_SECTION.

                           NAME, FLAGS and DECL describe the section to switch to.  The
                           64-bit ".eh_frame" section is written here directly; with
                           HAVE_SOLARIS_AS, link-once COMDAT sections and SECTION_EXCLUDE
                           sections also get special treatment.  Everything else is passed
                           to default_elf_asm_named_section.  */
   25406              : 
   25407              : static void
   25408              : i386_solaris_elf_named_section (const char *name, unsigned int flags,
   25409              :                                 tree decl)
   25410              : {
   25411              :   /* With Binutils 2.15, the "@unwind" marker must be specified on
   25412              :      every occurrence of the ".eh_frame" section, not just the first
   25413              :      one.  */
   25414              :   if (TARGET_64BIT
   25415              :       && strcmp (name, ".eh_frame") == 0)
   25416              :     {
                              /* The section flags string is "aw" when SECTION_WRITE is set
                                 and "a" otherwise.  */
   25417              :       fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
   25418              :                flags & SECTION_WRITE ? "aw" : "a");
   25419              :       return;
   25420              :     }
   25421              : 
   25422              : #if HAVE_SOLARIS_AS
                          /* Link-once sections are delegated to the Solaris COMDAT
                             helper when COMDAT groups are available.  */
   25423              :   if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
   25424              :     {
   25425              :       solaris_elf_asm_comdat_section (name, flags, decl);
   25426              :       return;
   25427              :     }
   25428              : 
   25429              :   /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
   25430              :      SPARC assembler.  One cannot mix single-letter flags and #exclude, so
   25431              :      only emit the latter here.  */
   25432              :   if (flags & SECTION_EXCLUDE)
   25433              :     {
   25434              :       fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
   25435              :       return;
   25436              :     }
   25437              : #endif
   25438              : 
   25439              :   default_elf_asm_named_section (name, flags, decl);
   25440              : }
   25441              : #endif /* TARGET_SOLARIS */
   25442              : 
   25443              : /* Return the mangling of TYPE if it is an extended fundamental type.

                           Only the main variant of TYPE is examined.  NULL is returned for
                           anything that is not a void, boolean, integer or real type, for
                           float128_type_node and float64x_type_node, and for any type whose
                           mode is not one of the modes listed in the switch below.  */
   25444              : 
   25445              : static const char *
   25446   1035597378 : ix86_mangle_type (const_tree type)
   25447              : {
   25448   1035597378 :   type = TYPE_MAIN_VARIANT (type);
   25449              : 
   25450   1035597378 :   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
   25451              :       && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
   25452              :     return NULL;
   25453              : 
                          /* These two nodes get no target-specific mangling, even though
                             their mode may match a case below.  */
   25454    561408502 :   if (type == float128_type_node || type == float64x_type_node)
   25455              :     return NULL;
   25456              : 
                          /* The remaining decision is made purely on the machine mode.  */
   25457    560708157 :   switch (TYPE_MODE (type))
   25458              :     {
   25459              :     case E_BFmode:
                              /* BFmode types are mangled as "DF16b".  */
   25460              :       return "DF16b";
   25461       326185 :     case E_HFmode:
   25462              :       /* _Float16 is "DF16_".
   25463              :          Align with clang's decision in https://reviews.llvm.org/D33719. */
   25464       326185 :       return "DF16_";
   25465       643786 :     case E_TFmode:
   25466              :       /* __float128 is "g".  */
   25467       643786 :       return "g";
   25468      7808326 :     case E_XFmode:
   25469              :       /* "long double" or __float80 is "e".  */
   25470      7808326 :       return "e";
   25471              :     default:
   25472              :       return NULL;
   25473              :     }
   25474              : }
   25475              : 
   25476              : /* Create C++ tinfo symbols for only conditionally available fundamental
   25477              :    types.

                           Nothing is done when TARGET_SSE2 is set.  Otherwise CALLBACK is
                           invoked for float16_type_node and bfloat16_type_node, which are
                           first pointed at the ix86-specific nodes if they are unset.  */
   25478              : 
   25479              : static void
   25480            5 : ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
   25481              : {
                          /* Type nodes defined elsewhere; declared locally for use below.  */
   25482            5 :   extern tree ix86_float16_type_node;
   25483            5 :   extern tree ix86_bf16_type_node;
   25484              : 
   25485            5 :   if (!TARGET_SSE2)
   25486              :     {
   25487            0 :       if (!float16_type_node)
   25488            0 :         float16_type_node = ix86_float16_type_node;
   25489            0 :       if (!bfloat16_type_node)
   25490            0 :         bfloat16_type_node = ix86_bf16_type_node;
   25491            0 :       callback (float16_type_node);
   25492            0 :       callback (bfloat16_type_node);
                              /* Both global nodes are cleared again afterwards, whether or
                                 not they were set on entry.  */
   25493            0 :       float16_type_node = NULL_TREE;
   25494            0 :       bfloat16_type_node = NULL_TREE;
   25495              :     }
   25496            5 : }
   25497              : 
                        /* Cached declaration of the guard variable named by
                           ix86_stack_protector_guard_symbol_str; created on first use by
                           ix86_stack_protect_guard.  */
   25498              : static GTY(()) tree ix86_tls_stack_chk_guard_decl;
   25499              : 
                        /* Return the tree used to access the stack protector guard
                           (presumably the TARGET_STACK_PROTECT_GUARD hook -- the hook
                           registration is not visible here).

                           With TARGET_SSP_TLS_GUARD the guard has a pointer-mode-sized
                           integer type qualified with the address space encoded from
                           ix86_stack_protector_guard_reg.  It is either an external
                           volatile variable with the user-supplied symbol name, or a
                           volatile MEM_REF at the constant address
                           ix86_stack_protector_guard_offset.  Without TARGET_SSP_TLS_GUARD
                           the result of default_stack_protect_guard is returned.  */
   25500              : static tree
   25501          341 : ix86_stack_protect_guard (void)
   25502              : {
   25503          341 :   if (TARGET_SSP_TLS_GUARD)
   25504              :     {
                              /* The second argument 1 to type_for_mode presumably selects
                                 an unsigned type -- confirm against the langhook.  */
   25505          266 :       tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
   25506          266 :       int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
   25507          266 :       tree type = build_qualified_type (type_node, qual);
   25508          266 :       tree t;
   25509              : 
   25510          266 :       if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str))
   25511              :         {
                                  /* A guard symbol was given on the command line: reuse the
                                     cached decl or build it now.  */
   25512            1 :           t = ix86_tls_stack_chk_guard_decl;
   25513              : 
   25514            1 :           if (t == NULL)
   25515              :             {
   25516            1 :               rtx x;
   25517              : 
   25518            1 :               t = build_decl
   25519            1 :                 (UNKNOWN_LOCATION, VAR_DECL,
   25520              :                  get_identifier (ix86_stack_protector_guard_symbol_str),
   25521              :                  type);
                                      /* Mark the decl as a public, external, used, volatile
                                         variable that is compiler-generated and ignored for
                                         debug purposes.  */
   25522            1 :               TREE_STATIC (t) = 1;
   25523            1 :               TREE_PUBLIC (t) = 1;
   25524            1 :               DECL_EXTERNAL (t) = 1;
   25525            1 :               TREE_USED (t) = 1;
   25526            1 :               TREE_THIS_VOLATILE (t) = 1;
   25527            1 :               DECL_ARTIFICIAL (t) = 1;
   25528            1 :               DECL_IGNORED_P (t) = 1;
   25529              : 
   25530              :               /* Do not share RTL as the declaration is visible outside of
   25531              :                  current function.  */
   25532            1 :               x = DECL_RTL (t);
   25533            1 :               RTX_FLAG (x, used) = 1;
   25534              : 
   25535            1 :               ix86_tls_stack_chk_guard_decl = t;
   25536              :             }
   25537              :         }
   25538              :       else
   25539              :         {
                                  /* No symbol given: reference the guard at the fixed offset
                                     ix86_stack_protector_guard_offset through a pointer to
                                     the address-space-qualified type.  */
   25540          265 :           tree asptrtype = build_pointer_type (type);
   25541              : 
   25542          265 :           t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
   25543          265 :           t = build2 (MEM_REF, asptrtype, t,
   25544              :                       build_int_cst (asptrtype, 0));
   25545          265 :           TREE_THIS_VOLATILE (t) = 1;
   25546              :         }
   25547              : 
   25548          266 :       return t;
   25549              :     }
   25550              : 
   25551           75 :   return default_stack_protect_guard ();
   25552              : }
   25553              : 
   25554              : /* Implement TARGET_STACK_PROTECT_GUARD_SYMBOL_P.

                           Return the value of TARGET_SSP_GLOBAL_GUARD.  */
   25555              : 
   25556              : static bool
   25557       210517 : ix86_stack_protect_guard_symbol_p (void)
   25558              : {
   25559       210517 :   return TARGET_SSP_GLOBAL_GUARD;
   25560              : }
   25561              : 
                        /* Return true if the stack protector may be used in the current
                           function, i.e. unless ix86_function_naked holds for
                           current_function_decl.  */
   25562              : static bool
   25563          939 : ix86_stack_protect_runtime_enabled_p (void)
   25564              : {
   25565              :   /* Naked functions should not enable stack protector.  */
   25566          939 :   return !ix86_function_naked (current_function_decl);
   25567              : }
   25568              : 
   25569              : /* For 32-bit code we can save PIC register setup by using
   25570              :    __stack_chk_fail_local hidden function instead of calling
   25571              :    __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   25572              :    register, so it is better to call __stack_chk_fail directly.

                           Return the tree produced by the matching default helper:
                           default_external_stack_protect_fail for TARGET_64BIT,
                           default_hidden_stack_protect_fail otherwise.  */
   25573              : 
   25574              : static tree ATTRIBUTE_UNUSED
   25575          334 : ix86_stack_protect_fail (void)
   25576              : {
   25577          334 :   return TARGET_64BIT
   25578          334 :          ? default_external_stack_protect_fail ()
   25579            1 :          : default_hidden_stack_protect_fail ();
   25580              : }
   25581              : 
   25582              : /* Select a format to encode pointers in exception handling data.  CODE
   25583              :    is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   25584              :    true if the symbol may be affected by dynamic relocations.
   25585              : 
   25586              :    ??? All x86 object file formats are capable of representing this.
   25587              :    After all, the relocation needed is the same as for the call insn.
   25588              :    Whether or not a particular assembler allows us to enter such, I
   25589              :    guess we'll have to see.

                           The return value is a combination of DW_EH_PE_* constants.  */
   25590              : 
   25591              : int
   25592       801117 : asm_preferred_eh_data_format (int code, int global)
   25593              : {
   25594              :   /* PE-COFF is effectively always -fPIC because of the .reloc section.  */
   25595       801117 :   if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access)
   25596              :     {
                              /* PC-relative encoding, made indirect for GLOBAL symbols.
                                 The 8-byte signed form is the default; the 4-byte form is
                                 used when pointers are SImode, for CM_SMALL_PIC, and for
                                 CM_MEDIUM_PIC when GLOBAL or CODE is nonzero.  */
   25597        38311 :       int type = DW_EH_PE_sdata8;
   25598        38311 :       if (ptr_mode == SImode
   25599        24333 :           || ix86_cmodel == CM_SMALL_PIC
   25600        38397 :           || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
   25601              :         type = DW_EH_PE_sdata4;
   25602        53724 :       return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
   25603              :     }
   25604              : 
                          /* Absolute encodings: 4-byte unsigned for CM_SMALL, and for
                             CM_MEDIUM when CODE is nonzero; a full pointer otherwise.  */
   25605       762806 :   if (ix86_cmodel == CM_SMALL
   25606        18662 :       || (ix86_cmodel == CM_MEDIUM && code))
   25607       744157 :     return DW_EH_PE_udata4;
   25608              : 
   25609              :   return DW_EH_PE_absptr;
   25610              : }
   25611              : 
   25612              : /* Cost of constructing or destructing a vector in VECMODE from/to elements
   25613              :    of ELMODE.  The result is built from ix86_cost->sse_op and
                           ix86_vec_cost and depends only on the bit sizes of the two
                           modes.  */
   25614              : static int
   25615       780652 : ix86_vector_cd_cost (machine_mode vecmode, machine_mode elmode)
   25616              : {
                          /* Vectors narrower than 128 bits: one sse_op for every element
                             after the first.  */
   25617      1561304 :   if (GET_MODE_BITSIZE (vecmode) < 128)
   25618       586166 :     return ((GET_MODE_BITSIZE (vecmode) / GET_MODE_BITSIZE (elmode) - 1)
   25619       293083 :             * ix86_cost->sse_op);
   25620              : 
                          /* N is the number of 128-bit pieces VECMODE consists of.  */
   25621       487569 :   int n = GET_MODE_BITSIZE (vecmode) / 128;
   25622       487569 :   int cost = 0;
   25623              :   /* Element inserts/extracts into/from N SSE vectors, the possible
   25624              :      GPR <-> XMM moves have to be accounted for elsewhere.  */
   25625       975138 :   if (GET_MODE_BITSIZE (elmode) < 128)
   25626       973892 :     cost += n * (128 / GET_MODE_BITSIZE (elmode) - 1) * ix86_cost->sse_op;
   25627       487569 :   if (GET_MODE_BITSIZE (vecmode) >= 256
   25628       497295 :       && GET_MODE_BITSIZE (elmode) < 256)
   25629              :     /* N/2 vinserti128/vextracti128 for SSE <-> AVX256.  */
   25630         9726 :     cost += n * ix86_vec_cost (V32QImode, ix86_cost->sse_op) / 2;
   25631       975138 :   if (GET_MODE_BITSIZE (vecmode) == 512)
   25632              :     /* One vinserti64x4/vextracti64x4 for AVX256 <-> AVX512.  */
   25633         1966 :     cost += ix86_vec_cost (vecmode, ix86_cost->sse_op);
   25634              :   return cost;
   25635              : }
   25636              : 
   25637              : /* Worker for ix86_builtin_vectorization_cost and the fallback calls
   25638              :    from ix86_vector_costs::add_stmt_cost.

                           TYPE_OF_COST selects the kind of statement being costed and MODE
                           is the mode it operates on.  The cost is looked up in the
                           ix86_cost tables (ix86_tune_cost for FP conversions).  Any
                           TYPE_OF_COST without a case below hits gcc_unreachable.  */
   25639              : static int
   25640     15161685 : ix86_default_vector_cost (enum vect_cost_for_stmt type_of_cost,
   25641              :                           machine_mode mode)
   25642              : {
                          /* FP selects the floating-point variant of several costs.  INDEX
                             is the sse_load/sse_store table slot for MODE.  */
   25643     15161685 :   bool fp = FLOAT_MODE_P (mode);
   25644     15161685 :   int index;
   25645     15161685 :   switch (type_of_cost)
   25646              :     {
   25647      1735102 :       case scalar_stmt:
   25648      1735102 :         return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
   25649              : 
   25650      1815116 :       case scalar_load:
   25651              :         /* load/store costs are relative to register move which is 2. Recompute
   25652              :            it to COSTS_N_INSNS so everything has the same base.  */
   25653      3630232 :         return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
   25654      1815116 :                               : ix86_cost->int_load [2]) / 2;
   25655              : 
   25656      3943219 :       case scalar_store:
   25657      7886438 :         return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
   25658      3943219 :                               : ix86_cost->int_store [2]) / 2;
   25659              : 
   25660      1172222 :       case vector_stmt:
   25661      2344444 :         return ix86_vec_cost (mode,
   25662      2344444 :                               fp ? ix86_cost->addss : ix86_cost->sse_op);
   25663              : 
   25664      1927995 :       case vector_load:
                                /* The load tables are indexed with sse_store_index as
                                   well.  */
   25665      1927995 :         index = sse_store_index (mode);
   25666              :         /* See PR82713 - we may end up being called on non-vector type.  */
   25667      1927995 :         if (index < 0)
   25668        99077 :           index = 2;
   25669      1927995 :         return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;
   25670              : 
   25671       974560 :       case vector_store:
   25672       974560 :         index = sse_store_index (mode);
   25673              :         /* See PR82713 - we may end up being called on non-vector type.  */
   25674       974560 :         if (index < 0)
   25675        91085 :           index = 2;
   25676       974560 :         return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;
   25677              : 
   25678       760646 :       case vec_to_scalar:
   25679       760646 :       case scalar_to_vec:
   25680       760646 :         return ix86_vec_cost (mode, ix86_cost->sse_op);
   25681              : 
   25682              :       /* We should have separate costs for unaligned loads and gather/scatter.
   25683              :          Do that incrementally.  */
   25684       509627 :       case unaligned_load:
   25685       509627 :         index = sse_store_index (mode);
   25686              :         /* See PR82713 - we may end up being called on non-vector type.  */
   25687       509627 :         if (index < 0)
   25688         2708 :           index = 2;
   25689       509627 :         return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;
   25690              : 
   25691       839627 :       case unaligned_store:
   25692       839627 :         index = sse_store_index (mode);
   25693              :         /* See PR82713 - we may end up being called on non-vector type.  */
   25694       839627 :         if (index < 0)
   25695        17206 :           index = 2;
   25696       839627 :         return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;
   25697              : 
   25698            0 :       case vector_gather_load:
                                /* A static part plus a per-element part scaled by the number
                                   of units in MODE.  */
   25699            0 :         return ix86_vec_cost (mode,
   25700            0 :                               COSTS_N_INSNS
   25701              :                                  (ix86_cost->gather_static
   25702              :                                   + ix86_cost->gather_per_elt
   25703            0 :                                     * GET_MODE_NUNITS (mode)) / 2);
   25704              : 
   25705            0 :       case vector_scatter_store:
                                /* Same shape as the gather cost, using the scatter
                                   entries.  */
   25706            0 :         return ix86_vec_cost (mode,
   25707            0 :                               COSTS_N_INSNS
   25708              :                                  (ix86_cost->scatter_static
   25709              :                                   + ix86_cost->scatter_per_elt
   25710            0 :                                     * GET_MODE_NUNITS (mode)) / 2);
   25711              : 
   25712       355297 :       case cond_branch_taken:
   25713       355297 :         return ix86_cost->cond_taken_branch_cost;
   25714              : 
   25715         8587 :       case cond_branch_not_taken:
   25716         8587 :         return ix86_cost->cond_not_taken_branch_cost;
   25717              : 
   25718       281977 :       case vec_perm:
   25719       281977 :         return ix86_vec_cost (mode, ix86_cost->sse_op);
   25720              : 
   25721        89524 :       case vec_promote_demote:
                                /* Floating-point conversions have their own cost helper.  */
   25722        89524 :         if (fp)
   25723        11664 :           return vec_fp_conversion_cost (ix86_tune_cost, mode);
   25724        77860 :         return ix86_vec_cost (mode, ix86_cost->sse_op);
   25725              : 
   25726       748186 :       case vec_construct:
   25727       748186 :       case vec_deconstruct:
   25728      1496372 :         return ix86_vector_cd_cost (mode, GET_MODE_INNER (mode));
   25729              : 
   25730            0 :       default:
   25731            0 :         gcc_unreachable ();
   25732              :     }
   25733              : }
   25734              : 
   25735              : /* Implement targetm.vectorize.builtin_vectorization_cost.

                           The cost of TYPE_OF_COST is computed by ix86_default_vector_cost
                           for the mode of VECTYPE, or for TImode when VECTYPE is NULL.  The
                           third argument is ignored.  */
   25736              : static int
   25737      9118454 : ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
   25738              :                                  tree vectype, int)
   25739              : {
   25740      9118454 :   machine_mode mode = TImode;
   25741      9118454 :   if (vectype != NULL)
   25742      5786963 :     mode = TYPE_MODE (vectype);
   25743      9118454 :   return ix86_default_vector_cost (type_of_cost, mode);
   25744              : }
   25745              : 
   25746              : 
   25747              : /* This function returns the calling abi specific va_list type node.
   25748              :    It returns the FNDECL specific va_list type.

                           Without TARGET_64BIT the result is always va_list_type_node and
                           FNDECL is not looked at.  With TARGET_64BIT, FNDECL must not be
                           NULL_TREE.  */
   25749              : 
   25750              : static tree
   25751        47714 : ix86_fn_abi_va_list (tree fndecl)
   25752              : {
   25753        47714 :   if (!TARGET_64BIT)
   25754          726 :     return va_list_type_node;
   25755        46988 :   gcc_assert (fndecl != NULL_TREE);
   25756              : 
                          /* Pick the va_list flavor matching the ABI of FNDECL.  */
   25757        46988 :   if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
   25758        12868 :     return ms_va_list_type_node;
   25759              :   else
   25760        34120 :     return sysv_va_list_type_node;
   25761              : }
   25762              : 
   25763              : /* Returns the canonical va_list type specified by TYPE.  If there
   25764              :    is no valid TYPE provided, it returns NULL_TREE.

                           Without TARGET_64BIT the decision is left to
                           std_canonical_va_list_type.  */
   25765              : 
   25766              : static tree
   25767       247334 : ix86_canonical_va_list_type (tree type)
   25768              : {
   25769       247334 :   if (TARGET_64BIT)
   25770              :     {
                              /* The MS va_list is recognized by an attribute on TYPE
                                 itself.  */
   25771       246832 :       if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
   25772         5944 :         return ms_va_list_type_node;
   25773              : 
                              /* The SysV va_list is recognized through the element type:
                                 TYPE must be a pointer, or an array for which
                                 array_type_nelts_minus_one is zero, whose element type is
                                 a record carrying the "sysv_abi va_list" attribute.  */
   25774       240888 :       if ((TREE_CODE (type) == ARRAY_TYPE
   25775        50109 :            && integer_zerop (array_type_nelts_minus_one (type)))
   25776       240888 :           || POINTER_TYPE_P (type))
   25777              :         {
   25778       188877 :           tree elem_type = TREE_TYPE (type);
   25779       188877 :           if (TREE_CODE (elem_type) == RECORD_TYPE
   25780       340822 :               && lookup_attribute ("sysv_abi va_list",
   25781       151945 :                                    TYPE_ATTRIBUTES (elem_type)))
   25782       151945 :             return sysv_va_list_type_node;
   25783              :         }
   25784              : 
   25785        88943 :       return NULL_TREE;
   25786              :     }
   25787              : 
   25788          502 :   return std_canonical_va_list_type (type);
   25789              : }
   25790              : 
   25791              : /* Iterate through the target-specific builtin types for va_list.
   25792              :    IDX denotes the iterator, *PTREE is set to the result type of
   25793              :    the va_list builtin, and *PNAME to its internal type.
   25794              :    Returns zero if there is no element for this index, otherwise
   25795              :    IDX should be increased upon the next call.
   25796              :    Note, do not iterate a base builtin's name like __builtin_va_list.
   25797              :    Used from c_common_nodes_and_builtins.

                           Entries exist only with TARGET_64BIT: index 0 is
                           "__builtin_ms_va_list" and index 1 is "__builtin_sysv_va_list".
                           *PNAME and *PTREE are left untouched when zero is returned.  */
   25798              : 
   25799              : static int
   25800       638177 : ix86_enum_va_list (int idx, const char **pname, tree *ptree)
   25801              : {
   25802       638177 :   if (TARGET_64BIT)
   25803              :     {
   25804       632805 :       switch (idx)
   25805              :         {
   25806              :         default:
                                  /* Past the last entry: fall out to the final return.  */
   25807              :           break;
   25808              : 
   25809       210935 :         case 0:
   25810       210935 :           *ptree = ms_va_list_type_node;
   25811       210935 :           *pname = "__builtin_ms_va_list";
   25812       210935 :           return 1;
   25813              : 
   25814       210935 :         case 1:
   25815       210935 :           *ptree = sysv_va_list_type_node;
   25816       210935 :           *pname = "__builtin_sysv_va_list";
   25817       210935 :           return 1;
   25818              :         }
   25819              :     }
   25820              : 
   25821              :   return 0;
   25822              : }
   25823              : 
   25824              : #undef TARGET_SCHED_DISPATCH /* Redefine the TARGET_SCHED_* hook macros below to the ix86_* implementations.  */
   25825              : #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
   25826              : #undef TARGET_SCHED_DISPATCH_DO
   25827              : #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
   25828              : #undef TARGET_SCHED_REASSOCIATION_WIDTH
   25829              : #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
   25830              : #undef TARGET_SCHED_REORDER
   25831              : #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
   25832              : #undef TARGET_SCHED_ADJUST_PRIORITY
   25833              : #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
   25834              : #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
   25835              : #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
   25836              :   ix86_dependencies_evaluation_hook
   25837              : 
   25838              : 
   25839              : /* Implementation of reassociation_width target hook used by
   25840              :    reassoc phase to identify parallelism level in reassociated
   25841              :    tree.  The statement's tree_code is passed in OP and the mode of
   25842              :    its arguments in MODE.  Returns the reassociation width.  */
   25843              : 
   25844              : static int
   25845        30004 : ix86_reassociation_width (unsigned int op, machine_mode mode)
   25846              : {
   25847        30004 :   int width = 1;
   25848              :   /* Vector part.  */
   25849        30004 :   if (VECTOR_MODE_P (mode))
   25850              :     {
   25851         8609 :       int div = 1;
   25852         8609 :       if (INTEGRAL_MODE_P (mode))
   25853         2692 :         width = ix86_cost->reassoc_vec_int;
   25854         5917 :       else if (FLOAT_MODE_P (mode))
   25855         5917 :         width = ix86_cost->reassoc_vec_fp;
   25856              : 
   25857         8609 :       if (width == 1)
   25858              :         return 1;
   25859              : 
   25860              :       /* Znver1-4 Integer vector instructions execute in FP unit
   25861              :          and can execute 3 additions and one multiplication per cycle.  OP is a tree code, hence PLUS_EXPR/MINUS_EXPR rather than RTL PLUS/MINUS.  */
   25862         8604 :       if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
   25863         8604 :            || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4
   25864         8604 :            || ix86_tune == PROCESSOR_C86_4G_M4
   25865         8604 :            || ix86_tune == PROCESSOR_C86_4G_M6
   25866         8604 :            || ix86_tune == PROCESSOR_C86_4G_M7)
   25867            2 :           && INTEGRAL_MODE_P (mode) && op != PLUS_EXPR && op != MINUS_EXPR)
   25868              :         return 1;
   25869              :       /* Znver5 can do 2 integer multiplications per cycle with latency
   25870              :          of 3.  */
   25871         8604 :       if ((ix86_tune == PROCESSOR_ZNVER5 || ix86_tune == PROCESSOR_ZNVER6)
   25872            0 :           && INTEGRAL_MODE_P (mode) && op != PLUS_EXPR && op != MINUS_EXPR)
   25873         8604 :         width = 6;
   25874              : 
   25875              :       /* Account for targets that split wide vectors into multiple parts.  */
   25876         8606 :       if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256)
   25877            0 :         div = GET_MODE_BITSIZE (mode) / 256;
   25878         8604 :       else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
   25879            0 :         div = GET_MODE_BITSIZE (mode) / 128;
   25880         8604 :       else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
   25881            0 :         div = GET_MODE_BITSIZE (mode) / 64;
   25882         8604 :       width = (width + div - 1) / div; /* Divide by the number of parts, rounding up.  */
   25883         8604 :     }
   25884              :   /* Scalar part.  */
   25885              :   else if (INTEGRAL_MODE_P (mode))
   25886        15539 :     width = ix86_cost->reassoc_int;
   25887              :   else if (FLOAT_MODE_P (mode))
   25888         5856 :     width = ix86_cost->reassoc_fp;
   25889              : 
   25890              :   /* Avoid using too many registers in 32bit mode.  */
   25891        29999 :   if (!TARGET_64BIT && width > 2)
   25892        30004 :     width = 2;
   25893              :   return width;
   25894              : }
   25895              : 
   25896              : /* Return the vector mode preferred for vectorizing scalar MODE given the
   25897              :    enabled ISA and TARGET_PREFER_AVX128/256, else word_mode.  ??? No
   25898              :    autovectorization into MMX or 3DNOW until we can reliably place emms and femms instructions.  */
   25899              : static machine_mode
   25900      5152875 : ix86_preferred_simd_mode (scalar_mode mode)
   25901              : {
   25902      5152875 :   if (!TARGET_SSE)
   25903          859 :     return word_mode;
   25904              : 
   25905      5152016 :   switch (mode)
   25906              :     {
   25907       415560 :     case E_QImode:
   25908       415560 :       if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
   25909              :         return V64QImode;
   25910       404132 :       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
   25911              :         return V32QImode;
   25912              :       else
   25913       383966 :         return V16QImode;
   25914              : 
   25915       195778 :     case E_HImode:
   25916       195778 :       if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
   25917              :         return V32HImode;
   25918       185308 :       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
   25919              :         return V16HImode;
   25920              :       else
   25921       169225 :         return V8HImode;
   25922              : 
   25923      1522228 :     case E_SImode:
   25924      1522228 :       if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
   25925              :         return V16SImode;
   25926      1454618 :       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
   25927              :         return V8SImode;
   25928              :       else
   25929      1301455 :         return V4SImode;
   25930              : 
   25931      1873147 :     case E_DImode:
   25932      1873147 :       if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
   25933              :         return V8DImode;
   25934      1469054 :       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
   25935              :         return V4DImode;
   25936              :       else
   25937      1407145 :         return V2DImode;
   25938              : 
   25939       142653 :     case E_HFmode:
   25940       142653 :       if (TARGET_AVX512FP16)
   25941              :         {
   25942       141902 :           if (TARGET_AVX512VL)
   25943              :             {
   25944        69031 :               if (TARGET_PREFER_AVX128)
   25945              :                 return V8HFmode;
   25946        68809 :               else if (TARGET_PREFER_AVX256)
   25947              :                 return V16HFmode;
   25948              :             }
   25949       139559 :           return V32HFmode; /* Also reached without AVX512VL, whatever the preferred width.  */
   25950              :         }
   25951          751 :       return word_mode; /* HFmode without AVX512FP16.  */
   25952              : 
   25953        62894 :     case E_BFmode:
   25954        62894 :       if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
   25955              :         return V32BFmode;
   25956        26462 :       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
   25957              :         return V16BFmode;
   25958              :       else
   25959        13459 :         return V8BFmode;
   25960              : 
   25961       612226 :     case E_SFmode:
   25962       612226 :       if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
   25963              :         return V16SFmode;
   25964       412011 :       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
   25965              :         return V8SFmode;
   25966              :       else
   25967       345636 :         return V4SFmode;
   25968              : 
   25969       291906 :     case E_DFmode:
   25970       291906 :       if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
   25971              :         return V8DFmode;
   25972       169991 :       else if (TARGET_AVX && !TARGET_PREFER_AVX128)
   25973              :         return V4DFmode;
   25974       116420 :       else if (TARGET_SSE2)
   25975              :         return V2DFmode;
   25976              :       /* FALLTHRU: DFmode without SSE2 gets word_mode.  */
   25977              : 
   25978        35680 :     default:
   25979        35680 :       return word_mode;
   25980              :     }
   25981              : }
   25982              : 
   25983              : /* Push onto MODES the vector modes to try when autovectorizing.  With
   25984              :    AVX512F try 512bit, 256bit and 128bit vectors, with AVX 256bit and
   25985              :    128bit, unless TARGET_PREFER_AVX256/AVX128 excludes the wider sizes;
   25986              :    if ALL, the widest size is still pushed, after the narrower ones.  */
   25987              : static unsigned int
   25988      2174521 : ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
   25989              : {
   25990      2174521 :   if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
   25991              :     {
   25992        75244 :       modes->safe_push (V64QImode);
   25993        75244 :       modes->safe_push (V32QImode);
   25994        75244 :       modes->safe_push (V16QImode);
   25995              :     }
   25996      2099277 :   else if (TARGET_AVX512F && all) /* 512bit not preferred: push it last.  */
   25997              :     {
   25998          558 :       modes->safe_push (V32QImode);
   25999          558 :       modes->safe_push (V16QImode);
   26000          558 :       modes->safe_push (V64QImode);
   26001              :     }
   26002      2098719 :   else if (TARGET_AVX && !TARGET_PREFER_AVX128)
   26003              :     {
   26004        28784 :       modes->safe_push (V32QImode);
   26005        28784 :       modes->safe_push (V16QImode);
   26006              :     }
   26007      2069935 :   else if (TARGET_AVX && all) /* 256bit not preferred: push it last.  */
   26008              :     {
   26009           24 :       modes->safe_push (V16QImode);
   26010           24 :       modes->safe_push (V32QImode);
   26011              :     }
   26012      2069911 :   else if (TARGET_SSE2)
   26013      2067645 :     modes->safe_push (V16QImode);
   26014              : 
   26015      2174521 :   if (TARGET_MMX_WITH_SSE)
   26016      1777961 :     modes->safe_push (V8QImode);
   26017              : 
   26018      2174521 :   if (TARGET_SSE2)
   26019      2172255 :     modes->safe_push (V4QImode);
   26020              : 
   26021      2174521 :   return ix86_vect_compare_costs ? VECT_COMPARE_COSTS : 0;
   26022              : }
   26023              : 
   26024              : /* Implementation of targetm.vectorize.get_mask_mode.  Return the mask mode for vector DATA_MODE: an
   26025              :    integer mode of at least NUNITS bits in the scalar mask case, else an integer vector shaped like DATA_MODE.  */
   26026              : static opt_machine_mode
   26027      3366788 : ix86_get_mask_mode (machine_mode data_mode)
   26028              : {
   26029      3366788 :   unsigned vector_size = GET_MODE_SIZE (data_mode);
   26030      3366788 :   unsigned nunits = GET_MODE_NUNITS (data_mode);
   26031      3366788 :   unsigned elem_size = vector_size / nunits;
   26032              : 
   26033              :   /* Scalar mask case.  */
   26034       478778 :   if ((TARGET_AVX512F && vector_size == 64)
   26035      3251628 :       || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))
   26036              :       /* AVX512FP16 only supports vector comparison
   26037              :          to kmask for _Float16.  */
   26038      3077127 :       || (TARGET_AVX512VL && TARGET_AVX512FP16
   26039        18335 :           && GET_MODE_INNER (data_mode) == E_HFmode)
   26040      6448587 :       || (TARGET_AVX10_2 && GET_MODE_INNER (data_mode) == E_BFmode))
   26041              :     {
   26042       292685 :       if (elem_size == 4
   26043       292685 :           || elem_size == 8
   26044       135065 :           || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
   26045       262229 :         return smallest_int_mode_for_size (nunits).require ();
   26046              :     } /* Other element sizes fall through to the vector mask case.  */
   26047              : 
   26048      3104559 :   scalar_int_mode elem_mode
   26049      3104559 :     = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT).require ();
   26050              : 
   26051      3104559 :   gcc_assert (elem_size * nunits == vector_size);
   26052              : 
   26053      3104559 :   return mode_for_vector (elem_mode, nunits);
   26054              : }
   26055              : 
   26056              : 
   26057              : 
   26058              : /* Return class of registers which could be used for pseudo of MODE
   26059              :    and of class RCLASS for spilling instead of memory.  Return NO_REGS
   26060              :    if it is not possible or non-profitable.  */
   26061              : 
   26062              : /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657: the
   26063              :    leading "0 &&" below makes the test always false, so NO_REGS is always returned.  */
   26064              : static reg_class_t
   26065   6275391949 : ix86_spill_class (reg_class_t rclass, machine_mode mode)
   26066              : {
   26067   6275391949 :   if (0 && TARGET_GENERAL_REGS_SSE_SPILL
   26068              :       && TARGET_SSE2
   26069              :       && TARGET_INTER_UNIT_MOVES_TO_VEC
   26070              :       && TARGET_INTER_UNIT_MOVES_FROM_VEC
   26071              :       && (mode == SImode || (TARGET_64BIT && mode == DImode))
   26072              :       && INTEGER_CLASS_P (rclass))
   26073              :     return ALL_SSE_REGS;
   26074   6275391949 :   return NO_REGS;
   26075              : }
   26076              : 
   26077              : /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST.  Like the default implementation,
   26078              :    but returns a lower bound.  An explicitly set param for the (un)predictable
   26079              :    case of edge E wins; otherwise BRANCH_COST is scaled by br_mispredict_scale.  */
   26080              : static unsigned int
   26081      1826598 : ix86_max_noce_ifcvt_seq_cost (edge e)
   26082              : {
   26083      1826598 :   bool predictable_p = predictable_edge_p (e);
   26084      1826598 :   if (predictable_p)
   26085              :     {
   26086       144823 :       if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
   26087            8 :         return param_max_rtl_if_conversion_predictable_cost;
   26088              :     }
   26089              :   else
   26090              :     {
   26091      1681775 :       if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
   26092           73 :         return param_max_rtl_if_conversion_unpredictable_cost;
   26093              :     }
   26094              : 
   26095              :   /* For modern machines with deeper pipeline, the penalty for branch
   26096              :      misprediction could be higher than before to reset the pipeline
   26097              :      slots.  Add parameter br_mispredict_scale as a factor to describe
   26098              :      the impact of resetting the pipeline.  */
   26099              : 
   26100      1826517 :   return BRANCH_COST (true, predictable_p)
   26101      1826517 :          * ix86_tune_cost->br_mispredict_scale;
   26102              : }
   26103              : 
   26104              : /* Return true if SEQ is a good candidate as a replacement for the
   26105              :    if-convertible sequence described in IF_INFO.  Falls back to
   26106              :    default_noce_conversion_profitable_p when no x86-specific rule decides.  */
   26107              : static bool
   26108       201790 : ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
   26109              : {
   26110       201790 :   if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
   26111              :     {
   26112              :       int cmov_cnt = 0;
   26113              :       /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
   26114              :          Maybe we should allow even more conditional moves as long as they
   26115              :          are used far enough not to stall the CPU, or also consider
   26116              :          IF_INFO->TEST_BB succ edge probabilities.  */
   26117          238 :       for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
   26118              :         {
   26119          196 :           rtx set = single_set (insn);
   26120          196 :           if (!set)
   26121            0 :             continue;
   26122          196 :           if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
   26123          154 :             continue;
   26124           42 :           rtx src = SET_SRC (set);
   26125           42 :           machine_mode mode = GET_MODE (src);
   26126           42 :           if (GET_MODE_CLASS (mode) != MODE_INT
   26127            0 :               && GET_MODE_CLASS (mode) != MODE_FLOAT)
   26128            0 :             continue;
   26129           42 :           if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
   26130           41 :               || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
   26131            1 :             continue; /* Both arms must be a register or a memory operand.  */
   26132              :           /* insn is CMOV or FCMOV.  */
   26133           41 :           if (++cmov_cnt > 1)
   26134              :             return false;
   26135              :         }
   26136              :     }
   26137              : 
   26138              :   /* W/o TARGET_SSE4_1, it takes 3 instructions (pand, pandn and por)
   26139              :      for movdfcc/movsfcc, and could possibly fail cost comparison.
   26140              :      Increasing the branch cost would hurt performance for other modes,
   26141              :      so specially add some preference for floating point ifcvt.  */
   26142       201782 :   if (!TARGET_SSE4_1 && if_info->x
   26143       157824 :       && GET_MODE_CLASS (GET_MODE (if_info->x)) == MODE_FLOAT
   26144        34079 :       && if_info->speed_p)
   26145              :     {
   26146        27058 :       unsigned cost = seq_cost (seq, true);
   26147              : 
   26148        27058 :       if (cost <= if_info->original_cost)
   26149              :         return true;
   26150              : 
   26151        25872 :       return cost <= (if_info->max_seq_cost + COSTS_N_INSNS (2)); /* Allow two extra insns of cost.  */
   26152              :     }
   26153              : 
   26154       174724 :   return default_noce_conversion_profitable_p (seq, if_info);
   26155              : }
   26156              : 
   26157              : /* x86-specific vector costs.  Extends vector_costs with the bookkeeping counters declared below.  */
   26158              : class ix86_vector_costs : public vector_costs
   26159              : {
   26160              : public:
   26161              :   ix86_vector_costs (vec_info *, bool);
   26162              : 
   26163              :   unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
   26164              :                               stmt_vec_info stmt_info, slp_tree node,
   26165              :                               tree vectype, int misalign,
   26166              :                               vect_cost_model_location where) override;
   26167              :   void finish_cost (const vector_costs *) override;
   26168              :   bool better_main_loop_than_p (const vector_costs *) const override;
   26169              :   bool better_epilogue_loop_than_p (const vector_costs *other,
   26170              :                                     loop_vec_info main_loop) const;
   26171              : 
   26172              : private:
   26173              : 
   26174              :   /* Estimate register pressure of the vectorized code.  */
   26175              :   void ix86_vect_estimate_reg_pressure ();
   26176              :   /* Number of GENERAL_REGS/SSE_REGS used in the vectorizer; used for
   26177              :      estimating register pressure.
   26178              :      ??? Currently it's only used by vec_construct/scalar_to_vec
   26179              :      where we know it's not loaded from memory.  */
   26180              :   unsigned m_num_gpr_needed[3];
   26181              :   unsigned m_num_sse_needed[3];
   26182              :   /* Number of 256-bit vector permutations.  */
   26183              :   unsigned m_num_avx256_vec_perm[3];
   26184              :   /* Number of 512-bit vector permutations.  */
   26185              :   unsigned m_num_avx512_vec_perm[3];
   26186              :   /* Number of reductions for FMA/DOT_PROD_EXPR/SAD_EXPR.  */
   26187              :   unsigned m_num_reduc[X86_REDUC_LAST];
   26188              :   /* Don't do unroll if m_prefer_unroll is false, default is true.  */
   26189              :   bool m_prefer_unroll;
   26190              : };
   26191              : /* Construct the costs for VINFO: every counter array is value-initialized (zero) and unrolling is preferred.  */
   26192      2095344 : ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool costing_for_scalar)
   26193              :   : vector_costs (vinfo, costing_for_scalar),
   26194      2095344 :     m_num_gpr_needed (),
   26195      2095344 :     m_num_sse_needed (),
   26196      2095344 :     m_num_avx256_vec_perm (),
   26197      2095344 :     m_num_avx512_vec_perm (),
   26198      2095344 :     m_num_reduc (),
   26199      2095344 :     m_prefer_unroll (true)
   26200      2095344 : {}
   26201              : 
   26202              : /* Implement targetm.vectorize.create_costs.  Return a newly allocated
   26203              :    ix86_vector_costs built from VINFO and COSTING_FOR_SCALAR.  */
   26204              : static vector_costs *
   26205      2095344 : ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
   26206              : {
   26207      2095344 :   return new ix86_vector_costs (vinfo, costing_for_scalar);
   26208              : }
   26209              : 
   26210              : /* Return true if a vec_perm should be counted as a cross-lane vector
   26211              :    permutation for a vector with NUNITS elements, i.e. one that is not a broadcast and moves a lane between the lower and upper halves.  */
   26212              : static bool
   26213         5344 : ix86_count_cross_lane_perm_p (vec_info *vinfo, slp_tree node, unsigned nunits)
   26214              : {
   26215              :   /* TODO: For loop vectorization with no SLP load-permutation
   26216              :      information, conservatively treat these perms as cross-lane.
   26217              :      Repeated-index cases such as {0, 0, 0, 0} are emitted as
   26218              :      separate vec_perm_exprs for each index, so we cannot reliably
   26219              :      separate false positives from real cross-lane shuffles yet.  */
   26220         5344 :   if (!node
   26221         5339 :       || !SLP_TREE_LOAD_PERMUTATION (node).exists ()
   26222         9624 :       || !is_a<bb_vec_info> (vinfo))
   26223              :     return true; /* Conservative answer.  */
   26224              : 
   26225           41 :   unsigned half = nunits / 2;
   26226           41 :   bool allsame = true;
   26227           41 :   unsigned first = SLP_TREE_LOAD_PERMUTATION (node)[0];
   26228           41 :   bool cross_lane_p = false;
   26229              : 
   26230          217 :   for (unsigned i = 0; i != SLP_TREE_LANES (node); i++)
   26231              :     {
   26232          215 :       unsigned tmp = SLP_TREE_LOAD_PERMUTATION (node)[i];
   26233              :       /* allsame is just a broadcast.  */
   26234          215 :       if (tmp != first)
   26235          106 :         allsame = false;
   26236              : 
   26237              :       /* The load permutation can cover multiple vectors, so compare
   26238              :          source and destination lanes modulo NUNITS.  */
   26239          215 :       tmp = tmp & (nunits - 1); /* The mask equals a modulo only for power-of-two NUNITS.  */
   26240          215 :       unsigned index = i & (nunits - 1);
   26241          215 :       if ((index < half && tmp >= half) || (index >= half && tmp < half))
   26242           67 :         cross_lane_p = true;
   26243              : 
   26244          215 :       if (!allsame && cross_lane_p)
   26245              :         return true;
   26246              :     }
   26247              : 
   26248              :   return false; /* Broadcast, or no lane crosses the halves.  */
   26249              : }
   26250              : 
   26251              : unsigned
   26252      7470412 : ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
   26253              :                                   stmt_vec_info stmt_info, slp_tree node,
   26254              :                                   tree vectype, int,
   26255              :                                   vect_cost_model_location where)
   26256              : {
   26257      7470412 :   unsigned retval = 0;
   26258      7470412 :   bool scalar_p
   26259              :     = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
   26260      7470412 :   int stmt_cost = - 1;
   26261              : 
   26262      7470412 :   bool fp = false;
   26263      7470412 :   machine_mode mode = scalar_p ? SImode : TImode;
   26264              : 
   26265      7470412 :   if (vectype != NULL)
   26266              :     {
   26267      3358614 :       fp = FLOAT_TYPE_P (vectype);
   26268      3358614 :       mode = TYPE_MODE (vectype);
   26269      3358614 :       if (scalar_p)
   26270       283098 :         mode = TYPE_MODE (TREE_TYPE (vectype));
   26271              :     }
   26272              :   /* When we are costing a scalar stmt use the scalar stmt to get at the
   26273              :      type of the operation.  */
   26274      4111798 :   else if (scalar_p && stmt_info)
   26275      4028581 :     if (tree lhs = gimple_get_lhs (stmt_info->stmt))
   26276              :       {
   26277      3853104 :         fp = FLOAT_TYPE_P (TREE_TYPE (lhs));
   26278      3853104 :         mode = TYPE_MODE (TREE_TYPE (lhs));
   26279              :       }
   26280              : 
   26281      7470412 :   if ((kind == vector_stmt || kind == scalar_stmt)
   26282      1965246 :       && stmt_info
   26283      9426429 :       && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
   26284              :     {
   26285      1574566 :       tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
   26286              :       /*machine_mode inner_mode = mode;
   26287              :       if (VECTOR_MODE_P (mode))
   26288              :         inner_mode = GET_MODE_INNER (mode);*/
   26289              : 
   26290      1574566 :       switch (subcode)
   26291              :         {
   26292       614485 :         case PLUS_EXPR:
   26293       614485 :         case POINTER_PLUS_EXPR:
   26294       614485 :         case MINUS_EXPR:
   26295       614485 :           if (kind == scalar_stmt)
   26296              :             {
   26297       388732 :               if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26298        96728 :                 stmt_cost = ix86_cost->addss;
   26299       292004 :               else if (X87_FLOAT_MODE_P (mode))
   26300          132 :                 stmt_cost = ix86_cost->fadd;
   26301              :               else
   26302       291872 :                 stmt_cost = ix86_cost->add;
   26303              :             }
   26304              :           else
   26305       225753 :             stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
   26306              :                                        : ix86_cost->sse_op);
   26307              :           break;
   26308              : 
   26309       252196 :         case MULT_EXPR:
   26310              :           /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw,
   26311              :              take it as MULT_EXPR.  */
   26312       252196 :         case MULT_HIGHPART_EXPR:
   26313       252196 :           stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
   26314       252196 :           break;
   26315              :           /* There's no direct instruction for WIDEN_MULT_EXPR,
   26316              :              take emulation into account.  */
   26317         1074 :         case WIDEN_MULT_EXPR:
   26318         2148 :           stmt_cost = ix86_widen_mult_cost (ix86_cost, mode,
   26319         1074 :                                             TYPE_UNSIGNED (vectype));
   26320         1074 :           break;
   26321              : 
   26322        10671 :         case NEGATE_EXPR:
   26323        10671 :           if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26324         3548 :             stmt_cost = ix86_cost->sse_op;
   26325         7123 :           else if (X87_FLOAT_MODE_P (mode))
   26326            0 :             stmt_cost = ix86_cost->fchs;
   26327         7123 :           else if (VECTOR_MODE_P (mode))
   26328         3625 :             stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26329              :           else
   26330         3498 :             stmt_cost = ix86_cost->add;
   26331              :           break;
   26332        14091 :         case TRUNC_DIV_EXPR:
   26333        14091 :         case CEIL_DIV_EXPR:
   26334        14091 :         case FLOOR_DIV_EXPR:
   26335        14091 :         case ROUND_DIV_EXPR:
   26336        14091 :         case TRUNC_MOD_EXPR:
   26337        14091 :         case CEIL_MOD_EXPR:
   26338        14091 :         case FLOOR_MOD_EXPR:
   26339        14091 :         case RDIV_EXPR:
   26340        14091 :         case ROUND_MOD_EXPR:
   26341        14091 :         case EXACT_DIV_EXPR:
   26342        14091 :           stmt_cost = ix86_division_cost (ix86_cost, mode);
   26343        14091 :           break;
   26344              : 
   26345        72348 :         case RSHIFT_EXPR:
   26346        72348 :         case LSHIFT_EXPR:
   26347        72348 :         case LROTATE_EXPR:
   26348        72348 :         case RROTATE_EXPR:
   26349        72348 :           {
   26350        72348 :             tree op1 = gimple_assign_rhs1 (stmt_info->stmt);
   26351        72348 :             tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
   26352        72348 :             stmt_cost = ix86_shift_rotate_cost
   26353        72348 :                            (ix86_cost,
   26354              :                             (subcode == RSHIFT_EXPR
   26355        37580 :                              && !TYPE_UNSIGNED (TREE_TYPE (op1)))
   26356              :                             ? ASHIFTRT : LSHIFTRT, mode,
   26357        72348 :                             TREE_CODE (op2) == INTEGER_CST,
   26358        72348 :                             cst_and_fits_in_hwi (op2)
   26359        41289 :                             ? int_cst_value (op2) : -1,
   26360              :                             false, false, NULL, NULL);
   26361              :           }
   26362        72348 :           break;
   26363        98399 :         case NOP_EXPR:
   26364              :           /* Only sign-conversions are free.  */
   26365        98399 :           if (tree_nop_conversion_p
   26366        98399 :                 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
   26367        98399 :                  TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
   26368              :             stmt_cost = 0;
   26369        98399 :           else if (fp)
   26370        10240 :             stmt_cost = vec_fp_conversion_cost
   26371        10240 :                           (ix86_tune_cost, GET_MODE_BITSIZE (mode));
   26372              :           break;
   26373              : 
   26374        23176 :         case FLOAT_EXPR:
   26375        23176 :             if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26376        17403 :               stmt_cost = ix86_cost->cvtsi2ss;
   26377         5773 :             else if (X87_FLOAT_MODE_P (mode))
   26378              :               /* TODO: We do not have cost tables for x87.  */
   26379           50 :               stmt_cost = ix86_cost->fadd;
   26380              :             else
   26381         5723 :               stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
   26382              :             break;
   26383              : 
   26384         2203 :         case FIX_TRUNC_EXPR:
   26385         2203 :             if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26386            0 :               stmt_cost = ix86_cost->cvtss2si;
   26387         2203 :             else if (X87_FLOAT_MODE_P (mode))
   26388              :               /* TODO: We do not have cost tables for x87.  */
   26389            0 :               stmt_cost = ix86_cost->fadd;
   26390              :             else
   26391         2203 :               stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
   26392              :             break;
   26393              : 
   26394        56133 :         case COND_EXPR:
   26395        56133 :           {
   26396              :             /* SSE2 conditional move sequence is:
   26397              :                  pcmpgtd %xmm5, %xmm0 (accounted separately)
   26398              :                  pand    %xmm0, %xmm2
   26399              :                  pandn   %xmm1, %xmm0
   26400              :                  por     %xmm2, %xmm0
   26401              :                while SSE4 uses cmp + blend
   26402              :                and AVX512 masked moves.
   26403              : 
   26404              :                The condition is accounted separately since we usually have
   26405              :                  p = a < b
   26406              :                  c = p ? x : y
   26407              :                and we will account first statement as setcc.  Exception is when
   26408              :                p is loaded from memory as bool and then we will not account
   26409              :                the compare, but there is no way to check for this.  */
   26410              : 
   26411        56133 :             int ninsns = TARGET_SSE4_1 ? 1 : 3;
   26412              : 
   26413              :             /* If one of parameters is 0 or -1 the sequence will be simplified:
   26414              :                (if_true & mask) | (if_false & ~mask) -> if_true & mask  */
   26415        23466 :             if (ninsns > 1
   26416        23466 :                 && (zerop (gimple_assign_rhs2 (stmt_info->stmt))
   26417        23112 :                     || zerop (gimple_assign_rhs3 (stmt_info->stmt))
   26418        13209 :                     || integer_minus_onep
   26419        13209 :                         (gimple_assign_rhs2 (stmt_info->stmt))
   26420        12770 :                     || integer_minus_onep
   26421        12770 :                         (gimple_assign_rhs3 (stmt_info->stmt))))
   26422              :               ninsns = 1;
   26423              : 
   26424        56133 :             if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26425         5044 :               stmt_cost = ninsns * ix86_cost->sse_op;
   26426        51089 :             else if (X87_FLOAT_MODE_P (mode))
   26427              :               /* x87 requires conditional branch.  We don't have cost for
   26428              :                  that.  */
   26429              :               ;
   26430        51080 :             else if (VECTOR_MODE_P (mode))
   26431        21259 :               stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op);
   26432              :             else
   26433              :               /* compare (accounted separately) + cmov.  */
   26434        29821 :               stmt_cost = ix86_cost->add;
   26435              :           }
   26436              :           break;
   26437              : 
   26438        26729 :         case MIN_EXPR:
   26439        26729 :         case MAX_EXPR:
   26440        26729 :           if (fp)
   26441              :             {
   26442         1474 :               if (X87_FLOAT_MODE_P (mode)
   26443          512 :                   && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26444              :                 /* x87 requires conditional branch.  We don't have cost for
   26445              :                    that.  */
   26446              :                 ;
   26447              :               else
   26448              :                 /* minss  */
   26449         1474 :                 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26450              :             }
   26451              :           else
   26452              :             {
   26453        25255 :               if (VECTOR_MODE_P (mode))
   26454              :                 {
   26455         5151 :                   stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26456              :                   /* vpmin was introduced in SSE3.
   26457              :                      SSE2 needs pcmpgtd + pand + pandn + pxor.
   26458              :                      If one of parameters is 0 or -1 the sequence is simplified
   26459              :                      to pcmpgtd + pand.  */
   26460         5151 :                   if (!TARGET_SSSE3)
   26461              :                     {
   26462         3194 :                       if (zerop (gimple_assign_rhs2 (stmt_info->stmt))
   26463         4617 :                           || integer_minus_onep
   26464         1423 :                                 (gimple_assign_rhs2 (stmt_info->stmt)))
   26465         1771 :                         stmt_cost *= 2;
   26466              :                       else
   26467         1423 :                         stmt_cost *= 4;
   26468              :                     }
   26469              :                 }
   26470              :               else
   26471              :                 /* cmp + cmov.  */
   26472        20104 :                 stmt_cost = ix86_cost->add * 2;
   26473              :             }
   26474              :           break;
   26475              : 
   26476         1313 :         case ABS_EXPR:
   26477         1313 :         case ABSU_EXPR:
   26478         1313 :           if (fp)
   26479              :             {
   26480          471 :               if (X87_FLOAT_MODE_P (mode)
   26481          171 :                   && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26482              :                 /* fabs.  */
   26483            0 :                 stmt_cost = ix86_cost->fabs;
   26484              :               else
   26485              :                 /* andss of sign bit.  */
   26486          471 :                 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26487              :             }
   26488              :           else
   26489              :             {
   26490          842 :               if (VECTOR_MODE_P (mode))
   26491              :                 {
   26492          108 :                   stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26493              :                   /* vabs was introduced in SSSE3.
   26494              :                      SSE3 uses psrat + pxor + psub.  */
   26495          108 :                   if (!TARGET_SSSE3)
   26496           78 :                     stmt_cost *= 3;
   26497              :                 }
   26498              :               else
   26499              :                 /* neg + cmov.  */
   26500          734 :                 stmt_cost = ix86_cost->add * 2;
   26501              :             }
   26502              :           break;
   26503              : 
   26504       148184 :         case BIT_IOR_EXPR:
   26505       148184 :         case BIT_XOR_EXPR:
   26506       148184 :         case BIT_AND_EXPR:
   26507       148184 :         case BIT_NOT_EXPR:
   26508       148184 :           gcc_assert (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)
   26509              :                       && !X87_FLOAT_MODE_P (mode));
   26510       148184 :           if (VECTOR_MODE_P (mode))
   26511        50712 :             stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26512              :           else
   26513        97472 :             stmt_cost = ix86_cost->add;
   26514              :           break;
   26515              : 
   26516       253564 :         default:
   26517       253564 :           if (truth_value_p (subcode))
   26518              :             {
   26519        99008 :               if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26520              :                 /* CMPccS? instructions are cheap, so use sse_op.  While they
   26521              :                    produce a mask which may need to be turned to 0/1 by and,
   26522              :                    expect that this will be optimized away in a common case.  */
   26523            0 :                 stmt_cost = ix86_cost->sse_op;
   26524        99008 :               else if (X87_FLOAT_MODE_P (mode))
   26525              :                 /* fcmp + setcc.  */
   26526            0 :                 stmt_cost = ix86_cost->fadd + ix86_cost->add;
   26527        99008 :               else if (VECTOR_MODE_P (mode))
   26528        20623 :                 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26529              :               else
   26530              :                 /* setcc.  */
   26531        78385 :                 stmt_cost = ix86_cost->add;
   26532              :               break;
   26533              :             }
   26534              :           break;
   26535              :         }
   26536              :     }
   26537              : 
   26538              :   /* Record number of load/store/gather/scatter in vectorized body.  */
   26539      7470412 :   if (where == vect_body && !m_costing_for_scalar)
   26540              :     {
   26541      1938505 :       int scale = 1;
   26542      1938505 :       if (vectype
   26543      3868540 :           && ((GET_MODE_SIZE (TYPE_MODE (vectype)) == 64
   26544        59915 :               && TARGET_AVX512_SPLIT_REGS)
   26545      3859772 :               || (GET_MODE_SIZE (TYPE_MODE (vectype)) == 32
   26546       118330 :                   && TARGET_AVX256_SPLIT_REGS)))
   26547              :         scale = 2;
   26548              : 
   26549      1938505 :       switch (kind)
   26550              :         {
   26551              :           /* Emulated gather/scatter or any scalarization.  */
   26552       111519 :         case scalar_load:
   26553       111519 :         case scalar_stmt:
   26554       111519 :         case scalar_store:
   26555       111519 :         case vector_gather_load:
   26556       111519 :         case vector_scatter_store:
   26557       111519 :           m_prefer_unroll = false;
   26558       111519 :           break;
   26559              : 
   26560       557814 :         case vector_stmt:
   26561       557814 :         case vec_to_scalar:
   26562              :           /* Count number of reduction FMA and "real" DOT_PROD_EXPR,
   26563              :              unroll in the vectorizer will enable partial sum.  */
   26564       557814 :           if (stmt_info
   26565       557788 :               && vect_is_reduction (stmt_info)
   26566       626992 :               && stmt_info->stmt)
   26567              :             {
   26568              :               /* Handle __builtin_fma.  */
   26569        69178 :               if (gimple_call_combined_fn (stmt_info->stmt) == CFN_FMA)
   26570              :                 {
   26571           11 :                   m_num_reduc[X86_REDUC_FMA] += count * scale;
   26572           11 :                   break;
   26573              :                 }
   26574              : 
   26575        69167 :               if (!is_gimple_assign (stmt_info->stmt))
   26576              :                 break;
   26577              : 
   26578        66401 :               tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
   26579        66401 :               machine_mode inner_mode = GET_MODE_INNER (mode);
   26580        66401 :               tree rhs1, rhs2;
   26581        66401 :               bool native_vnni_p = true;
   26582        66401 :               gimple* def;
   26583        66401 :               machine_mode mode_rhs;
   26584        66401 :               switch (subcode)
   26585              :                 {
   26586        49730 :                 case PLUS_EXPR:
   26587        49730 :                 case MINUS_EXPR:
   26588        49730 :                   if (!fp || !flag_associative_math
   26589        26417 :                       || flag_fp_contract_mode != FP_CONTRACT_FAST)
   26590              :                     break;
   26591              : 
   26592              :                   /* FMA condition for different modes.  */
   26593        26417 :                   if (((inner_mode == DFmode || inner_mode == SFmode)
   26594        26387 :                        && !TARGET_FMA && !TARGET_AVX512VL)
   26595         8624 :                       || (inner_mode == HFmode && !TARGET_AVX512FP16)
   26596         8624 :                       || (inner_mode == BFmode && !TARGET_AVX10_2))
   26597              :                     break;
   26598              : 
   26599              :                   /* MULT_EXPR + PLUS_EXPR/MINUS_EXPR is transformed
   26600              :                      to FMA/FNMA after vectorization.  */
   26601         8624 :                   rhs1 = gimple_assign_rhs1 (stmt_info->stmt);
   26602         8624 :                   rhs2 = gimple_assign_rhs2 (stmt_info->stmt);
   26603         8624 :                   if (subcode == PLUS_EXPR
   26604         6771 :                       && TREE_CODE (rhs1) == SSA_NAME
   26605         6771 :                       && (def = SSA_NAME_DEF_STMT (rhs1), true)
   26606         6771 :                       && is_gimple_assign (def)
   26607        12029 :                       && gimple_assign_rhs_code (def) == MULT_EXPR)
   26608         1992 :                     m_num_reduc[X86_REDUC_FMA] += count * scale;
   26609         6632 :                   else if (TREE_CODE (rhs2) == SSA_NAME
   26610         6632 :                            && (def = SSA_NAME_DEF_STMT (rhs2), true)
   26611         6632 :                            && is_gimple_assign (def)
   26612        13175 :                            && gimple_assign_rhs_code (def) == MULT_EXPR)
   26613         6537 :                     m_num_reduc[X86_REDUC_FMA] += count * scale;
   26614              :                   break;
   26615              : 
   26616              :                   /* Vectorizer lane_reducing_op_p supports DOT_PROD_EXPR,
   26617              :                      WIDEN_SUM_EXPR and SAD_EXPR, x86 backend only supports
   26618              :                      SAD_EXPR (usad{v16qi,v32qi,v64qi}) and DOT_PROD_EXPR.  */
   26619          610 :                 case DOT_PROD_EXPR:
   26620          610 :                   rhs1 = gimple_assign_rhs1 (stmt_info->stmt);
   26621          610 :                   mode_rhs = TYPE_MODE (TREE_TYPE (rhs1));
   26622          610 :                   if (mode_rhs == QImode)
   26623              :                     {
   26624          337 :                       rhs2 = gimple_assign_rhs2 (stmt_info->stmt);
   26625          337 :                       signop signop1_p = TYPE_SIGN (TREE_TYPE (rhs1));
   26626          337 :                       signop signop2_p = TYPE_SIGN (TREE_TYPE (rhs2));
   26627              : 
   26628              :                       /* vpdpbusd.  */
   26629          337 :                       if (signop1_p != signop2_p)
   26630           85 :                         native_vnni_p
   26631           85 :                           = (GET_MODE_SIZE (mode) == 64
   26632           85 :                              ? TARGET_AVX512VNNI
   26633           28 :                              : ((TARGET_AVX512VNNI && TARGET_AVX512VL)
   26634           85 :                                 || TARGET_AVXVNNI));
   26635              :                       else
   26636              :                         /* vpdpbssd.  */
   26637          252 :                         native_vnni_p
   26638          268 :                           = (GET_MODE_SIZE (mode) == 64
   26639          252 :                              ? TARGET_AVX10_2
   26640          236 :                              : (TARGET_AVXVNNIINT8 || TARGET_AVX10_2));
   26641              :                     }
   26642          610 :                   m_num_reduc[X86_REDUC_DOT_PROD] += count * scale;
   26643              : 
   26644              :                   /* Dislike to do unroll and partial sum for
   26645              :                      emulated DOT_PROD_EXPR.  */
   26646          610 :                   if (!native_vnni_p)
   26647          153 :                     m_num_reduc[X86_REDUC_DOT_PROD] += 3 * count;
   26648              :                   break;
   26649              : 
   26650          106 :                 case SAD_EXPR:
   26651          106 :                   m_num_reduc[X86_REDUC_SAD] += count * scale;
   26652          106 :                   break;
   26653              : 
   26654              :                 default:
   26655              :                   break;
   26656              :                 }
   26657              :             }
   26658              : 
   26659              :         default:
   26660              :           break;
   26661              :         }
   26662              :     }
   26663              : 
   26664              : 
   26665      7470412 :   combined_fn cfn;
   26666      7470412 :   if ((kind == vector_stmt || kind == scalar_stmt)
   26667      1965246 :       && stmt_info
   26668      1956017 :       && stmt_info->stmt
   26669      9426429 :       && is_gimple_call (stmt_info->stmt))
   26670              :     {
   26671        26426 :       tree fndecl = gimple_call_fndecl (stmt_info->stmt);
   26672        26426 :       cgraph_node *node;
   26673        26426 :       if ((fndecl
   26674         5376 :            && (node = cgraph_node::get (fndecl))
   26675         5343 :            && node->simd_clones)
   26676        30785 :           || gimple_call_internal_p (stmt_info->stmt, IFN_MASK_CALL))
   26677         2484 :         stmt_cost = 10 * ix86_vec_cost (mode,
   26678         1242 :                                         mode == SFmode ? ix86_cost->fmass
   26679              :                                         : ix86_cost->fmasd);
   26680        25184 :       else if ((cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
   26681        23814 :         switch (cfn)
   26682              :           {
   26683          107 :           case CFN_FMA:
   26684          107 :             stmt_cost = ix86_vec_cost (mode,
   26685          107 :                                        mode == SFmode ? ix86_cost->fmass
   26686              :                                        : ix86_cost->fmasd);
   26687          107 :             break;
   26688           62 :           case CFN_MULH:
   26689           62 :             stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
   26690           62 :             break;
   26691              :           default:
   26692              :             break;
   26693              :           }
   26694              :     }
   26695              : 
   26696      7470412 :   if (kind == vec_promote_demote)
   26697              :     {
   26698        61462 :       int outer_size
   26699              :         = tree_to_uhwi
   26700        61462 :             (TYPE_SIZE
   26701        61462 :                 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt))));
   26702        61462 :       int inner_size
   26703              :         = tree_to_uhwi
   26704        61462 :             (TYPE_SIZE
   26705        61462 :                 (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
   26706        61462 :       bool inner_fp = FLOAT_TYPE_P
   26707              :                         (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)));
   26708              : 
   26709         5582 :       if (fp && inner_fp)
   26710         5077 :         stmt_cost = vec_fp_conversion_cost
   26711         5077 :                           (ix86_tune_cost, GET_MODE_BITSIZE (mode));
   26712        56385 :       else if (fp && !inner_fp)
   26713         6125 :         stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
   26714        50260 :       else if (!fp && inner_fp)
   26715          505 :         stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
   26716              :       else
   26717        49755 :         stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26718              :       /* VEC_PACK_TRUNC_EXPR and similar demote operations: If outer size is
   26719              :          greater than inner size we will end up doing two conversions and
   26720              :          packing them.  We always pack pairs; if the size difference is greater
   26721              :          it is split into multiple demote operations.  */
   26722        61462 :       if (inner_size > outer_size)
   26723        23313 :         stmt_cost = stmt_cost * 2
   26724        23313 :                     + ix86_vec_cost (mode, ix86_cost->sse_op);
   26725              :     }
   26726              : 
   26727              :   /* If we do elementwise loads into a vector then we are bound by
   26728              :      latency and execution resources for the many scalar loads
   26729              :      (AGU and load ports).  Try to account for this by scaling the
   26730              :      construction cost by the number of elements involved.  */
   26731      7470412 :   if ((kind == vec_construct || kind == vec_deconstruct)
   26732      7470412 :       && ((node
   26733       306407 :            && (((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
   26734       317628 :                  || SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_STRIDED_SLP)
   26735        42502 :                 && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
   26736              :                                         (SLP_TREE_REPRESENTATIVE (node))))
   26737              :                     != INTEGER_CST))
   26738        22728 :                || mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node))))))
   26739              :     {
   26740        32466 :       auto lsdata = static_cast<vect_load_store_data *> (node->data);
   26741        32466 :       tree ls_type = lsdata->ls_type ? lsdata->ls_type : vectype;
   26742        32466 :       tree ls_eltype
   26743        32466 :         = lsdata->ls_eltype ? lsdata->ls_eltype : TREE_TYPE (ls_type);
   26744        32466 :       stmt_cost = ix86_vector_cd_cost (TYPE_MODE (ls_type),
   26745        32466 :                                        TYPE_MODE (ls_eltype));
   26746        32466 :       stmt_cost *= (GET_MODE_BITSIZE (TYPE_MODE (ls_type))
   26747        64932 :                     / GET_MODE_BITSIZE (TYPE_MODE (ls_eltype)) + 1);
   26748              :     }
   26749      7437946 :   else if ((kind == vec_construct || kind == scalar_to_vec)
   26750       481652 :            && node
   26751       450295 :            && SLP_TREE_DEF_TYPE (node) == vect_external_def)
   26752              :     {
   26753       307651 :       stmt_cost = ix86_default_vector_cost (kind, mode);
   26754       307651 :       unsigned i;
   26755       307651 :       tree op;
   26756      1311420 :       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
   26757       696118 :         if (TREE_CODE (op) == SSA_NAME)
   26758       473305 :           TREE_VISITED (op) = 0;
   26759      1003769 :       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
   26760              :         {
   26761       696118 :           if (TREE_CODE (op) != SSA_NAME
   26762       473305 :               || TREE_VISITED (op))
   26763       256915 :             continue;
   26764       439203 :           TREE_VISITED (op) = 1;
   26765       439203 :           gimple *def = SSA_NAME_DEF_STMT (op);
   26766       439203 :           tree tem;
   26767              :           /* Look through a conversion.  */
   26768       439203 :           if (is_gimple_assign (def)
   26769       250382 :               && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
   26770        28508 :               && ((tem = gimple_assign_rhs1 (def)), true)
   26771       467711 :               && TREE_CODE (tem) == SSA_NAME)
   26772        28297 :             def = SSA_NAME_DEF_STMT (tem);
   26773              :           /* When the component is loaded from memory without sign-
   26774              :              or zero-extension we can move it to a vector register and/or
   26775              :              insert it via vpinsr with a memory operand.  */
   26776       439203 :           if (gimple_assign_load_p (def)
   26777       131682 :               && tree_nop_conversion_p (TREE_TYPE (op),
   26778       131682 :                                         TREE_TYPE (gimple_assign_lhs (def)))
   26779       696287 :               && (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op))) > 1
   26780         5584 :                   || TARGET_SSE4_1))
   26781              :             ;
   26782              :           /* When the component is extracted from a vector it is already
   26783              :              in a vector register.  */
   26784       315224 :           else if (is_gimple_assign (def)
   26785       121518 :                    && gimple_assign_rhs_code (def) == BIT_FIELD_REF
   26786       317874 :                    && VECTOR_TYPE_P (TREE_TYPE
   26787              :                                 (TREE_OPERAND (gimple_assign_rhs1 (def), 0))))
   26788              :             ;
   26789              :           else
   26790              :             {
   26791       312997 :               if (fp)
   26792              :                 {
   26793              :                   /* Scalar FP values residing in x87 registers need to be
   26794              :                      spilled and reloaded.  */
   26795        13822 :                   auto mode2 = TYPE_MODE (TREE_TYPE (op));
   26796        13822 :                   if (IS_STACK_MODE (mode2))
   26797              :                     {
   26798          967 :                       int cost
   26799              :                         = (ix86_cost->hard_register.fp_store[mode2 == SFmode
   26800          967 :                                                              ? 0 : 1]
   26801          967 :                            + ix86_cost->sse_load[sse_store_index (mode2)]);
   26802          967 :                       stmt_cost += COSTS_N_INSNS (cost) / 2;
   26803              :                     }
   26804        13822 :                   m_num_sse_needed[where]++;
   26805              :                 }
   26806              :               else
   26807              :                 {
   26808       299175 :                   m_num_gpr_needed[where]++;
   26809              : 
   26810       299175 :                   stmt_cost += COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
   26811              :                 }
   26812              :             }
   26813              :         }
   26814      1003769 :       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
   26815       696118 :         if (TREE_CODE (op) == SSA_NAME)
   26816       473305 :           TREE_VISITED (op) = 0;
   26817              :     }
   26818      7470412 :   if (stmt_cost == -1)
   26819      5735580 :     stmt_cost = ix86_default_vector_cost (kind, mode);
   26820              : 
   26821              :   /* BIT_FIELD_REF <vect_**, 64, 0> with count 0 costs 0 in body.  */
   26822      7470412 :   if (kind == vec_perm && vectype && count != 0)
   26823              :     {
   26824        99274 :       unsigned vec_size = GET_MODE_SIZE (TYPE_MODE (vectype));
   26825        99274 :       unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
   26826        99274 :       unsigned *num_vec_perm = NULL;
   26827              : 
   26828        99274 :       if (vec_size == 32)
   26829         4148 :         num_vec_perm = m_num_avx256_vec_perm;
   26830        95126 :       else if (vec_size == 64)
   26831         1196 :         num_vec_perm = m_num_avx512_vec_perm;
   26832              : 
   26833         5344 :       if (num_vec_perm && ix86_count_cross_lane_perm_p (m_vinfo, node, nunits))
   26834              :         {
   26835         5342 :           num_vec_perm[where] += count;
   26836         5342 :           if (dump_file && (dump_flags & TDF_DETAILS))
   26837              :             {
   26838          358 :               fprintf (dump_file,
   26839              :                        "Detected avx%u cross-lane permutation: ", vec_size * 8);
   26840          358 :               if (stmt_info)
   26841          355 :                 print_gimple_expr (dump_file, stmt_info->stmt, 0, TDF_SLIM);
   26842          358 :               fprintf (dump_file, " \n");
   26843              :             }
   26844              :         }
   26845              :     }
   26846              : 
   26847              :   /* Penalize DFmode vector operations for Bonnell.  */
   26848      7470412 :   if (TARGET_CPU_P (BONNELL) && kind == vector_stmt
   26849      7470495 :       && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
   26850           12 :     stmt_cost *= 5;  /* FIXME: The value here is arbitrary.  */
   26851              : 
   26852              :   /* Statements in an inner loop relative to the loop being
   26853              :      vectorized are weighted more heavily.  The value here is
   26854              :      arbitrary and could potentially be improved with analysis.  */
   26855      7470412 :   retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
   26856              : 
   26857              :   /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
   26858              :      for Silvermont as it has an out-of-order integer pipeline and can execute
   26859              :      2 scalar instructions per tick, but has an in-order SIMD pipeline.  */
   26860      7470412 :   if ((TARGET_CPU_P (SILVERMONT) || TARGET_CPU_P (GOLDMONT)
   26861      7470412 :        || TARGET_CPU_P (GOLDMONT_PLUS) || TARGET_CPU_P (INTEL))
   26862         2458 :       && stmt_info && stmt_info->stmt)
   26863              :     {
   26864         2114 :       tree lhs_op = gimple_get_lhs (stmt_info->stmt);
   26865         2114 :       if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
   26866         1585 :         retval = (retval * 17) / 10;
   26867              :     }
   26868              : 
   26869      7470412 :   m_costs[where] += retval;
   26870              : 
   26871      7470412 :   return retval;
   26872              : }
   26873              : 
   26874              : void
   26875      1807703 : ix86_vector_costs::ix86_vect_estimate_reg_pressure ()
   26876              : {
                          /* Charge an estimated spill penalty to every m_costs[] entry whose
                             recorded GPR or SSE register demand exceeds the number of
                             registers considered available.  A single spill is priced at half
                             of COSTS_N_INSNS of a store-cost table entry: int_store[2] for
                             GPRs and sse_store[0] for SSE registers.  */
   26877      1807703 :   unsigned gpr_spill_cost = COSTS_N_INSNS (ix86_cost->int_store [2]) / 2;
   26878      1807703 :   unsigned sse_spill_cost = COSTS_N_INSNS (ix86_cost->sse_store[0]) / 2;
   26879              : 
   26880              :   /* ??? Is there a better way to get the number of FP registers available
                             on the target?  For now use the SSE_REGS count: 32 for 64-bit with
                             AVX512F, 16 for other 64-bit targets, 8 otherwise.  */
   26881      1807703 :   unsigned target_avail_sse = TARGET_64BIT ? (TARGET_AVX512F ? 32 : 16) : 8;
                          /* Slots 0..2 are the same ones the statement-costing code in this
                             file indexes with WHERE.
                             NOTE(review): presumably prologue, body and epilogue -- confirm
                             against the vectorizer's cost-location enum.  */
   26882      7230812 :   for (unsigned i = 0; i != 3; i++)
   26883              :     {
                              /* target_avail_regs is declared outside this function.
                                 NOTE(review): presumably the number of usable general
                                 registers -- confirm.  The subtraction below cannot wrap
                                 because it only runs when the demand is strictly larger.  */
   26884      5423109 :       if (m_num_gpr_needed[i] > target_avail_regs)
   26885          694 :         m_costs[i] += gpr_spill_cost * (m_num_gpr_needed[i] - target_avail_regs);
   26886              :       /* SSE register pressure is only accounted for when SSE is enabled.  */
   26887      5423109 :       if (TARGET_SSE && (m_num_sse_needed[i] > target_avail_sse))
   26888           94 :         m_costs[i] += sse_spill_cost * (m_num_sse_needed[i] - target_avail_sse);
   26889              :     }
   26890      1807703 : }
   26891              : 
                        /* Finalize the costs collected so far.  Before deferring to
                           vector_costs::finish_cost with SCALAR_COSTS this applies the x86
                           specific adjustments:
                             - reject some partial-vector loop attempts by setting the body
                               cost to INT_MAX,
                             - suggest an unroll factor for loops with reductions,
                             - add register pressure penalties,
                             - reject costs that involve vector permutes the tuning wants to
                               avoid,
                             - suggest an epilogue mode and possibly a masked epilogue.  */
   26892              : void
   26893      1807703 : ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
   26894              : {
                          /* LOOP_VINFO is null when M_VINFO is not a loop_vec_info; all loop
                             specific adjustments below are guarded by it.  */
   26895      1807703 :   loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
   26896       488301 :   if (loop_vinfo && !m_costing_for_scalar)
   26897              :     {
   26898              :       /* We are currently not asking the vectorizer to compare costs
   26899              :          between different vector mode sizes.  When using predication
   26900              :          that will end up always choosing the preferred mode size even
   26901              :          if there's a smaller mode covering all lanes.  Test for this
   26902              :          situation and artificially reject the larger mode attempt.
   26903              :          ???  We currently lack masked ops for sub-SSE sized modes,
   26904              :          so we could restrict this rejection to AVX and AVX512 modes
   26905              :          but err on the safe side for now.  */
   26906       124685 :       if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
   26907           26 :           && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
   26908           16 :           && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
   26909       124695 :           && (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ())
   26910           20 :               > ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo))))
   26911            8 :         m_costs[vect_body] = INT_MAX;
   26912              : 
   26913              :       /* We'd like to avoid using masking if there's an in-order reduction
   26914              :          to vectorize because that will also perform in-order adds of
   26915              :          masked elements (as neutral value, of course) here, but there
   26916              :          is currently no way to indicate to try un-masked with the same
   26917              :          mode.  */
   26918              : 
                              /* Determine whether any kind of reduction was recorded.  */
   26919       124685 :       bool any_reduc_p = false;
   26920       495285 :       for (int i = 0; i != X86_REDUC_LAST; i++)
   26921       371895 :         if (m_num_reduc[i])
   26922              :           {
   26923              :             any_reduc_p = true;
   26924              :             break;
   26925              :           }
   26926              : 
   26927       124685 :       if (any_reduc_p
   26928              :           /* Not much gain for loop with gather and scatter.  */
   26929         1295 :           && m_prefer_unroll
   26930         1138 :           && !LOOP_VINFO_EPILOGUE_P (loop_vinfo))
   26931              :         {
                                  /* An explicitly set ix86_vect_unroll_limit option overrides
                                     the limit from the tuning cost table.  */
   26932         1814 :           unsigned unroll_factor
   26933          907 :             = OPTION_SET_P (ix86_vect_unroll_limit)
   26934          907 :             ? ix86_vect_unroll_limit
   26935          907 :             : ix86_cost->vect_unroll_limit;
   26936              : 
   26937          907 :           if (unroll_factor > 1)
   26938              :             {
                                      /* Clamp the factor, for every reduction kind present, to
                                         the per-kind threshold divided (rounding up) by the
                                         number of reductions of that kind.  */
   26939         3628 :               for (int i = 0 ; i != X86_REDUC_LAST; i++)
   26940              :                 {
   26941         2721 :                   if (m_num_reduc[i])
   26942              :                     {
   26943          907 :                       unsigned tmp = CEIL (ix86_cost->reduc_lat_mult_thr[i],
   26944              :                                            m_num_reduc[i]);
   26945         2721 :                       unroll_factor = MIN (unroll_factor, tmp);
   26946              :                     }
   26947              :                 }
   26948              : 
                                      /* Round the result up to a power of two.  */
   26949         1814 :               m_suggested_unroll_factor  = 1 << ceil_log2 (unroll_factor);
   26950              :             }
   26951              :         }
   26952              : 
   26953              :     }
   26954              : 
   26955      1807703 :   ix86_vect_estimate_reg_pressure ();
   26956              : 
                          /* Reject (cost INT_MAX) every cost entry for which 256-bit vector
                             permutes were counted when the tuning asks to avoid them.  */
   26957      7230812 :   for (int i = 0; i != 3; i++)
   26958      5423109 :     if (m_num_avx256_vec_perm[i]
   26959          521 :         && TARGET_AVX256_AVOID_VEC_PERM)
   26960            7 :       m_costs[i] = INT_MAX;
   26961              : 
                          /* Likewise for 512-bit vector permutes.  */
   26962      7230812 :   for (int i = 0; i != 3; i++)
   26963      5423109 :     if (m_num_avx512_vec_perm[i] && TARGET_AVX512_AVOID_VEC_PERM)
   26964            5 :       m_costs[i] = INT_MAX;
   26965              : 
   26966              :   /* When X86_TUNE_AVX512_TWO_EPILOGUES is enabled arrange for both
   26967              :      an AVX2 and an SSE epilogue for AVX512 vectorized loops.  */
   26968      1807703 :   if (loop_vinfo
   26969       488301 :       && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
   26970        43348 :       && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32
   26971      1808459 :       && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
   26972           23 :     m_suggested_epilogue_mode = V16QImode;
   26973              :   /* When a 128bit SSE vectorized epilogue still has a VF of 16 or larger
   26974              :      enable a 64bit SSE epilogue.  */
   26975      1807703 :   if (loop_vinfo
   26976       488301 :       && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
   26977        43348 :       && GET_MODE_SIZE (loop_vinfo->vector_mode) == 16
   26978      1810225 :       && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () >= 16)
   26979           96 :     m_suggested_epilogue_mode = V8QImode;
   26980              : 
   26981              :   /* When X86_TUNE_AVX512_MASKED_EPILOGUES is enabled try to use
   26982              :      a masked epilogue if that doesn't seem detrimental.  This is only
                             considered for a main loop with a VF above 2 and when the user
                             did not set param_vect_partial_vector_usage explicitly.  */
   26983      1807703 :   if (loop_vinfo
   26984       488301 :       && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
   26985       466627 :       && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () > 2
   26986              :       /* Avoid a masked epilog if cascaded epilogues eventually get us
   26987              :          to one with VF 1 as that means no scalar epilog at all.  */
   26988        75940 :       && !((GET_MODE_SIZE (loop_vinfo->vector_mode)
   26989        75940 :             / LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () == 16)
   26990           34 :            && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
   26991        75939 :       && ix86_tune_features[X86_TUNE_AVX512_MASKED_EPILOGUES]
   26992      1807882 :       && !OPTION_SET_P (param_vect_partial_vector_usage))
   26993              :     {
                              /* AVOID is set by any of the checks below that finds masking
                                 undesirable; each later check is skipped once it is set.  */
   26994          161 :       bool avoid = false;
   26995          161 :       if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
   26996          129 :           && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
   26997              :         {
                                  /* Number of scalar iterations peeled off before the vector
                                     loop, plus one when peeling for gaps is required.  */
   26998          129 :           unsigned int peel_niter
   26999              :             = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
   27000          129 :           if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
   27001            0 :             peel_niter += 1;
   27002              :           /* When we know the number of scalar iterations of the epilogue,
   27003              :              avoid masking when a single vector epilog iteration handles
   27004              :              it in full.  */
   27005          129 :           if (pow2p_hwi ((LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter)
   27006          129 :                          % LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ()))
   27007              :             avoid = true;
   27008              :         }
                              /* NOTE(review): the double loop_outer presumably tests for a real
                                 enclosing loop rather than the function's root loop -- confirm.  */
   27009          159 :       if (!avoid && loop_outer (loop_outer (LOOP_VINFO_LOOP (loop_vinfo))))
   27010           14 :         for (auto ddr : LOOP_VINFO_DDRS (loop_vinfo))
   27011              :           {
                                    /* Only data dependence relations that are neither chrec_known
                                       nor chrec_dont_know are inspected.  */
   27012            4 :             if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
   27013              :               ;
   27014            4 :             else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
   27015              :               ;
   27016              :             else
   27017              :               {
   27018            2 :                 int loop_depth
   27019            4 :                     = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
   27020            2 :                                           DDR_LOOP_NEST (ddr));
   27021            4 :                 if (DDR_NUM_DIST_VECTS (ddr) == 1
   27022            2 :                     && DDR_DIST_VECTS (ddr)[0][loop_depth] == 0)
   27023              :                   {
   27024              :                     /* Avoid the case when there's an outer loop that might
   27025              :                        traverse a multi-dimensional array with the inner
   27026              :                        loop just executing the masked epilogue with a
   27027              :                        read-write where the next outer iteration might
   27028              :                        read from the masked part of the previous write,
   27029              :                        'n' filling half a vector.
   27030              :                          for (j = 0; j < m; ++j)
   27031              :                            for (i = 0; i < n; ++i)
   27032              :                              a[j][i] = c * a[j][i];  */
   27033              :                     avoid = true;
   27034              :                     break;
   27035              :                   }
   27036              :               }
   27037              :           }
   27038              :       /* Avoid using masking if there's an in-order reduction
   27039              :          to vectorize because that will also perform in-order adds of
   27040              :          masked elements (as neutral value, of course).  */
   27041          161 :       if (!avoid)
   27042              :         {
   27043          632 :           for (auto inst : LOOP_VINFO_SLP_INSTANCES (loop_vinfo))
   27044          165 :             if (SLP_INSTANCE_KIND (inst) == slp_inst_kind_reduc_group
   27045          165 :                 && (vect_reduc_type (loop_vinfo, SLP_INSTANCE_TREE (inst))
   27046              :                     == FOLD_LEFT_REDUCTION))
   27047              :               {
   27048              :                 avoid = true;
   27049              :                 break;
   27050              :               }
   27051              :         }
                              /* Nothing speaks against masking: suggest an epilogue in the same
                                 vector mode as the main loop and mark it as masked.  */
   27052          157 :       if (!avoid)
   27053              :         {
   27054          153 :           m_suggested_epilogue_mode = loop_vinfo->vector_mode;
   27055          153 :           m_masked_epilogue = 1;
   27056              :         }
   27057              :     }
   27058              : 
   27059      1807703 :   vector_costs::finish_cost (scalar_costs);
   27060      1807703 : }
   27061              : 
   27062              : /* Return true if THIS should be preferred over OTHER as main vector loop.
                           Both cost objects must belong to loop vectorization (as_a is used
                           on their vinfo).  Apart from the x86 specific check below the
                           decision is left to vector_costs::better_main_loop_than_p.  */
   27063              : 
   27064              : bool
   27065        30979 : ix86_vector_costs::better_main_loop_than_p (const vector_costs *other) const
   27066              : {
   27067        30979 :   loop_vec_info this_loop_vinfo = as_a<loop_vec_info> (this->vinfo ());
   27068        30979 :   loop_vec_info other_loop_vinfo = as_a<loop_vec_info> (other->vinfo ());
   27069              : 
   27070              :   /* If the other loop is masked it does not need an epilog.  Prefer that
   27071              :      if the current loop cannot be vectorized fully with vector
   27072              :      epilogues leaving at most one scalar iteration.  The popcount of
                             the iteration count, ignoring bit 0, is compared against 1 when
                             param_vect_epilogues_nomask is nonzero and against 0 otherwise.  */
   27073        21072 :   if (LOOP_VINFO_NITERS_KNOWN_P (this_loop_vinfo)
   27074        21072 :       && LOOP_VINFO_USING_PARTIAL_VECTORS_P (other_loop_vinfo)
   27075            4 :       && known_gt (LOOP_VINFO_VECT_FACTOR (other_loop_vinfo),
   27076              :                    LOOP_VINFO_INT_NITERS (this_loop_vinfo))
   27077        30983 :       && (popcount_hwi (LOOP_VINFO_INT_NITERS (this_loop_vinfo) & ~1)
   27078            4 :           > (param_vect_epilogues_nomask != 0)))
   27079              :     return false;
   27080              : 
   27081        30975 :   return vector_costs::better_main_loop_than_p (other);
   27082              : }
   27083              : 
   27084              : /* Return true if THIS should be preferred over OTHER as epilog vector
   27085              :    loop when vectorizing MAIN_LOOP.  Apart from the x86 specific check
                           below the decision is left to
                           vector_costs::better_epilogue_loop_than_p.  */
   27086              : 
   27087              : bool
   27088         1482 : ix86_vector_costs::better_epilogue_loop_than_p (const vector_costs *other,
   27089              :                                                 loop_vec_info main_loop) const
   27090              : {
   27091         1482 :   loop_vec_info this_loop_info = as_a <loop_vec_info> (this->vinfo ());
   27092              :   /* The x86 target allows for multiple vector epilogues, if THIS is
   27093              :      the suggested epilog mode of OTHER then keep the latter unless
   27094              :      THIS has a VF of one which means no further epilog needed.
                             The modes are compared by size only.  TEM merely receives the
                             extra value suggested_epilogue_mode hands back; it is not read.  */
   27095         1482 :   int tem;
   27096         1482 :   if (known_gt (LOOP_VINFO_VECT_FACTOR (this_loop_info), 1U)
   27097         1482 :       && (GET_MODE_SIZE (other->suggested_epilogue_mode (tem))
   27098         2942 :           == GET_MODE_SIZE (this_loop_info->vector_mode)))
   27099              :     return false;
   27100         1409 :   return vector_costs::better_epilogue_loop_than_p (other, main_loop);
   27101              : }
   27102              : 
   27103              : /* Validate target specific memory model bits in VAL.  Return VAL
                           unchanged when it is valid.  Otherwise emit a
                           -Winvalid-memory-model warning and return MEMMODEL_SEQ_CST,
                           combined with the offending HLE bit when only the pairing of an
                           HLE bit with the base memory model is wrong.  */
   27104              : 
   27105              : static unsigned HOST_WIDE_INT
   27106       410325 : ix86_memmodel_check (unsigned HOST_WIDE_INT val)
   27107              : {
   27108       410325 :   enum memmodel model = memmodel_from_int (val);
   27109       410325 :   bool strong;
   27110              : 
                          /* Reject bits outside the HLE bits and MEMMODEL_MASK, and reject
                             both HLE bits being set at the same time.  */
   27111       410325 :   if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
   27112              :                                       |MEMMODEL_MASK)
   27113       410321 :       || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
   27114              :     {
   27115            4 :       warning (OPT_Winvalid_memory_model,
   27116              :                "unknown architecture specific memory model");
   27117            4 :       return MEMMODEL_SEQ_CST;
   27118              :     }
                          /* STRONG: the model is acq_rel or seq_cst, which is acceptable for
                             either HLE bit.  */
   27119       410321 :   strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
   27120       410321 :   if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
   27121              :     {
   27122            0 :       warning (OPT_Winvalid_memory_model,
   27123              :               "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
   27124              :                "memory model");
   27125            0 :       return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
   27126              :     }
   27127       410321 :   if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
   27128              :     {
   27129            0 :       warning (OPT_Winvalid_memory_model,
   27130              :               "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
   27131              :                "memory model");
   27132            0 :       return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
   27133              :     }
   27134              :   return val;
   27135              : }
   27136              : 
   27137              : /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
   27138              :    CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
   27139              :    CLONEI->simdlen.  Return 0 if SIMD clones shouldn't be emitted,
   27140              :    or number of vecsize_mangle variants that should be emitted.
                           NODE is the function to clone and BASE_TYPE the type used to derive
                           the simdlen and the mask mode.  When NODE is public and EXPLICIT_P
                           is set, NUM (0 to 3) selects which of the four variants 'b', 'c',
                           'd', 'e' to set up and 4 is returned; otherwise a single variant
                           is chosen from the enabled ISA and 1 is returned.  Warnings about
                           unsupported cases are only emitted when EXPLICIT_P is set.  */
   27141              : 
   27142              : static int
   27143         7593 : ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
   27144              :                                              struct cgraph_simd_clone *clonei,
   27145              :                                              tree base_type, int num,
   27146              :                                              bool explicit_p)
   27147              : {
   27148         7593 :   int ret = 1;
   27149              : 
                          /* An explicitly given simdlen must be a power of two in the range
                             2 to 1024.  */
   27150         7593 :   if (clonei->simdlen
   27151         7593 :       && (clonei->simdlen < 2
   27152         1321 :           || clonei->simdlen > 1024
   27153         1321 :           || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
   27154              :     {
   27155            0 :       if (explicit_p)
   27156            0 :         warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
   27157              :                     "unsupported simdlen %wd", clonei->simdlen.to_constant ());
   27158            0 :       return 0;
   27159              :     }
   27160              : 
                          /* A non-void return type must have one of the listed scalar modes
                             and must not be an aggregate.  */
   27161         7593 :   tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
   27162         7593 :   if (TREE_CODE (ret_type) != VOID_TYPE)
   27163         6801 :     switch (TYPE_MODE (ret_type))
   27164              :       {
   27165         6801 :       case E_QImode:
   27166         6801 :       case E_HImode:
   27167         6801 :       case E_SImode:
   27168         6801 :       case E_DImode:
   27169         6801 :       case E_SFmode:
   27170         6801 :       case E_DFmode:
   27171              :       /* case E_SCmode: */
   27172              :       /* case E_DCmode: */
   27173         6801 :         if (!AGGREGATE_TYPE_P (ret_type))
   27174              :           break;
   27175              :         /* FALLTHRU */
   27176            2 :       default:
   27177            2 :         if (explicit_p)
   27178            2 :           warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
   27179              :                       "unsupported return type %qT for simd", ret_type);
   27180            2 :         return 0;
   27181              :       }
   27182              : 
   27183         7591 :   tree t;
   27184         7591 :   int i;
   27185         7591 :   tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
                          /* Walk DECL_ARGUMENTS when NODE has a definition or the function
                             type carries no argument type list; otherwise walk the type's
                             argument list.  */
   27186         7591 :   bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
   27187              : 
   27188         7591 :   for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
   27189        20438 :        t && t != void_list_node; t = TREE_CHAIN (t), i++)
   27190              :     {
   27191        16678 :       tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
   27192        12852 :       switch (TYPE_MODE (arg_type))
   27193              :         {
   27194        12833 :         case E_QImode:
   27195        12833 :         case E_HImode:
   27196        12833 :         case E_SImode:
   27197        12833 :         case E_DImode:
   27198        12833 :         case E_SFmode:
   27199        12833 :         case E_DFmode:
   27200              :         /* case E_SCmode: */
   27201              :         /* case E_DCmode: */
   27202        12833 :           if (!AGGREGATE_TYPE_P (arg_type))
   27203              :             break;
   27204              :           /* FALLTHRU */
   27205           41 :         default:
                                  /* An otherwise unsupported type is accepted for a uniform
                                     argument.  */
   27206           41 :           if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
   27207              :             break;
   27208            5 :           if (explicit_p)
   27209            5 :             warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
   27210              :                         "unsupported argument type %qT for simd", arg_type);
   27211              :           return 0;
   27212              :         }
   27213              :     }
   27214              : 
   27215         7586 :   if (!TREE_PUBLIC (node->decl) || !explicit_p)
   27216              :     {
   27217              :       /* If the function isn't exported, we can pick up just one ISA
   27218              :          for the clones.  */
   27219          114 :       if (TARGET_AVX512F)
   27220            0 :         clonei->vecsize_mangle = 'e';
   27221          114 :       else if (TARGET_AVX2)
   27222            1 :         clonei->vecsize_mangle = 'd';
   27223          113 :       else if (TARGET_AVX)
   27224           88 :         clonei->vecsize_mangle = 'c';
   27225              :       else
   27226           25 :         clonei->vecsize_mangle = 'b';
   27227              :       ret = 1;
   27228              :     }
   27229              :   else
   27230              :     {
   27231         7472 :       clonei->vecsize_mangle = "bcde"[num];
   27232         7472 :       ret = 4;
   27233              :     }
                          /* Vector sizes in bits per mangling letter.  Only 'e' uses a mask
                             mode: DImode for a QImode BASE_TYPE, SImode otherwise.  */
   27234         7586 :   clonei->mask_mode = VOIDmode;
   27235         7586 :   switch (clonei->vecsize_mangle)
   27236              :     {
   27237         1893 :     case 'b':
   27238         1893 :       clonei->vecsize_int = 128;
   27239         1893 :       clonei->vecsize_float = 128;
   27240         1893 :       break;
   27241         1956 :     case 'c':
   27242         1956 :       clonei->vecsize_int = 128;
   27243         1956 :       clonei->vecsize_float = 256;
   27244         1956 :       break;
   27245         1869 :     case 'd':
   27246         1869 :       clonei->vecsize_int = 256;
   27247         1869 :       clonei->vecsize_float = 256;
   27248         1869 :       break;
   27249         1868 :     case 'e':
   27250         1868 :       clonei->vecsize_int = 512;
   27251         1868 :       clonei->vecsize_float = 512;
   27252         1868 :       if (TYPE_MODE (base_type) == QImode)
   27253           19 :         clonei->mask_mode = DImode;
   27254              :       else
   27255         1849 :         clonei->mask_mode = SImode;
   27256              :       break;
   27257              :     }
   27258         7586 :   if (clonei->simdlen == 0)
   27259              :     {
                              /* No simdlen given: use the vector size matching BASE_TYPE's mode
                                 class divided by the bit size of BASE_TYPE's mode.  */
   27260         6265 :       if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
   27261         3297 :         clonei->simdlen = clonei->vecsize_int;
   27262              :       else
   27263         2968 :         clonei->simdlen = clonei->vecsize_float;
   27264         6265 :       clonei->simdlen = clonei->simdlen
   27265        12530 :                         / GET_MODE_BITSIZE (TYPE_MODE (base_type));
   27266              :     }
   27267         1321 :   else if (clonei->simdlen > 16)
   27268              :     {
   27269              :       /* For compatibility with ICC, use the same upper bounds
   27270              :          for simdlen.  In particular, for CTYPE below, use the return type,
   27271              :          unless the function returns void, in that case use the characteristic
   27272              :          type.  If it is possible for given SIMDLEN to pass CTYPE value
   27273              :          in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
   27274              :          for 64-bit code), accept that SIMDLEN, otherwise warn and don't
   27275              :          emit corresponding clone.  */
   27276           12 :       tree ctype = ret_type;
   27277           12 :       if (VOID_TYPE_P (ret_type))
   27278            0 :         ctype = base_type;
                              /* CNT is the number of vector registers needed for SIMDLEN values
                                 of CTYPE.  */
   27279           24 :       int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
   27280           12 :       if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
   27281            8 :         cnt /= clonei->vecsize_int;
   27282              :       else
   27283            4 :         cnt /= clonei->vecsize_float;
   27284           12 :       if (cnt > (TARGET_64BIT ? 16 : 8))
   27285              :         {
   27286            0 :           if (explicit_p)
   27287            0 :             warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
   27288              :                         "unsupported simdlen %wd",
   27289              :                         clonei->simdlen.to_constant ());
   27290            0 :           return 0;
   27291              :         }
   27292              :       }
   27293              :   return ret;
   27294              : }
   27295              : 
   27296              : /* If SIMD clone NODE can't be used in a vectorized loop
   27297              :    in current function, return -1, otherwise return a badness of using it
   27298              :    (0 if it is most desirable from vecsize_mangle point of view, 1
   27299              :    slightly less desirable, etc.).  The badness is the number of ISA
                           levels (AVX, AVX2, AVX512F) that are enabled beyond the one the
                           clone's vecsize_mangle letter requires.  The machine_mode
                           parameter is unnamed and unused.  */
   27300              : 
   27301              : static int
   27302         1790 : ix86_simd_clone_usable (struct cgraph_node *node, machine_mode)
   27303              : {
   27304         1790 :   switch (node->simdclone->vecsize_mangle)
   27305              :     {
                            /* 'b' requires SSE2.  */
   27306          638 :     case 'b':
   27307          638 :       if (!TARGET_SSE2)
   27308              :         return -1;
   27309          638 :       if (!TARGET_AVX)
   27310              :         return 0;
   27311          537 :       return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1;
                            /* 'c' requires AVX.  */
   27312          630 :     case 'c':
   27313          630 :       if (!TARGET_AVX)
   27314              :         return -1;
   27315          585 :       return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0;
                            /* 'd' requires AVX2.  */
   27316          334 :     case 'd':
   27317          334 :       if (!TARGET_AVX2)
   27318              :         return -1;
   27319          141 :       return TARGET_AVX512F ? 1 : 0;
                            /* 'e' requires AVX512F.  */
   27320          188 :     case 'e':
   27321          188 :       if (!TARGET_AVX512F)
   27322          130 :         return -1;
   27323              :       return 0;
   27324            0 :     default:
   27325            0 :       gcc_unreachable ();
   27326              :     }
   27327              : }
   27328              : 
   27329              : /* This function adjusts the unroll factor based on
   27330              :    the hardware capabilities.  For example, bdver3 has
   27331              :    a loop buffer which makes unrolling of smaller
   27332              :    loops less important.  This function decides the
   27333              :    unroll factor using the number of memory references
   27334              :    (a budget of 32 is used) as a heuristic.
                           NUNROLL is the proposed unroll factor for LOOP; the
                           value returned is never larger than NUNROLL.  */
   27335              : 
   27336              : static unsigned
   27337       799763 : ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
   27338              : {
   27339       799763 :   basic_block *bbs;
   27340       799763 :   rtx_insn *insn;
   27341       799763 :   unsigned i;
   27342       799763 :   unsigned mem_count = 0;
   27343              : 
   27344              :   /* Unroll small size loop when unroll factor is not explicitly
   27345              :      specified.  Loops with more insns than the tuning table's
                             small_unroll_ninsns are not unrolled at all (factor 1).  */
   27346       799763 :   if (ix86_unroll_only_small_loops && !loop->unroll)
   27347              :     {
   27348       754587 :       if (loop->ninsns <= ix86_cost->small_unroll_ninsns)
   27349        72135 :         return MIN (nunroll, ix86_cost->small_unroll_factor);
   27350              :       else
   27351              :         return 1;
   27352              :     }
   27353              : 
   27354        45176 :   if (!TARGET_ADJUST_UNROLL)
   27355              :      return nunroll;
   27356              : 
   27357              :   /* Count the number of memory references within the loop body.
   27358              :      This value determines the unrolling factor for bdver3 and bdver4
   27359              :      architectures.  A MEM wider than four words counts twice.  */
   27360            8 :   subrtx_iterator::array_type array;
   27361            8 :   bbs = get_loop_body (loop);
   27362           24 :   for (i = 0; i < loop->num_nodes; i++)
   27363          120 :     FOR_BB_INSNS (bbs[i], insn)
   27364          104 :       if (NONDEBUG_INSN_P (insn))
   27365          588 :         FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
   27366          516 :           if (const_rtx x = *iter)
   27367          516 :             if (MEM_P (x))
   27368              :               {
   27369           28 :                 machine_mode mode = GET_MODE (x);
   27370           56 :                 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
   27371           28 :                 if (n_words > 4)
   27372            0 :                   mem_count += 2;
   27373              :                 else
   27374           28 :                   mem_count += 1;
   27375              :               }
                          /* Release the block array obtained from get_loop_body.  */
   27376            8 :   free (bbs);
   27377              : 
                          /* With 1 to 32 counted references limit the factor to 32 / mem_count;
                             with none, or more than 32, NUNROLL is returned unchanged.  */
   27378            8 :   if (mem_count && mem_count <=32)
   27379            8 :     return MIN (nunroll, 32 / mem_count);
   27380              : 
   27381              :   return nunroll;
   27382            8 : }
   27383              : 
   27384              : 
   27385              : /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P.  Return true
                           when x87 floating point is enabled or when SSE is enabled and
                           used for floating-point math.  */
   27386              : 
   27387              : static bool
   27388       431538 : ix86_float_exceptions_rounding_supported_p (void)
   27389              : {
   27390              :   /* For x87 floating point with standard excess precision handling,
   27391              :      there is no adddf3 pattern (since x87 floating point only has
   27392              :      XFmode operations) so the default hook implementation gets this
   27393              :      wrong.  */
   27394       431538 :   return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
   27395              : }
   27396              : 
   27397              : /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */
   27398              : 
   27399              : static void
   27400         7054 : ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
   27401              : {
   27402         7054 :   if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
   27403              :     return;
   27404         7054 :   tree exceptions_var = create_tmp_var_raw (integer_type_node);
   27405         7054 :   if (TARGET_80387)
   27406              :     {
   27407         7054 :       tree fenv_index_type = build_index_type (size_int (6));
   27408         7054 :       tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
   27409         7054 :       tree fenv_var = create_tmp_var_raw (fenv_type);
   27410         7054 :       TREE_ADDRESSABLE (fenv_var) = 1;
   27411         7054 :       tree fenv_ptr = build_pointer_type (fenv_type);
   27412         7054 :       tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
   27413         7054 :       fenv_addr = fold_convert (ptr_type_node, fenv_addr);
   27414         7054 :       tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
   27415         7054 :       tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
   27416         7054 :       tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
   27417         7054 :       tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
   27418         7054 :       tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
   27419         7054 :       tree hold_fnclex = build_call_expr (fnclex, 0);
   27420         7054 :       fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
   27421              :                          NULL_TREE, NULL_TREE);
   27422         7054 :       *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
   27423              :                       hold_fnclex);
   27424         7054 :       *clear = build_call_expr (fnclex, 0);
   27425         7054 :       tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
   27426         7054 :       tree fnstsw_call = build_call_expr (fnstsw, 0);
   27427         7054 :       tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
   27428              :                             fnstsw_call, NULL_TREE, NULL_TREE);
   27429         7054 :       tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
   27430         7054 :       tree update_mod = build4 (TARGET_EXPR, integer_type_node,
   27431              :                                 exceptions_var, exceptions_x87,
   27432              :                                 NULL_TREE, NULL_TREE);
   27433         7054 :       *update = build2 (COMPOUND_EXPR, integer_type_node,
   27434              :                         sw_mod, update_mod);
   27435         7054 :       tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
   27436         7054 :       *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
   27437              :     }
   27438         7054 :   if (TARGET_SSE && TARGET_SSE_MATH)
   27439              :     {
   27440         7054 :       tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
   27441         7054 :       tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
   27442         7054 :       tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
   27443         7054 :       tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
   27444         7054 :       tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
   27445         7054 :       tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
   27446              :                                       mxcsr_orig_var, stmxcsr_hold_call,
   27447              :                                       NULL_TREE, NULL_TREE);
   27448         7054 :       tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
   27449              :                                   mxcsr_orig_var,
   27450              :                                   build_int_cst (unsigned_type_node, 0x1f80));
   27451         7054 :       hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
   27452              :                              build_int_cst (unsigned_type_node, 0xffffffc0));
   27453         7054 :       tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
   27454              :                                      mxcsr_mod_var, hold_mod_val,
   27455              :                                      NULL_TREE, NULL_TREE);
   27456         7054 :       tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
   27457         7054 :       tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
   27458              :                               hold_assign_orig, hold_assign_mod);
   27459         7054 :       hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
   27460              :                          ldmxcsr_hold_call);
   27461         7054 :       if (*hold)
   27462         7054 :         *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
   27463              :       else
   27464            0 :         *hold = hold_all;
   27465         7054 :       tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
   27466         7054 :       if (*clear)
   27467         7054 :         *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
   27468              :                          ldmxcsr_clear_call);
   27469              :       else
   27470            0 :         *clear = ldmxcsr_clear_call;
   27471         7054 :       tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
   27472         7054 :       tree exceptions_sse = fold_convert (integer_type_node,
   27473              :                                           stxmcsr_update_call);
   27474         7054 :       if (*update)
   27475              :         {
   27476         7054 :           tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
   27477              :                                         exceptions_var, exceptions_sse);
   27478         7054 :           tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
   27479              :                                            exceptions_var, exceptions_mod);
   27480         7054 :           *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
   27481              :                             exceptions_assign);
   27482              :         }
   27483              :       else
   27484            0 :         *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
   27485              :                           exceptions_sse, NULL_TREE, NULL_TREE);
   27486         7054 :       tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
   27487         7054 :       *update = build2 (COMPOUND_EXPR, void_type_node, *update,
   27488              :                         ldmxcsr_update_call);
   27489              :     }
   27490         7054 :   tree atomic_feraiseexcept
   27491         7054 :     = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
   27492         7054 :   tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
   27493              :                                                     1, exceptions_var);
   27494         7054 :   *update = build2 (COMPOUND_EXPR, void_type_node, *update,
   27495              :                     atomic_feraiseexcept_call);
   27496              : }
   27497              : 
   27498              : #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
   27499              : /* For i386, a common symbol is local only for non-PIE binaries.  For
   27500              :    x86-64, a common symbol is local only for non-PIE binaries or when
   27501              :    the linker supports copy relocs in PIE binaries.  */
   27502              : 
   27503              : static bool
   27504    769403497 : ix86_binds_local_p (const_tree exp)
   27505              : {
   27506    769403497 :   bool direct_extern_access
   27507    769403497 :     = (ix86_direct_extern_access
   27508   1535291295 :        && !(VAR_OR_FUNCTION_DECL_P (exp)
   27509    765887798 :             && lookup_attribute ("nodirect_extern_access",
   27510    765887798 :                                  DECL_ATTRIBUTES (exp))));
   27511    769403497 :   if (!direct_extern_access)
   27512         1225 :     ix86_has_no_direct_extern_access = true;
   27513    769403497 :   return default_binds_local_p_3 (exp, flag_shlib != 0, true,
   27514              :                                   direct_extern_access,
   27515              :                                   (direct_extern_access
   27516    769402272 :                                    && (!flag_pic
   27517    132147006 :                                        || (TARGET_64BIT
   27518    769403497 :                                            && HAVE_LD_PIE_COPYRELOC != 0))));
   27519              : }
   27520              : 
   27521              : /* If flag_pic is set or ix86_direct_extern_access is false, then neither
   27522              :    local nor global relocs should be placed in readonly memory.  */
   27523              : 
   27524              : static int
   27525      5147305 : ix86_reloc_rw_mask (void)
   27526              : {
   27527      5147305 :   return (flag_pic || !ix86_direct_extern_access) ? 3 : 0;
   27528              : }
   27529              : #endif
   27530              : 
   27531              : /* Return true iff ADDR can be used as a symbolic base address.  */
   27532              : 
   27533              : static bool
   27534         3026 : symbolic_base_address_p (rtx addr)
   27535              : {
   27536            0 :   if (SYMBOL_REF_P (addr))
   27537              :     return true;
   27538              : 
   27539         3002 :   if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_GOTOFF)
   27540            0 :     return true;
   27541              : 
   27542              :   return false;
   27543              : }
   27544              : 
   27545              : /* Return true iff ADDR can be used as a base address.  */
   27546              : 
   27547              : static bool
   27548         4581 : base_address_p (rtx addr)
   27549              : {
   27550            0 :   if (REG_P (addr))
   27551              :     return true;
   27552              : 
   27553         2841 :   if (symbolic_base_address_p (addr))
   27554            0 :     return true;
   27555              : 
   27556              :   return false;
   27557              : }
   27558              : 
   27559              : /* If MEM is in the form of [(base+symbase)+offset], extract the three
   27560              :    parts of the address into *BASE, *SYMBASE and *OFFSET and return
   27561              :    true; otherwise return false.  */
   27562              : 
   27563              : static bool
   27564         2948 : extract_base_offset_in_addr (rtx mem, rtx *base, rtx *symbase, rtx *offset)
   27565              : {
   27566         2948 :   rtx addr;
   27567              : 
   27568         2948 :   gcc_assert (MEM_P (mem));
   27569              : 
   27570         2948 :   addr = XEXP (mem, 0);
   27571              : 
   27572         2948 :   if (GET_CODE (addr) == CONST)
   27573           10 :     addr = XEXP (addr, 0);
   27574              : 
   27575         2948 :   if (base_address_p (addr))
   27576              :     {
   27577         1315 :       *base = addr;
   27578         1315 :       *symbase = const0_rtx;
   27579         1315 :       *offset = const0_rtx;
   27580         1315 :       return true;
   27581              :     }
   27582              : 
   27583         1633 :   if (GET_CODE (addr) == PLUS
   27584         1633 :       && base_address_p (XEXP (addr, 0)))
   27585              :     {
   27586          449 :       rtx addend = XEXP (addr, 1);
   27587              : 
   27588          449 :       if (GET_CODE (addend) == CONST)
   27589            0 :         addend = XEXP (addend, 0);
   27590              : 
   27591          449 :       if (CONST_INT_P (addend))
   27592              :         {
   27593          264 :           *base = XEXP (addr, 0);
   27594          264 :           *symbase = const0_rtx;
   27595          264 :           *offset = addend;
   27596          264 :           return true;
   27597              :         }
   27598              : 
   27599              :       /* Also accept REG + symbolic ref, with or without a CONST_INT
   27600              :          offset.  */
   27601          185 :       if (REG_P (XEXP (addr, 0)))
   27602              :         {
   27603          185 :           if (symbolic_base_address_p (addend))
   27604              :             {
   27605            0 :               *base = XEXP (addr, 0);
   27606            0 :               *symbase = addend;
   27607            0 :               *offset = const0_rtx;
   27608            0 :               return true;
   27609              :             }
   27610              : 
   27611          185 :           if (GET_CODE (addend) == PLUS
   27612            0 :               && symbolic_base_address_p (XEXP (addend, 0))
   27613          185 :               && CONST_INT_P (XEXP (addend, 1)))
   27614              :             {
   27615            0 :               *base = XEXP (addr, 0);
   27616            0 :               *symbase = XEXP (addend, 0);
   27617            0 :               *offset = XEXP (addend, 1);
   27618            0 :               return true;
   27619              :             }
   27620              :         }
   27621              :     }
   27622              : 
   27623              :   return false;
   27624              : }
   27625              : 
   27626              : /* Given OPERANDS of consecutive load/store, check if we can merge
   27627              :    them into move multiple.  LOAD is true if they are load instructions.
   27628              :    MODE is the mode of memory operands.  */
   27629              : 
   27630              : bool
   27631         1629 : ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
   27632              :                                     machine_mode mode)
   27633              : {
   27634         1629 :   HOST_WIDE_INT offval_1, offval_2, msize;
   27635         1629 :   rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2,
   27636              :     symbase_1, symbase_2, offset_1, offset_2;
   27637              : 
   27638         1629 :   if (load)
   27639              :     {
   27640         1317 :       mem_1 = operands[1];
   27641         1317 :       mem_2 = operands[3];
   27642         1317 :       reg_1 = operands[0];
   27643         1317 :       reg_2 = operands[2];
   27644              :     }
   27645              :   else
   27646              :     {
   27647          312 :       mem_1 = operands[0];
   27648          312 :       mem_2 = operands[2];
   27649          312 :       reg_1 = operands[1];
   27650          312 :       reg_2 = operands[3];
   27651              :     }
   27652              : 
   27653         1629 :   gcc_assert (REG_P (reg_1) && REG_P (reg_2));
   27654              : 
   27655         1629 :   if (REGNO (reg_1) != REGNO (reg_2))
   27656              :     return false;
   27657              : 
   27658              :   /* Check if the addresses are in the form of [(base+symbase)+offset].  */
   27659         1627 :   if (!extract_base_offset_in_addr (mem_1, &base_1, &symbase_1, &offset_1))
   27660              :     return false;
   27661         1321 :   if (!extract_base_offset_in_addr (mem_2, &base_2, &symbase_2, &offset_2))
   27662              :     return false;
   27663              : 
   27664              :   /* Check if the bases and the symbolic bases are the same.  */
   27665          258 :   if (!rtx_equal_p (base_1, base_2) || !rtx_equal_p (symbase_1, symbase_2))
   27666          115 :     return false;
   27667              : 
   27668          143 :   offval_1 = INTVAL (offset_1);
   27669          143 :   offval_2 = INTVAL (offset_2);
   27670          143 :   msize = GET_MODE_SIZE (mode);
   27671              :   /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address.  */
   27672          143 :   if (offval_1 + msize != offval_2)
   27673              :     return false;
   27674              : 
   27675              :   return true;
   27676              : }
   27677              : 
   27678              : /* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */
   27679              : 
   27680              : static bool
   27681       367668 : ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
   27682              :                         optimization_type opt_type)
   27683              : {
   27684       367668 :   switch (op)
   27685              :     {
   27686          231 :     case asin_optab:
   27687          231 :     case acos_optab:
   27688          231 :     case log1p_optab:
   27689          231 :     case exp_optab:
   27690          231 :     case exp10_optab:
   27691          231 :     case exp2_optab:
   27692          231 :     case expm1_optab:
   27693          231 :     case ldexp_optab:
   27694          231 :     case scalb_optab:
   27695          231 :     case round_optab:
   27696          231 :     case lround_optab:
   27697          231 :       return opt_type == OPTIMIZE_FOR_SPEED;
   27698              : 
   27699          286 :     case rint_optab:
   27700          286 :       if (SSE_FLOAT_MODE_P (mode1)
   27701          139 :           && TARGET_SSE_MATH
   27702          127 :           && !flag_trapping_math
   27703           21 :           && !TARGET_SSE4_1
   27704              :           && mode1 != HFmode)
   27705           21 :         return opt_type == OPTIMIZE_FOR_SPEED;
   27706              :       return true;
   27707              : 
   27708         1971 :     case floor_optab:
   27709         1971 :     case ceil_optab:
   27710         1971 :     case btrunc_optab:
   27711         1971 :       if ((SSE_FLOAT_MODE_P (mode1)
   27712         1581 :            && TARGET_SSE_MATH
   27713         1514 :            && TARGET_SSE4_1)
   27714         1904 :           || mode1 == HFmode)
   27715              :         return true;
   27716         1835 :       return opt_type == OPTIMIZE_FOR_SPEED;
   27717              : 
   27718           66 :     case rsqrt_optab:
   27719           66 :       return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
   27720              : 
   27721              :     default:
   27722              :       return true;
   27723              :     }
   27724              : }
   27725              : 
   27726              : /* Address space support.
   27727              : 
   27728              :    This is not "far pointers" in the 16-bit sense, but an easy way
   27729              :    to use %fs and %gs segment prefixes.  Therefore:
   27730              : 
   27731              :     (a) All address spaces have the same modes,
   27732              :     (b) All address spaces have the same address forms,
   27733              :     (c) While %fs and %gs are technically subsets of the generic
   27734              :         address space, they are probably not subsets of each other.
   27735              :     (d) Since we have no access to the segment base register values
   27736              :         without resorting to a system call, we cannot convert a
   27737              :         non-default address space to a default address space.
   27738              :         Therefore we do not claim %fs or %gs are subsets of generic.
   27739              : 
   27740              :    Therefore we can (mostly) use the default hooks.  */
   27741              : 
   27742              : /* All use of segmentation is assumed to make address 0 valid.  */
   27743              : 
   27744              : static bool
   27745     67378083 : ix86_addr_space_zero_address_valid (addr_space_t as)
   27746              : {
   27747     67378083 :   return as != ADDR_SPACE_GENERIC;
   27748              : }
   27749              : 
   27750              : static void
   27751       789623 : ix86_init_libfuncs (void)
   27752              : {
   27753       789623 :   if (TARGET_64BIT)
   27754              :     {
   27755       774673 :       set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
   27756       774673 :       set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
   27757              :     }
   27758              :   else
   27759              :     {
   27760        14950 :       set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
   27761        14950 :       set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
   27762              :     }
   27763              : 
   27764              : #if TARGET_MACHO
   27765              :   darwin_rename_builtins ();
   27766              : #endif
   27767       789623 : }
   27768              : 
   27769              : /* Set the value of FLT_EVAL_METHOD in float.h.  When using only the
   27770              :    FPU, assume that the fpcw is set to extended precision; when using
   27771              :    only SSE, rounding is correct; when using both SSE and the FPU,
   27772              :    the rounding precision is indeterminate, since either may be chosen
   27773              :    apparently at random.  */
   27774              : 
   27775              : static enum flt_eval_method
   27776     89476100 : ix86_get_excess_precision (enum excess_precision_type type)
   27777              : {
   27778     89476100 :   switch (type)
   27779              :     {
   27780     85382905 :       case EXCESS_PRECISION_TYPE_FAST:
   27781              :         /* The fastest type to promote to will always be the native type,
   27782              :            whether that occurs with implicit excess precision or
   27783              :            otherwise.  */
   27784     85382905 :         return TARGET_AVX512FP16
   27785     85382905 :                ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
   27786     85382905 :                : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
   27787      4093114 :       case EXCESS_PRECISION_TYPE_STANDARD:
   27788      4093114 :       case EXCESS_PRECISION_TYPE_IMPLICIT:
   27789              :         /* Otherwise, the excess precision we want when we are
   27790              :            in a standards compliant mode, and the implicit precision we
   27791              :            provide would be identical were it not for the unpredictable
   27792              :            cases.  */
   27793      4093114 :         if (TARGET_AVX512FP16 && TARGET_SSE_MATH)
   27794              :           return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
   27795      4087134 :         else if (!TARGET_80387)
   27796              :           return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
   27797      4081012 :         else if (!TARGET_MIX_SSE_I387)
   27798              :           {
   27799      4080840 :             if (!(TARGET_SSE && TARGET_SSE_MATH))
   27800              :               return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
   27801      3092222 :             else if (TARGET_SSE2)
   27802              :               return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
   27803              :           }
   27804              : 
   27805              :         /* If we are in standards compliant mode, but we know we will
   27806              :            calculate in unpredictable precision, return
   27807              :            FLT_EVAL_METHOD_PROMOTE_TO_FLOAT.  There is no reason to
   27808              :            introduce explicit excess precision if the target can't
   27809              :            guarantee it will honor it.  */
   27810          320 :         return (type == EXCESS_PRECISION_TYPE_STANDARD
   27811          320 :                 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
   27812              :                 : FLT_EVAL_METHOD_UNPREDICTABLE);
   27813           81 :       case EXCESS_PRECISION_TYPE_FLOAT16:
   27814           81 :         if (TARGET_80387
   27815           75 :             && !(TARGET_SSE_MATH && TARGET_SSE))
   27816            4 :           error ("%<-fexcess-precision=16%> is not compatible with %<-mfpmath=387%>");
   27817              :         return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
   27818            0 :       default:
   27819            0 :         gcc_unreachable ();
   27820              :     }
   27821              : 
   27822              :   return FLT_EVAL_METHOD_UNPREDICTABLE;
   27823              : }
   27824              : 
   27825              : /* Return true if _BitInt(N) is supported and fill its details into *INFO.  */
   27826              : bool
   27827       361991 : ix86_bitint_type_info (int n, struct bitint_info *info)
   27828              : {
   27829       361991 :   if (n <= 8)
   27830         9116 :     info->limb_mode = QImode;
   27831       352875 :   else if (n <= 16)
   27832         1893 :     info->limb_mode = HImode;
   27833       350982 :   else if (n <= 32 || (!TARGET_64BIT && n > 64))
   27834        45753 :     info->limb_mode = SImode;
   27835              :   else
   27836       305229 :     info->limb_mode = DImode;
   27837       361991 :   info->abi_limb_mode = info->limb_mode;
   27838       361991 :   info->big_endian = false;
   27839       361991 :   info->extended = bitint_ext_undef;
   27840       361991 :   return true;
   27841              : }
   27842              : 
   27843              : /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE.  Return DFmode, TFmode
   27844              :    or XFmode for TI_LONG_DOUBLE_TYPE, which is the long double type,
   27845              :    based on the long double bits; use the default mode for the others.  */
   27846              : 
   27847              : static machine_mode
   27848      3781238 : ix86_c_mode_for_floating_type (enum tree_index ti)
   27849              : {
   27850      3781238 :   if (ti == TI_LONG_DOUBLE_TYPE)
   27851       630690 :     return (TARGET_LONG_DOUBLE_64 ? DFmode
   27852       630658 :                                   : (TARGET_LONG_DOUBLE_128 ? TFmode : XFmode));
   27853      3150548 :   return default_mode_for_floating_type (ti);
   27854              : }
   27855              : 
   27856              : /* Returns modified FUNCTION_TYPE for cdtor callabi.  */
   27857              : tree
   27858        14292 : ix86_cxx_adjust_cdtor_callabi_fntype (tree fntype)
   27859              : {
   27860        14292 :   if (TARGET_64BIT
   27861           71 :       || TARGET_RTD
   27862        14363 :       || ix86_function_type_abi (fntype) != MS_ABI)
   27863        14292 :     return fntype;
   27864              :   /* For 32-bit MS ABI add thiscall attribute.  */
   27865            0 :   tree attribs = tree_cons (get_identifier ("thiscall"), NULL_TREE,
   27866            0 :                             TYPE_ATTRIBUTES (fntype));
   27867            0 :   return build_type_attribute_variant (fntype, attribs);
   27868              : }
   27869              : 
   27870              : /* Implement PUSH_ROUNDING.  On 386, we have pushw instruction that
   27871              :    decrements by exactly 2 no matter what the position was, there is no pushb.
   27872              : 
   27873              :    But as the CIE data alignment factor on this arch is -4 for 32bit targets
   27874              :    and -8 for 64bit targets, we need to make sure all stack pointer adjustments
   27875              :    are in multiples of 4 for 32bit targets and 8 for 64bit targets.  */
   27876              : 
   27877              : poly_int64
   27878    273319416 : ix86_push_rounding (poly_int64 bytes)
   27879              : {
   27880    353034264 :   return ROUND_UP (bytes, UNITS_PER_WORD);
   27881              : }
   27882              : 
   27883              : /* Use 8 bits of metadata starting at bit 48 for LAM_U48, and
   27884              :    6 bits of metadata starting at bit 57 for LAM_U57.  */
   27885              : #define IX86_HWASAN_SHIFT (ix86_lam_type == lam_u48             \
   27886              :                            ? 48                                 \
   27887              :                            : (ix86_lam_type == lam_u57 ? 57 : 0))
   27888              : #define IX86_HWASAN_TAG_SIZE (ix86_lam_type == lam_u48          \
   27889              :                               ? 8                               \
   27890              :                               : (ix86_lam_type == lam_u57 ? 6 : 0))
   27891              : 
   27892              : /* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES.  */
   27893              : bool
   27894      6214346 : ix86_memtag_can_tag_addresses ()
   27895              : {
   27896      6214346 :   return ix86_lam_type != lam_none && TARGET_LP64;
   27897              : }
   27898              : 
   27899              : /* Implement TARGET_MEMTAG_TAG_BITSIZE.  */
   27900              : unsigned char
   27901          450 : ix86_memtag_tag_bitsize ()
   27902              : {
   27903          450 :   return IX86_HWASAN_TAG_SIZE;
   27904              : }
   27905              : 
   27906              : /* Implement TARGET_MEMTAG_SET_TAG.  */
   27907              : rtx
   27908          106 : ix86_memtag_set_tag (rtx untagged, rtx tag, rtx target)
   27909              : {
   27910              :   /* default_memtag_insert_random_tag may generate a tag value wider
   27911              :      than 6 bits, so mask it to the tag size for LAM_U57.  */
   27912          106 :   if (ix86_lam_type == lam_u57)
   27913              :     {
   27914          106 :       unsigned HOST_WIDE_INT and_imm
   27915              :         = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
   27916              : 
   27917          106 :       emit_insn (gen_andqi3 (tag, tag, GEN_INT (and_imm)));
   27918              :     }
   27919          106 :   tag = expand_simple_binop (Pmode, ASHIFT, tag,
   27920          106 :                              GEN_INT (IX86_HWASAN_SHIFT), NULL_RTX,
   27921              :                              /* unsignedp = */1, OPTAB_WIDEN);
   27922          106 :   rtx ret = expand_simple_binop (Pmode, IOR, untagged, tag, target,
   27923              :                                  /* unsignedp = */1, OPTAB_DIRECT);
   27924          106 :   return ret;
   27925              : }
   27926              : 
   27927              : /* Implement TARGET_MEMTAG_EXTRACT_TAG.  */
   27928              : rtx
   27929          180 : ix86_memtag_extract_tag (rtx tagged_pointer, rtx target)
   27930              : {
   27931          180 :   rtx tag = expand_simple_binop (Pmode, LSHIFTRT, tagged_pointer,
   27932          180 :                                  GEN_INT (IX86_HWASAN_SHIFT), target,
   27933              :                                  /* unsignedp = */0,
   27934              :                                  OPTAB_DIRECT);
   27935          180 :   rtx ret = gen_reg_rtx (QImode);
   27936              :   /* Mask off bit63 when LAM_U57.  */
   27937          180 :   if (ix86_lam_type == lam_u57)
   27938              :     {
   27939          180 :       unsigned HOST_WIDE_INT and_imm
   27940              :         = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
   27941          180 :       emit_insn (gen_andqi3 (ret, gen_lowpart (QImode, tag),
   27942          180 :                              gen_int_mode (and_imm, QImode)));
   27943              :     }
   27944              :   else
   27945            0 :     emit_move_insn (ret, gen_lowpart (QImode, tag));
   27946          180 :   return ret;
   27947              : }
   27948              : 
   27949              : /* Implement TARGET_MEMTAG_UNTAGGED_POINTER.  */
   27950              : rtx
   27951          114 : ix86_memtag_untagged_pointer (rtx tagged_pointer, rtx target)
   27952              : {
   27953              :   /* Leave bit63 alone.  */
   27954          114 :   rtx tag_mask = gen_int_mode (((HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT)
   27955          114 :                                 + (HOST_WIDE_INT_1U << 63) - 1),
   27956          114 :                                Pmode);
   27957          114 :   rtx untagged_base = expand_simple_binop (Pmode, AND, tagged_pointer,
   27958              :                                            tag_mask, target, true,
   27959              :                                            OPTAB_DIRECT);
   27960          114 :   gcc_assert (untagged_base);
   27961          114 :   return untagged_base;
   27962              : }
   27963              : 
   27964              : /* Implement TARGET_MEMTAG_ADD_TAG.  */
   27965              : rtx
   27966           90 : ix86_memtag_add_tag (rtx base, poly_int64 offset, unsigned char tag_offset)
   27967              : {
   27968           90 :   rtx base_tag = gen_reg_rtx (QImode);
   27969           90 :   rtx base_addr = gen_reg_rtx (Pmode);
   27970           90 :   rtx tagged_addr = gen_reg_rtx (Pmode);
   27971           90 :   rtx new_tag = gen_reg_rtx (QImode);
   27972          180 :   unsigned HOST_WIDE_INT and_imm
   27973           90 :     = (HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) - 1;
   27974              : 
   27975              :   /* Mask off any overflow out of the tag field after the addition.
   27976              :      NOTE(review): and_imm uses IX86_HWASAN_SHIFT, not _TAG_SIZE; confirm.  */
   27977           90 :   emit_move_insn (base_tag, ix86_memtag_extract_tag (base, NULL_RTX));
   27978           90 :   emit_move_insn (base_addr,
   27979              :                   ix86_memtag_untagged_pointer (base, NULL_RTX));
   27980           90 :   emit_insn (gen_add2_insn (base_tag, gen_int_mode (tag_offset, QImode)));
   27981           90 :   emit_move_insn (new_tag, base_tag);
   27982           90 :   emit_insn (gen_andqi3 (new_tag, new_tag, gen_int_mode (and_imm, QImode)));
   27983           90 :   emit_move_insn (tagged_addr,
   27984              :                   ix86_memtag_set_tag (base_addr, new_tag, NULL_RTX));
   27985           90 :   return plus_constant (Pmode, tagged_addr, offset);
   27986              : }
   27987              : 
   27988              : /* Implement TARGET_HAVE_CCMP.  Return true exactly when TARGET_APX_CCMP
                      :    is nonzero.  */
   27989              : static bool
   27990      8065547 : ix86_have_ccmp ()
   27991              : {
   27992      8065547 :   return (bool) TARGET_APX_CCMP;
   27993              : }
   27994              : 
   27995              : /* Implement TARGET_MODE_CAN_TRANSFER_BITS.  Return false only when MODE
                      :    is a float or complex-float mode whose inner mode is SFmode or DFmode
                      :    and FPMATH_387 is set in ix86_fpmath; return true for every other
                      :    mode.  */
   27996              : static bool
   27997      4578281 : ix86_mode_can_transfer_bits (machine_mode mode)
   27998              : {
   27999      4578281 :   if (GET_MODE_CLASS (mode) == MODE_FLOAT
   28000      4531148 :       || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
   28001       112800 :     switch (GET_MODE_INNER (mode))
   28002              :       {
   28003        54192 :       case E_SFmode:
   28004        54192 :       case E_DFmode:
   28005              :         /* These suffer from normalization upon load when not using SSE.  */
   28006        54192 :         return !(ix86_fpmath & FPMATH_387);
   28007              :       default:
   28008              :         return true;
   28009              :       }
   28010              : 
   28011              :   return true;
   28012              : }
   28013              : 
   28014              : /* Implement TARGET_REDZONE_CLOBBER.  Record in cfun->machine that an asm
                      :    red-zone clobber was seen.  When ix86_using_red_zone () holds, return
                      :    a BLKmode MEM at stack pointer - RED_ZONE_SIZE whose size is set to
                      :    RED_ZONE_SIZE; otherwise return NULL_RTX.  */
   28015              : static rtx
   28016            2 : ix86_redzone_clobber ()
   28017              : {
   28018            2 :   cfun->machine->asm_redzone_clobber_seen = true;
   28019            2 :   if (ix86_using_red_zone ())
   28020              :     {
   28021            2 :       rtx base = plus_constant (Pmode, stack_pointer_rtx, -RED_ZONE_SIZE);
   28022            2 :       rtx mem = gen_rtx_MEM (BLKmode, base);
   28023            2 :       set_mem_size (mem, RED_ZONE_SIZE);
   28024            2 :       return mem;
   28025              :     }
   28026              :   return NULL_RTX;
   28027              : }
   28028              : 
   28029              : /* Target-specific selftests.  */
   28030              : 
   28031              : #if CHECKING_P
   28032              : 
   28033              : namespace selftest {
   28034              : 
   28035              : /* Verify that hard regs are dumped as expected (in compact mode):
                      :    SImode hard regs 0 and 1 must print as "ax" and "dx".  */
   28036              : 
   28037              : static void
   28038            4 : ix86_test_dumping_hard_regs ()
   28039              : {
   28040            4 :   ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
   28041            4 :   ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
   28042            4 : }
   28043              : 
   28044              : /* Test dumping an insn with repeated references to the same SCRATCH,
   28045              :    to verify the rtx_reuse code.  The pattern comes from
                      :    gen_memory_blockage and is preprocessed by an rtx_reuse_manager
                      :    before it is dumped.  */
   28046              : 
   28047              : static void
   28048            4 : ix86_test_dumping_memory_blockage ()
   28049              : {
   28050            4 :   set_new_first_and_last_insn (NULL, NULL);
   28051              : 
   28052            4 :   rtx pat = gen_memory_blockage ();
   28053            4 :   rtx_reuse_manager r;
   28054            4 :   r.preprocess (pat);
   28055              : 
   28056              :   /* Verify that the repeated references to the SCRATCH use
   28057              :      reuse IDs.  The first should be prefixed with a reuse ID,
   28058              :      and the second should be dumped as a "reuse_rtx" of that ID.
   28059              :      The expected string assumes Pmode == DImode, so the comparison
                      :      is skipped for any other Pmode.  */
   28060            4 :   if (Pmode == DImode)
   28061            4 :     ASSERT_RTL_DUMP_EQ_WITH_REUSE
   28062              :       ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0  A8])\n"
   28063              :        "        (unspec:BLK [\n"
   28064              :        "                (mem/v:BLK (reuse_rtx 0) [0  A8])\n"
   28065              :        "            ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
   28066            4 : }
   28067              : 
   28068              : /* Verify loading an RTL dump; specifically a dump of copying
   28069              :    a param on x86_64 from a hard reg into the frame.
   28070              :    This test is target-specific since the dump contains target-specific
   28071              :    hard reg names.  The insn with uid 1 must be a SET of an SImode MEM at
                      :    frame_pointer_rtx - 4 from SImode hard reg 5, with both sides
                      :    attributed to the same synthesized variable "i".  */
   28072              : 
   28073              : static void
   28074            4 : ix86_test_loading_dump_fragment_1 ()
   28075              : {
   28076            4 :   rtl_dump_test t (SELFTEST_LOCATION,
   28077            4 :                    locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));
   28078              : 
   28079            4 :   rtx_insn *insn = get_insn_by_uid (1);
   28080              : 
   28081              :   /* The block structure and indentation here is purely for
   28082              :      readability; it mirrors the structure of the rtx.  */
   28083            4 :   tree mem_expr;
   28084            4 :   {
   28085            4 :     rtx pat = PATTERN (insn);
   28086            4 :     ASSERT_EQ (SET, GET_CODE (pat));
   28087            4 :     {
   28088            4 :       rtx dest = SET_DEST (pat);
   28089            4 :       ASSERT_EQ (MEM, GET_CODE (dest));
   28090              :       /* Verify the "/c" was parsed.  */
   28091            4 :       ASSERT_TRUE (RTX_FLAG (dest, call));
   28092            4 :       ASSERT_EQ (SImode, GET_MODE (dest));
   28093            4 :       {
   28094            4 :         rtx addr = XEXP (dest, 0);
   28095            4 :         ASSERT_EQ (PLUS, GET_CODE (addr));
   28096            4 :         ASSERT_EQ (DImode, GET_MODE (addr));
   28097            4 :         {
   28098            4 :           rtx lhs = XEXP (addr, 0);
   28099              :           /* Verify that the "frame" REG was consolidated.  */
   28100            4 :           ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
   28101              :         }
   28102            4 :         {
   28103            4 :           rtx rhs = XEXP (addr, 1);
   28104            4 :           ASSERT_EQ (CONST_INT, GET_CODE (rhs));
   28105            4 :           ASSERT_EQ (-4, INTVAL (rhs));
   28106              :         }
   28107              :       }
   28108              :       /* Verify the "[1 i+0 S4 A32]" was parsed.  */
   28109            4 :       ASSERT_EQ (1, MEM_ALIAS_SET (dest));
   28110              :       /* "i" should have been handled by synthesizing a global int
   28111              :          variable named "i".  */
   28112            4 :       mem_expr = MEM_EXPR (dest);
   28113            4 :       ASSERT_NE (mem_expr, NULL);
   28114            4 :       ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
   28115            4 :       ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
   28116            4 :       ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
   28117            4 :       ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
   28118              :       /* "+0".  */
   28119            4 :       ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
   28120            4 :       ASSERT_EQ (0, MEM_OFFSET (dest));
   28121              :       /* "S4".  */
   28122            4 :       ASSERT_EQ (4, MEM_SIZE (dest));
   28123              :       /* "A32".  */
   28124            4 :       ASSERT_EQ (32, MEM_ALIGN (dest));
   28125              :     }
   28126            4 :     {
   28127            4 :       rtx src = SET_SRC (pat);
   28128            4 :       ASSERT_EQ (REG, GET_CODE (src));
   28129            4 :       ASSERT_EQ (SImode, GET_MODE (src));
   28130            4 :       ASSERT_EQ (5, REGNO (src));
   28131            4 :       tree reg_expr = REG_EXPR (src);
   28132              :       /* "i" here should point to the same var as for the MEM_EXPR.  */
   28133            4 :       ASSERT_EQ (reg_expr, mem_expr);
   28134              :     }
   28135              :   }
   28136            4 : }
   28137              : 
   28138              : /* Verify that the RTL loader copes with a call_insn dump.
   28139              :    This test is target-specific since the dump contains a target-specific
   28140              :    hard reg name.  The loaded insn's jump flag, its two REG_NOTES and its
                      :    single CALL_INSN_FUNCTION_USAGE entry are checked.  */
   28141              : 
   28142              : static void
   28143            4 : ix86_test_loading_call_insn ()
   28144              : {
   28145              :   /* The test dump includes register "xmm0", which requires TARGET_SSE
   28146              :      to exist.  */
   28147            4 :   if (!TARGET_SSE)
   28148            0 :     return;
   28149              : 
   28150            4 :   rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
   28151              : 
   28152            4 :   rtx_insn *insn = get_insns ();
   28153            4 :   ASSERT_EQ (CALL_INSN, GET_CODE (insn));
   28154              : 
   28155              :   /* "/j".  */
   28156            4 :   ASSERT_TRUE (RTX_FLAG (insn, jump));
   28157              : 
   28158            4 :   rtx pat = PATTERN (insn);
   28159            4 :   ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
   28160              : 
   28161              :   /* Verify REG_NOTES.  */
   28162            4 :   {
   28163              :     /* "(expr_list:REG_CALL_DECL".   */
   28164            4 :     ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
   28165            4 :     rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
   28166            4 :     ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
   28167              : 
   28168              :     /* "(expr_list:REG_EH_REGION (const_int 0 [0])".  */
   28169            4 :     rtx_expr_list *note1 = note0->next ();
   28170            4 :     ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
   28171              : 
   28172            4 :     ASSERT_EQ (NULL, note1->next ());
   28173              :   }
   28174              : 
   28175              :   /* Verify CALL_INSN_FUNCTION_USAGE.  */
   28176            4 :   {
   28177              :     /* "(expr_list:DF (use (reg:DF 21 xmm0))".  */
   28178            4 :     rtx_expr_list *usage
   28179            4 :       = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
   28180            4 :     ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
   28181            4 :     ASSERT_EQ (DFmode, GET_MODE (usage));
   28182            4 :     ASSERT_EQ (USE, GET_CODE (usage->element ()));
   28183            4 :     ASSERT_EQ (NULL, usage->next ());
   28184              :   }
   28185            4 : }
   28186              : 
   28187              : /* Verify that the RTL loader copes with a dump from print_rtx_function.
   28188              :    This test is target-specific since the dump contains target-specific
   28189              :    hard reg names.  The function name, the codes of insns 1, 7 and 15 and
                      :    crtl->return_rtx are checked after loading.  */
   28190              : 
   28191              : static void
   28192            4 : ix86_test_loading_full_dump ()
   28193              : {
   28194            4 :   rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
   28195              : 
   28196            4 :   ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
   28197              : 
   28198            4 :   rtx_insn *insn_1 = get_insn_by_uid (1);
   28199            4 :   ASSERT_EQ (NOTE, GET_CODE (insn_1));
   28200              : 
   28201            4 :   rtx_insn *insn_7 = get_insn_by_uid (7);
   28202            4 :   ASSERT_EQ (INSN, GET_CODE (insn_7));
   28203            4 :   ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
   28204              : 
   28205            4 :   rtx_insn *insn_15 = get_insn_by_uid (15);
   28206            4 :   ASSERT_EQ (INSN, GET_CODE (insn_15));
   28207            4 :   ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
   28208              : 
   28209              :   /* Verify crtl->return_rtx: an SImode REG with register number 0.  */
   28210            4 :   ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
   28211            4 :   ASSERT_EQ (0, REGNO (crtl->return_rtx));
   28212            4 :   ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
   28213            4 : }
   28214              : 
   28215              : /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
   28216              :    In particular, verify that it correctly loads the 2nd operand.
   28217              :    This test is target-specific since these are machine-specific
   28218              :    operands (and enums).  The first insn must be an UNSPEC_MEMORY_BLOCKAGE
                      :    set and the insn after it an UNSPECV_RDTSCP set.  */
   28219              : 
   28220              : static void
   28221            4 : ix86_test_loading_unspec ()
   28222              : {
   28223            4 :   rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
   28224              : 
   28225            4 :   ASSERT_TRUE (cfun);
   28226              :   /* cfun has been checked, so cfun->decl can be read safely.  */
   28227            4 :   ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
   28228              : 
   28229              :   /* Test of an UNSPEC.  */
   28230            4 :   rtx_insn *insn = get_insns ();
   28231            4 :   ASSERT_EQ (INSN, GET_CODE (insn));
   28232            4 :   rtx set = single_set (insn);
   28233            4 :   ASSERT_NE (NULL, set);
   28234            4 :   rtx dst = SET_DEST (set);
   28235            4 :   ASSERT_EQ (MEM, GET_CODE (dst));
   28236            4 :   rtx src = SET_SRC (set);
   28237            4 :   ASSERT_EQ (UNSPEC, GET_CODE (src));
   28238            4 :   ASSERT_EQ (BLKmode, GET_MODE (src));
   28239            4 :   ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
   28240              : 
   28241            4 :   rtx v0 = XVECEXP (src, 0, 0);
   28242              : 
   28243              :   /* Verify that the two uses of the first SCRATCH have pointer
   28244              :      equality.  */
   28245            4 :   rtx scratch_a = XEXP (dst, 0);
   28246            4 :   ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
   28247              : 
   28248            4 :   rtx scratch_b = XEXP (v0, 0);
   28249            4 :   ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
   28250              : 
   28251            4 :   ASSERT_EQ (scratch_a, scratch_b);
   28252              : 
   28253              :   /* Verify that the two mems are thus treated as equal.  */
   28254            4 :   ASSERT_TRUE (rtx_equal_p (dst, v0));
   28255              : 
   28256              :   /* Verify that the insn is recognized.  */
   28257            4 :   ASSERT_NE (-1, recog_memoized (insn));
   28258              : 
   28259              :   /* Test of an UNSPEC_VOLATILE, which has its own enum values.  */
   28260            4 :   insn = NEXT_INSN (insn);
   28261            4 :   ASSERT_EQ (INSN, GET_CODE (insn));
   28262              : 
   28263            4 :   set = single_set (insn);
   28264            4 :   ASSERT_NE (NULL, set);
   28265              : 
   28266            4 :   src = SET_SRC (set);
   28267            4 :   ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
   28268            4 :   ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
   28269            4 : }
   28270              : 
   28271              : /* Run all target-specific selftests: the two dump tests first, then the
                      :    four RTL-loading tests.  */
   28272              : 
   28273              : static void
   28274            4 : ix86_run_selftests (void)
   28275              : {
   28276            4 :   ix86_test_dumping_hard_regs ();
   28277            4 :   ix86_test_dumping_memory_blockage ();
   28278              : 
   28279              :   /* Various tests of loading RTL dumps, here because they contain
   28280              :      ix86-isms (e.g. names of hard regs).  */
   28281            4 :   ix86_test_loading_dump_fragment_1 ();
   28282            4 :   ix86_test_loading_call_insn ();
   28283            4 :   ix86_test_loading_full_dump ();
   28284            4 :   ix86_test_loading_unspec ();
   28285            4 : }
   28286              : 
   28287              : } // namespace selftest
   28288              : 
   28289              : #endif /* CHECKING_P */
   28290              : 
   28291              : static const scoped_attribute_specs *const ix86_attribute_table[] =
   28292              : {
   28293              :   &ix86_gnu_attribute_table  /* Sole entry; the array is installed as TARGET_ATTRIBUTE_TABLE below.  */
   28294              : };
   28295              : 
   28296              : /* Initialize the GCC target structure.  */
   28297              : #undef TARGET_RETURN_IN_MEMORY
   28298              : #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
   28299              : 
   28300              : #undef TARGET_LEGITIMIZE_ADDRESS
   28301              : #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
   28302              : 
   28303              : #undef TARGET_ATTRIBUTE_TABLE
   28304              : #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
   28305              : #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
   28306              : #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
   28307              : #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
   28308              : #  undef TARGET_MERGE_DECL_ATTRIBUTES
   28309              : #  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
   28310              : #endif
   28311              : 
   28312              : #undef TARGET_INVALID_CONVERSION
   28313              : #define TARGET_INVALID_CONVERSION ix86_invalid_conversion
   28314              : 
   28315              : #undef TARGET_INVALID_UNARY_OP
   28316              : #define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
   28317              : 
   28318              : #undef TARGET_INVALID_BINARY_OP
   28319              : #define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
   28320              : 
   28321              : #undef TARGET_COMP_TYPE_ATTRIBUTES
   28322              : #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
   28323              : 
   28324              : #undef TARGET_INIT_BUILTINS
   28325              : #define TARGET_INIT_BUILTINS ix86_init_builtins
   28326              : #undef TARGET_BUILTIN_DECL
   28327              : #define TARGET_BUILTIN_DECL ix86_builtin_decl
   28328              : #undef TARGET_EXPAND_BUILTIN
   28329              : #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
   28330              : 
   28331              : #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
   28332              : #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
   28333              :   ix86_builtin_vectorized_function
   28334              : 
   28335              : #undef TARGET_VECTORIZE_BUILTIN_GATHER
   28336              : #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
   28337              : 
   28338              : #undef TARGET_VECTORIZE_BUILTIN_SCATTER
   28339              : #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
   28340              : 
   28341              : #undef TARGET_BUILTIN_RECIPROCAL
   28342              : #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
   28343              : 
   28344              : #undef TARGET_ASM_FUNCTION_EPILOGUE
   28345              : #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
   28346              : 
   28347              : #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
   28348              : #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
   28349              :   ix86_print_patchable_function_entry
   28350              : 
   28351              : #undef TARGET_ENCODE_SECTION_INFO
   28352              : #ifndef SUBTARGET_ENCODE_SECTION_INFO
   28353              : #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
   28354              : #else
   28355              : #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
   28356              : #endif
   28357              : 
   28358              : #undef TARGET_ASM_OPEN_PAREN
   28359              : #define TARGET_ASM_OPEN_PAREN ""
   28360              : #undef TARGET_ASM_CLOSE_PAREN
   28361              : #define TARGET_ASM_CLOSE_PAREN ""
   28362              : 
   28363              : #undef TARGET_ASM_BYTE_OP
   28364              : #define TARGET_ASM_BYTE_OP ASM_BYTE
   28365              : 
   28366              : #undef TARGET_ASM_ALIGNED_HI_OP
   28367              : #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
   28368              : #undef TARGET_ASM_ALIGNED_SI_OP
   28369              : #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
   28370              : #ifdef ASM_QUAD
   28371              : #undef TARGET_ASM_ALIGNED_DI_OP
   28372              : #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
   28373              : #endif
   28374              : 
   28375              : #undef TARGET_PROFILE_BEFORE_PROLOGUE
   28376              : #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
   28377              : 
   28378              : #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
   28379              : #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
   28380              : 
   28381              : #undef TARGET_ASM_UNALIGNED_HI_OP
   28382              : #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
   28383              : #undef TARGET_ASM_UNALIGNED_SI_OP
   28384              : #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
   28385              : #undef TARGET_ASM_UNALIGNED_DI_OP
   28386              : #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
   28387              : 
   28388              : #undef TARGET_PRINT_OPERAND
   28389              : #define TARGET_PRINT_OPERAND ix86_print_operand
   28390              : #undef TARGET_PRINT_OPERAND_ADDRESS
   28391              : #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
   28392              : #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
   28393              : #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
   28394              : #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
   28395              : #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
   28396              : 
   28397              : #undef TARGET_SCHED_INIT_GLOBAL
   28398              : #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
   28399              : #undef TARGET_SCHED_ADJUST_COST
   28400              : #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
   28401              : #undef TARGET_SCHED_ISSUE_RATE
   28402              : #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
   28403              : #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
   28404              : #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
   28405              :   ia32_multipass_dfa_lookahead
   28406              : #undef TARGET_SCHED_MACRO_FUSION_P
   28407              : #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
   28408              : #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
   28409              : #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
   28410              : 
   28411              : #undef TARGET_FUNCTION_OK_FOR_SIBCALL
   28412              : #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
   28413              : 
   28414              : #undef TARGET_MEMMODEL_CHECK
   28415              : #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
   28416              : 
   28417              : #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
   28418              : #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
   28419              : 
   28420              : #ifdef HAVE_AS_TLS
   28421              : #undef TARGET_HAVE_TLS
   28422              : #define TARGET_HAVE_TLS true
   28423              : #endif
   28424              : #undef TARGET_CANNOT_FORCE_CONST_MEM
   28425              : #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
   28426              : #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
   28427              : #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
   28428              : 
   28429              : #undef TARGET_DELEGITIMIZE_ADDRESS
   28430              : #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
   28431              : 
   28432              : #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
   28433              : #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
   28434              : 
   28435              : #undef TARGET_MS_BITFIELD_LAYOUT_P
   28436              : #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
   28437              : 
   28438              : #if TARGET_MACHO
   28439              : #undef TARGET_BINDS_LOCAL_P
   28440              : #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
   28441              : #else
   28442              : #undef TARGET_BINDS_LOCAL_P
   28443              : #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
   28444              : #endif
   28445              : #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
   28446              : #undef TARGET_BINDS_LOCAL_P
   28447              : #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
   28448              : #endif
   28449              : 
   28450              : #undef TARGET_ASM_OUTPUT_MI_THUNK
   28451              : #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
   28452              : #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
   28453              : #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
   28454              : 
   28455              : #undef TARGET_ASM_FILE_START
   28456              : #define TARGET_ASM_FILE_START x86_file_start
   28457              : 
   28458              : #undef TARGET_OPTION_OVERRIDE
   28459              : #define TARGET_OPTION_OVERRIDE ix86_option_override
   28460              : 
   28461              : #undef TARGET_REGISTER_MOVE_COST
   28462              : #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
   28463              : #undef TARGET_MEMORY_MOVE_COST
   28464              : #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
   28465              : #undef TARGET_RTX_COSTS
   28466              : #define TARGET_RTX_COSTS ix86_rtx_costs
   28467              : #undef TARGET_INSN_COST
   28468              : #define TARGET_INSN_COST ix86_insn_cost
   28469              : #undef TARGET_ADDRESS_COST
   28470              : #define TARGET_ADDRESS_COST ix86_address_cost
   28471              : 
   28472              : #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
   28473              : #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
   28474              :   ix86_use_by_pieces_infrastructure_p
   28475              : 
   28476              : #undef TARGET_OVERLAP_OP_BY_PIECES_P
   28477              : #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
   28478              : 
   28479              : #undef TARGET_FLAGS_REGNUM
   28480              : #define TARGET_FLAGS_REGNUM FLAGS_REG
   28481              : #undef TARGET_FIXED_CONDITION_CODE_REGS
   28482              : #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
   28483              : #undef TARGET_CC_MODES_COMPATIBLE
   28484              : #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
   28485              : 
   28486              : #undef TARGET_MACHINE_DEPENDENT_REORG
   28487              : #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
   28488              : 
   28489              : #undef TARGET_BUILD_BUILTIN_VA_LIST
   28490              : #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
   28491              : 
   28492              : #undef TARGET_FOLD_BUILTIN
   28493              : #define TARGET_FOLD_BUILTIN ix86_fold_builtin
   28494              : 
   28495              : #undef TARGET_GIMPLE_FOLD_BUILTIN
   28496              : #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
   28497              : 
   28498              : #undef TARGET_COMPARE_VERSION_PRIORITY
   28499              : #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
   28500              : 
   28501              : #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
   28502              : #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
   28503              :   ix86_generate_version_dispatcher_body
   28504              : 
   28505              : #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
   28506              : #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
   28507              :   ix86_get_function_versions_dispatcher
   28508              : 
   28509              : #undef TARGET_ENUM_VA_LIST_P
   28510              : #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
   28511              : 
   28512              : #undef TARGET_FN_ABI_VA_LIST
   28513              : #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
   28514              : 
   28515              : #undef TARGET_CANONICAL_VA_LIST_TYPE
   28516              : #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
   28517              : 
   28518              : #undef TARGET_EXPAND_BUILTIN_VA_START
   28519              : #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
   28520              : 
   28521              : #undef TARGET_MD_ASM_ADJUST
   28522              : #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
   28523              : 
   28524              : #undef TARGET_C_EXCESS_PRECISION
   28525              : #define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
   28526              : #undef TARGET_C_BITINT_TYPE_INFO
   28527              : #define TARGET_C_BITINT_TYPE_INFO ix86_bitint_type_info
   28528              : #undef TARGET_C_MODE_FOR_FLOATING_TYPE
   28529              : #define TARGET_C_MODE_FOR_FLOATING_TYPE ix86_c_mode_for_floating_type
   28530              : #undef TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE
   28531              : #define TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE ix86_cxx_adjust_cdtor_callabi_fntype
   28532              : #undef TARGET_PROMOTE_PROTOTYPES
   28533              : #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
   28534              : #undef TARGET_PUSH_ARGUMENT
   28535              : #define TARGET_PUSH_ARGUMENT ix86_push_argument
   28536              : #undef TARGET_SETUP_INCOMING_VARARGS
   28537              : #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
   28538              : #undef TARGET_MUST_PASS_IN_STACK
   28539              : #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
   28540              : #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
   28541              : #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
   28542              : #undef TARGET_FUNCTION_ARG_ADVANCE
   28543              : #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
   28544              : #undef TARGET_FUNCTION_ARG
   28545              : #define TARGET_FUNCTION_ARG ix86_function_arg
   28546              : #undef TARGET_INIT_PIC_REG
   28547              : #define TARGET_INIT_PIC_REG ix86_init_pic_reg
   28548              : #undef TARGET_USE_PSEUDO_PIC_REG
   28549              : #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
   28550              : #undef TARGET_FUNCTION_ARG_BOUNDARY
   28551              : #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
   28552              : #undef TARGET_PASS_BY_REFERENCE
   28553              : #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
   28554              : #undef TARGET_INTERNAL_ARG_POINTER
   28555              : #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
   28556              : #undef TARGET_UPDATE_STACK_BOUNDARY
   28557              : #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
   28558              : #undef TARGET_GET_DRAP_RTX
   28559              : #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
   28560              : #undef TARGET_STRICT_ARGUMENT_NAMING
   28561              : #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
   28562              : #undef TARGET_STATIC_CHAIN
   28563              : #define TARGET_STATIC_CHAIN ix86_static_chain
   28564              : #undef TARGET_TRAMPOLINE_INIT
   28565              : #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
   28566              : #undef TARGET_RETURN_POPS_ARGS
   28567              : #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
   28568              : 
   28569              : #undef TARGET_WARN_FUNC_RETURN
   28570              : #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
   28571              : 
   28572              : #undef TARGET_LEGITIMATE_COMBINED_INSN
   28573              : #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
   28574              : 
   28575              : #undef TARGET_ASAN_SHADOW_OFFSET
   28576              : #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
   28577              : 
   28578              : #undef TARGET_GIMPLIFY_VA_ARG_EXPR
   28579              : #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
   28580              : 
   28581              : #undef TARGET_SCALAR_MODE_SUPPORTED_P
   28582              : #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
   28583              : 
   28584              : #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
   28585              : #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
   28586              : ix86_libgcc_floating_mode_supported_p
   28587              : 
   28588              : #undef TARGET_VECTOR_MODE_SUPPORTED_P
   28589              : #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
   28590              : 
   28591              : #undef TARGET_C_MODE_FOR_SUFFIX
   28592              : #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
   28593              : 
   28594              : #ifdef HAVE_AS_TLS
   28595              : #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
   28596              : #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
   28597              : #endif
   28598              : 
   28599              : #ifdef SUBTARGET_INSERT_ATTRIBUTES
   28600              : #undef TARGET_INSERT_ATTRIBUTES
   28601              : #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
   28602              : #endif
   28603              : 
   28604              : #undef TARGET_MANGLE_TYPE
   28605              : #define TARGET_MANGLE_TYPE ix86_mangle_type
   28606              : 
   28607              : #undef TARGET_EMIT_SUPPORT_TINFOS
   28608              : #define TARGET_EMIT_SUPPORT_TINFOS ix86_emit_support_tinfos
   28609              : 
   28610              : #undef TARGET_STACK_PROTECT_GUARD
   28611              : #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
   28612              : 
   28613              : #undef TARGET_STACK_PROTECT_GUARD_SYMBOL_P
   28614              : #define TARGET_STACK_PROTECT_GUARD_SYMBOL_P \
   28615              :   ix86_stack_protect_guard_symbol_p
   28616              : 
   28617              : #undef TARGET_STACK_PROTECT_RUNTIME_ENABLED_P
   28618              : #define TARGET_STACK_PROTECT_RUNTIME_ENABLED_P \
   28619              :   ix86_stack_protect_runtime_enabled_p
   28620              : 
   28621              : #if !TARGET_MACHO
   28622              : #undef TARGET_STACK_PROTECT_FAIL
   28623              : #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
   28624              : #endif
   28625              : 
   28626              : #undef TARGET_FUNCTION_VALUE
   28627              : #define TARGET_FUNCTION_VALUE ix86_function_value
   28628              : 
   28629              : #undef TARGET_FUNCTION_VALUE_REGNO_P
   28630              : #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
   28631              : 
   28632              : #undef TARGET_ZERO_CALL_USED_REGS
   28633              : #define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs
   28634              : 
   28635              : #undef TARGET_PROMOTE_FUNCTION_MODE
   28636              : #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
   28637              : 
   28638              : #undef  TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
   28639              : #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
   28640              : 
   28641              : #undef TARGET_MEMBER_TYPE_FORCES_BLK
   28642              : #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
   28643              : 
   28644              : #undef TARGET_INSTANTIATE_DECLS
   28645              : #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
   28646              : 
   28647              : #undef TARGET_SECONDARY_RELOAD
   28648              : #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
   28649              : #undef TARGET_SECONDARY_MEMORY_NEEDED
   28650              : #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
   28651              : #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
   28652              : #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
   28653              : 
   28654              : #undef TARGET_CLASS_MAX_NREGS
   28655              : #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
   28656              : 
   28657              : #undef TARGET_PREFERRED_RELOAD_CLASS
   28658              : #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
   28659              : #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
   28660              : #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
   28661              : /* When this hook returns true for MODE, the compiler allows
   28662              :    registers explicitly used in the rtl to be used as spill registers
   28663              :    but prevents the compiler from extending the lifetime of these
   28664              :    registers.  */
   28665              : #undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
   28666              : #define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
   28667              : #undef TARGET_CLASS_LIKELY_SPILLED_P
   28668              : #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
   28669              : #undef TARGET_CALLEE_SAVE_COST
   28670              : #define TARGET_CALLEE_SAVE_COST ix86_callee_save_cost
   28671              : 
   28672              : #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
   28673              : #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
   28674              :   ix86_builtin_vectorization_cost
   28675              : #undef TARGET_VECTORIZE_VEC_PERM_CONST
   28676              : #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
   28677              : #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
   28678              : #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
   28679              :   ix86_preferred_simd_mode
   28680              : #undef TARGET_VECTORIZE_SPLIT_REDUCTION
   28681              : #define TARGET_VECTORIZE_SPLIT_REDUCTION \
   28682              :   ix86_split_reduction
   28683              : #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
   28684              : #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
   28685              :   ix86_autovectorize_vector_modes
   28686              : #undef TARGET_VECTORIZE_GET_MASK_MODE
   28687              : #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
   28688              : #undef TARGET_VECTORIZE_CREATE_COSTS
   28689              : #define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs
   28690              : 
   28691              : #undef TARGET_SET_CURRENT_FUNCTION
   28692              : #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
   28693              : 
   28694              : #undef TARGET_OPTION_VALID_ATTRIBUTE_P
   28695              : #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
   28696              : 
   28697              : #undef TARGET_OPTION_SAVE
   28698              : #define TARGET_OPTION_SAVE ix86_function_specific_save
   28699              : 
   28700              : #undef TARGET_OPTION_RESTORE
   28701              : #define TARGET_OPTION_RESTORE ix86_function_specific_restore
   28702              : 
   28703              : #undef TARGET_OPTION_POST_STREAM_IN
   28704              : #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
   28705              : 
   28706              : #undef TARGET_OPTION_PRINT
   28707              : #define TARGET_OPTION_PRINT ix86_function_specific_print
   28708              : 
   28709              : #undef TARGET_CAN_INLINE_P
   28710              : #define TARGET_CAN_INLINE_P ix86_can_inline_p
   28711              : 
   28712              : #undef TARGET_LEGITIMATE_ADDRESS_P
   28713              : #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
   28714              : 
   28715              : #undef TARGET_REGISTER_PRIORITY
   28716              : #define TARGET_REGISTER_PRIORITY ix86_register_priority
   28717              : 
   28718              : #undef TARGET_REGISTER_USAGE_LEVELING_P
   28719              : #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
   28720              : 
   28721              : #undef TARGET_LEGITIMATE_CONSTANT_P
   28722              : #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
   28723              : 
   28724              : #undef TARGET_COMPUTE_FRAME_LAYOUT
   28725              : #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
   28726              : 
   28727              : #undef TARGET_FRAME_POINTER_REQUIRED
   28728              : #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
   28729              : 
   28730              : #undef TARGET_CAN_ELIMINATE
   28731              : #define TARGET_CAN_ELIMINATE ix86_can_eliminate
   28732              : 
   28733              : #undef TARGET_EXTRA_LIVE_ON_ENTRY
   28734              : #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
   28735              : 
   28736              : #undef TARGET_ASM_CODE_END
   28737              : #define TARGET_ASM_CODE_END ix86_code_end
   28738              : 
   28739              : #undef TARGET_CONDITIONAL_REGISTER_USAGE
   28740              : #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
   28741              : 
   28742              : #undef TARGET_CANONICALIZE_COMPARISON
   28743              : #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
   28744              : 
   28745              : #undef TARGET_LOOP_UNROLL_ADJUST
   28746              : #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
   28747              : 
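   28748              : /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.
                           NOTE(review): the hook is still defined unconditionally on the
                           next two lines, so the disabling is presumably done inside
                           ix86_spill_class itself; confirm there.  */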
   28749              : #undef TARGET_SPILL_CLASS
   28750              : #define TARGET_SPILL_CLASS ix86_spill_class
   28751              : 
   28752              : #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
   28753              : #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
   28754              :   ix86_simd_clone_compute_vecsize_and_simdlen
   28755              : 
   28756              : #undef TARGET_SIMD_CLONE_ADJUST
   28757              : #define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
   28758              : 
   28759              : #undef TARGET_SIMD_CLONE_USABLE
   28760              : #define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
   28761              : 
   28762              : #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
   28763              : #define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
   28764              : 
   28765              : #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
   28766              : #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
   28767              :   ix86_float_exceptions_rounding_supported_p
   28768              : 
   28769              : #undef TARGET_MODE_EMIT
   28770              : #define TARGET_MODE_EMIT ix86_emit_mode_set
   28771              : 
   28772              : #undef TARGET_MODE_NEEDED
   28773              : #define TARGET_MODE_NEEDED ix86_mode_needed
   28774              : 
   28775              : #undef TARGET_MODE_AFTER
   28776              : #define TARGET_MODE_AFTER ix86_mode_after
   28777              : 
   28778              : #undef TARGET_MODE_ENTRY
   28779              : #define TARGET_MODE_ENTRY ix86_mode_entry
   28780              : 
   28781              : #undef TARGET_MODE_EXIT
   28782              : #define TARGET_MODE_EXIT ix86_mode_exit
   28783              : 
   28784              : #undef TARGET_MODE_PRIORITY
   28785              : #define TARGET_MODE_PRIORITY ix86_mode_priority
   28786              : 
   28787              : #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
   28788              : #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
   28789              : 
   28790              : #undef TARGET_OFFLOAD_OPTIONS
   28791              : #define TARGET_OFFLOAD_OPTIONS \
   28792              :   ix86_offload_options
   28793              : 
   28794              : #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
   28795              : #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
   28796              : 
   28797              : #undef TARGET_OPTAB_SUPPORTED_P
   28798              : #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
   28799              : 
   28800              : #undef TARGET_HARD_REGNO_SCRATCH_OK
   28801              : #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
   28802              : 
   28803              : #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
   28804              : #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS X86_CUSTOM_FUNCTION_TEST
   28805              : 
   28806              : #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
   28807              : #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
   28808              : 
   28809              : #undef TARGET_INIT_LIBFUNCS
   28810              : #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
   28811              : 
   28812              : #undef TARGET_EXPAND_DIVMOD_LIBFUNC
   28813              : #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
   28814              : 
   28815              : #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
   28816              : #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
   28817              : 
   28818              : #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
   28819              : #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
   28820              : 
   28821              : #undef TARGET_HARD_REGNO_NREGS
   28822              : #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
   28823              : #undef TARGET_HARD_REGNO_MODE_OK
   28824              : #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
   28825              : 
   28826              : #undef TARGET_MODES_TIEABLE_P
   28827              : #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
   28828              : 
   28829              : #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
   28830              : #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
   28831              :   ix86_hard_regno_call_part_clobbered
   28832              : 
   28833              : #undef TARGET_CAN_CHANGE_MODE_CLASS
   28834              : #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
   28835              : 
   28836              : #undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
   28837              : #define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment
   28838              : 
   28839              : #undef TARGET_STATIC_RTX_ALIGNMENT
   28840              : #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
   28841              : #undef TARGET_CONSTANT_ALIGNMENT
   28842              : #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
   28843              : 
   28844              : #undef TARGET_EMPTY_RECORD_P
   28845              : #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
   28846              : 
   28847              : #undef TARGET_WARN_PARAMETER_PASSING_ABI
   28848              : #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
   28849              : 
   28850              : #undef TARGET_GET_MULTILIB_ABI_NAME
   28851              : #define TARGET_GET_MULTILIB_ABI_NAME \
   28852              :   ix86_get_multilib_abi_name
   28853              : 
   28854              : #undef TARGET_IFUNC_REF_LOCAL_OK
   28855              : #define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok
   28856              : 
   28857              : #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
   28858              : # undef TARGET_ASM_RELOC_RW_MASK
   28859              : # define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
   28860              : #endif
   28861              : 
   28862              : #undef TARGET_MEMTAG_CAN_TAG_ADDRESSES
   28863              : #define TARGET_MEMTAG_CAN_TAG_ADDRESSES ix86_memtag_can_tag_addresses
   28864              : 
   28865              : #undef TARGET_MEMTAG_ADD_TAG
   28866              : #define TARGET_MEMTAG_ADD_TAG ix86_memtag_add_tag
   28867              : 
   28868              : #undef TARGET_MEMTAG_SET_TAG
   28869              : #define TARGET_MEMTAG_SET_TAG ix86_memtag_set_tag
   28870              : 
   28871              : #undef TARGET_MEMTAG_EXTRACT_TAG
   28872              : #define TARGET_MEMTAG_EXTRACT_TAG ix86_memtag_extract_tag
   28873              : 
   28874              : #undef TARGET_MEMTAG_UNTAGGED_POINTER
   28875              : #define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer
   28876              : 
   28877              : #undef TARGET_MEMTAG_TAG_BITSIZE
   28878              : #define TARGET_MEMTAG_TAG_BITSIZE ix86_memtag_tag_bitsize
   28879              : 
   28880              : #undef TARGET_GEN_CCMP_FIRST
   28881              : #define TARGET_GEN_CCMP_FIRST ix86_gen_ccmp_first
   28882              : 
   28883              : #undef TARGET_GEN_CCMP_NEXT
   28884              : #define TARGET_GEN_CCMP_NEXT ix86_gen_ccmp_next
   28885              : 
   28886              : #undef TARGET_HAVE_CCMP
   28887              : #define TARGET_HAVE_CCMP ix86_have_ccmp
   28888              : 
   28889              : #undef TARGET_MODE_CAN_TRANSFER_BITS
   28890              : #define TARGET_MODE_CAN_TRANSFER_BITS ix86_mode_can_transfer_bits
   28891              : 
   28892              : #undef TARGET_REDZONE_CLOBBER
   28893              : #define TARGET_REDZONE_CLOBBER ix86_redzone_clobber
   28894              : 
   28895              : static bool
   28896        92996 : ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
   28897              : {
                          /* Return true only when OPTION_GLIBC is defined and nonzero and
                             FCODE, viewed as a built_in_function, is BUILT_IN_MEMPCPY.
                             Every other combination returns false.  FCODE is marked
                             ATTRIBUTE_UNUSED because it is not referenced at all when
                             OPTION_GLIBC is not defined.  */
   28898              : #ifdef OPTION_GLIBC
   28899        92996 :   if (OPTION_GLIBC)
   28900        92996 :     return (built_in_function)fcode == BUILT_IN_MEMPCPY;
   28901              :   else
   28902              :     return false;
   28903              : #else
   28904              :   return false;
   28905              : #endif
   28906              : }
   28907              : 
   28908              : #undef TARGET_LIBC_HAS_FAST_FUNCTION
   28909              : #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
   28910              : 
   28911              : static unsigned
   28912        78204 : ix86_libm_function_max_error (unsigned cfn, machine_mode mode,
   28913              :                               bool boundary_p)
   28914              : {
                          /* Return an error bound for the math function CFN in MODE.
                             When OPTION_GLIBC is undefined or zero, defer entirely to
                             default_libm_function_max_error.  Otherwise return the larger
                             of glibc_linux_libm_function_max_error's answer and a libmvec
                             bound.  The libmvec bound is nonzero only for the sin/cos
                             cases when BOUNDARY_P is false and all the math flags tested
                             below hold; in that situation ~0U is returned outright if
                             flag_rounding_math is set.  */
   28915              : #ifdef OPTION_GLIBC
   28916        78204 :   bool glibc_p = OPTION_GLIBC;
   28917              : #else
   28918              :   bool glibc_p = false;
   28919              : #endif
   28920        78204 :   if (glibc_p)
   28921              :     {
   28922              :       /* If __FAST_MATH__ is defined, glibc provides libmvec.  */
   28923        78204 :       unsigned int libmvec_ret = 0;
   28924        78204 :       if (!flag_trapping_math
   28925         8300 :           && flag_unsafe_math_optimizations
   28926         3378 :           && flag_finite_math_only
   28927         3352 :           && !flag_signed_zeros
   28928         3352 :           && !flag_errno_math)
   28929         3352 :         switch (cfn)
   28930              :           {
   28931         1396 :           CASE_CFN_COS:
   28932         1396 :           CASE_CFN_COS_FN:
   28933         1396 :           CASE_CFN_SIN:
   28934         1396 :           CASE_CFN_SIN_FN:
   28935         1396 :             if (!boundary_p)
   28936              :               {
   28937              :                 /* With non-default rounding modes, libmvec provides
   28938              :                    complete garbage in results.  E.g.
   28939              :                    _ZGVcN8v_sinf for 1.40129846e-45f in FE_UPWARD
   28940              :                    returns 0.00333309174f rather than 1.40129846e-45f.  */
   28941          587 :                 if (flag_rounding_math)
   28942              :                   return ~0U;
   28943              :                 /* https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
   28944              :                    claims libmvec maximum error is 4ulps.
   28945              :                    My own random testing indicates 2ulps for SFmode and
   28946              :                    0.5ulps for DFmode, but let's go with the 4ulps.  */
   28947              :                 libmvec_ret = 4;
   28948              :               }
   28949              :             break;
   28950              :           default:
   28951              :             break;
   28952              :           }
   28953        78204 :       unsigned int ret = glibc_linux_libm_function_max_error (cfn, mode,
   28954              :                                                               boundary_p);
                              /* Report the larger of the two bounds; libmvec_ret is still 0
                                 unless the sin/cos case above set it.  */
   28955        78204 :       return MAX (ret, libmvec_ret);
   28956              :     }
   28957            0 :   return default_libm_function_max_error (cfn, mode, boundary_p);
   28958              : }
   28959              : 
   28960              : #undef TARGET_LIBM_FUNCTION_MAX_ERROR
   28961              : #define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error
   28962              : 
   28963              : #if TARGET_MACHO
   28964              : static bool
   28965              : ix86_cannot_copy_insn_p (rtx_insn *insn)
   28966              : {
                          /* Return true when INSN must not be copied.  This is only
                             compiled under TARGET_MACHO.  It always returns false when
                             TARGET_64BIT; otherwise it returns true exactly when INSN is
                             a single set whose source is an UNSPEC with code
                             UNSPEC_SET_GOT.  */
   28967              :   if (TARGET_64BIT)
   28968              :     return false;
   28969              : 
   28970              :   rtx set = single_set (insn);
   28971              :   if (set)
   28972              :     {
   28973              :       rtx src = SET_SRC (set);
   28974              :       if (GET_CODE (src) == UNSPEC
   28975              :           && XINT (src, 1) == UNSPEC_SET_GOT)
   28976              :         return true;
   28977              :     }
   28978              :   return false;
   28979              : }
   28980              : 
   28981              : #undef TARGET_CANNOT_COPY_INSN_P
   28982              : #define TARGET_CANNOT_COPY_INSN_P ix86_cannot_copy_insn_p
   28983              : 
   28984              : #endif
   28985              : 
   28986              : #if CHECKING_P
   28987              : #undef TARGET_RUN_TARGET_SELFTESTS
   28988              : #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
   28989              : #endif /* #if CHECKING_P */
   28990              : 
   28991              : #undef TARGET_DOCUMENTATION_NAME
   28992              : #define TARGET_DOCUMENTATION_NAME "x86"
   28993              : 
   28994              : /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS.
                           Return a newly allocated sbitmap of FIRST_PSEUDO_REGISTER bits.
                           A bit is set for each general register that ix86_save_reg
                           reports as saved and whose running save offset lies within
                           [-0x8000, 0x7fff].  The bitmap is returned empty when any of
                           the early-exit tests below fires.  The hard frame pointer (when
                           frame_pointer_needed), the DRAP register (when crtl->drap_reg)
                           and the PIC register (when pic_offset_table_rtx) are always
                           cleared from the result.  */
   28995              : sbitmap
   28996       733358 : ix86_get_separate_components (void)
   28997              : {
   28998       733358 :   HOST_WIDE_INT offset, to_allocate;
   28999       733358 :   sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
   29000       733358 :   bitmap_clear (components);
   29001       733358 :   struct machine_function *m = cfun->machine;
   29002              : 
   29003       733358 :   offset = m->frame.stack_pointer_offset;
   29004       733358 :   to_allocate = offset - m->frame.sse_reg_save_offset;
   29005              : 
   29006              :   /* Shrink wrap separate uses MOV, which means APX PPX cannot be used.
   29007              :      Experiments show that APX PPX can speed up the prologue.  If the function
   29008              :      does not exit early during actual execution, then using APX PPX is faster.
   29009              :      If the function always exits early during actual execution, then shrink
   29010              :      wrap separate reduces the number of MOV (PUSH/POP) instructions actually
   29011              :      executed, thus speeding up execution.
   29012              :      foo:
   29013              :           movl    $1, %eax
   29014              :           testq   %rdi, %rdi
   29015              :           jne     .L60
   29016              :           ret   ---> early return.
   29017              :     .L60:
   29018              :           subq    $88, %rsp     ---> belong to prologue.
   29019              :           xorl    %eax, %eax
   29020              :           movq    %rbx, 40 (%rsp) ---> belong to prologue.
   29021              :           movq    8 (%rdi), %rbx
   29022              :           movq    %rbp, 48 (%rsp) ---> belong to prologue.
   29023              :           movq    %rdi, %rbp
   29024              :           testq   %rbx, %rbx
   29025              :           jne     .L61
   29026              :           movq    40 (%rsp), %rbx
   29027              :           movq    48 (%rsp), %rbp
   29028              :           addq    $88, %rsp
   29029              :           ret
   29030              :      .L61:
   29031              :           movq    %r12, 56 (%rsp) ---> belong to prologue.
   29032              :           movq    %r13, 64 (%rsp) ---> belong to prologue.
   29033              :           movq    %r14, 72 (%rsp) ---> belong to prologue.
   29034              :      ... ...
   29035              : 
   29036              :      Disable shrink wrap separate when PPX is enabled.  */
   29037       733358 :   if ((TARGET_APX_PPX && !crtl->calls_eh_return)
   29038       732890 :       || cfun->machine->func_type != TYPE_NORMAL
   29039              :       || TARGET_SEH
   29040       732792 :       || crtl->stack_realign_needed
   29041       723195 :       || m->call_ms2sysv)
   29042              :     return components;
   29043              : 
   29044              :   /* Shrink wrapping separate uses MOV instead of PUSH/POP, so disable
   29045              :      it when MOV is prohibited.  */
   29046       721273 :   if (save_regs_using_push_pop (to_allocate))
   29047              :     return components;
   29048              : 
   29049     32485458 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   29050     32136152 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
   29051              :       {
   29052              :         /* Skip registers with large offsets, where a pseudo may be needed.  */
   29053       601224 :         if (IN_RANGE (offset, -0x8000, 0x7fff))
   29054       600157 :           bitmap_set_bit (components, regno);
                                /* The offset advances for every saved register, including
                                   the ones skipped by the range test above.  */
   29055       647210 :         offset += UNITS_PER_WORD;
   29056              :       }
   29057              : 
   29058              :   /* Don't mess with the following registers.  */
   29059       349306 :   if (frame_pointer_needed)
   29060         6349 :     bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
   29061              : 
   29062       349306 :   if (crtl->drap_reg)
   29063          129 :     bitmap_clear_bit (components, REGNO (crtl->drap_reg));
   29064              : 
   29065       349306 :   if (pic_offset_table_rtx)
   29066        29898 :     bitmap_clear_bit (components, REAL_PIC_OFFSET_TABLE_REGNUM);
   29067              : 
   29068              :   return components;
   29069              : }
   29070              : 
   29071              : /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB.
                           Return a newly allocated sbitmap of FIRST_PSEUDO_REGISTER bits
                           describing which hard registers basic block BB touches.  A
                           non-fixed register is included when it is in BB's DF live IN,
                           GEN or KILL set, or when it is in the caller-save set computed
                           from the ABIs of the calls in BB relative to crtl->abi.  */
   29072              : sbitmap
   29073      9254040 : ix86_components_for_bb (basic_block bb)
   29074              : {
   29075      9254040 :   bitmap in = DF_LIVE_IN (bb);
   29076      9254040 :   bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
   29077      9254040 :   bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
   29078              : 
   29079      9254040 :   sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
   29080      9254040 :   bitmap_clear (components);
   29081              : 
                          /* Collect the ABI of every call insn in BB, then ask which
                             registers those callees require to be caller-saved with
                             respect to the current function's ABI.  */
   29082      9254040 :   function_abi_aggregator callee_abis;
   29083      9254040 :   rtx_insn *insn;
   29084    107864257 :   FOR_BB_INSNS (bb, insn)
   29085     98610217 :     if (CALL_P (insn))
   29086      3032706 :       callee_abis.note_callee_abi (insn_callee_abi (insn));
   29087      9254040 :   HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
   29088              : 
   29089              :   /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets.
                             Registers in EXTRA_CALLER_SAVES count as used as well, and
                             fixed registers are never included.  */
   29090    860625720 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   29091    851371680 :     if (!fixed_regs[regno]
   29092    851371680 :         && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
   29093    432682042 :             || bitmap_bit_p (in, regno)
   29094    407561131 :             || bitmap_bit_p (gen, regno)
   29095    395189744 :             || bitmap_bit_p (kill, regno)))
   29096     37757647 :       bitmap_set_bit (components, regno);
   29097              : 
   29098      9254040 :   return components;
   29099              : }
   29100              : 
   29101              : /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.
                           All four parameters are left unnamed and ignored; the body is
                           intentionally empty.  */
   29102              : void
   29103       468615 : ix86_disqualify_components (sbitmap, edge, sbitmap, bool)
   29104              : {
   29105              :   /* Nothing to do for x86.  */
   29106       468615 : }
   29107              : 
   29108              : /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS.
                           For each general register that ix86_save_reg reports as saved,
                           emit a MOV-based save through ix86_emit_save_reg_using_mov if
                           its bit is set in COMPONENTS.  Registers are visited in
                           increasing register-number order and each one occupies one
                           UNITS_PER_WORD slot, counted down from the starting
                           CFA offset.  */
   29109              : void
   29110       160913 : ix86_emit_prologue_components (sbitmap components)
   29111              : {
   29112       160913 :   HOST_WIDE_INT cfa_offset;
   29113       160913 :   struct machine_function *m = cfun->machine;
   29114              : 
   29115       160913 :   cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
   29116       160913 :                - m->frame.stack_pointer_offset;
   29117     14964909 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   29118     14803996 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
   29119              :       {
   29120       732460 :         if (bitmap_bit_p (components, regno))
   29121       189407 :           ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
                                /* Step past this register's slot even when it is not in
                                   COMPONENTS, so later registers keep their own offsets.  */
   29122       781903 :         cfa_offset -= UNITS_PER_WORD;
   29123              :       }
   29124       160913 : }
   29125              : 
   29126              : /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS.
                      :    Walk every hard register number.  For each general register
                      :    accepted by ix86_save_reg (regno, true, true) whose bit is set
                      :    in COMPONENTS, emit a word_mode move into the register from a
                      :    frame memory reference addressed by
                      :    choose_baseaddr (cfa_offset, NULL), mark the insn
                      :    RTX_FRAME_RELATED_P and attach a REG_CFA_RESTORE note naming
                      :    the register.  The offset starts at frame.reg_save_offset
                      :    + fs.sp_offset - frame.stack_pointer_offset and drops by
                      :    UNITS_PER_WORD for every accepted register, whether or not it
                      :    is in COMPONENTS.  */
   29127              : void
   29128       143689 : ix86_emit_epilogue_components (sbitmap components)
   29129              : {
   29130       143689 :   HOST_WIDE_INT cfa_offset;
   29131       143689 :   struct machine_function *m = cfun->machine;
   29132       143689 :   cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
   29133       143689 :                - m->frame.stack_pointer_offset;
   29134              : 
   29135     13363077 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   29136     13219388 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
   29137              :       {
   29138       655773 :         if (bitmap_bit_p (components, regno))
   29139              :           {
   29140       254041 :             rtx reg = gen_rtx_REG (word_mode, regno);
   29141       254041 :             rtx mem;
   29142       254041 :             rtx_insn *insn;
   29143              : 
   29144       254041 :             mem = choose_baseaddr (cfa_offset, NULL);
   29145       254041 :             mem = gen_frame_mem (word_mode, mem);
   29146       254041 :             insn = emit_move_insn (reg, mem);
   29147              : 
   29148       254041 :             RTX_FRAME_RELATED_P (insn) = 1;
   29149       254041 :             add_reg_note (insn, REG_CFA_RESTORE, reg);
   29150              :           }
   29151       711108 :         cfa_offset -= UNITS_PER_WORD;  /* Also when REGNO is not in COMPONENTS.  */
   29152              :       }
   29153       143689 : }
   29154              : 
   29155              : /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS.
                      :    For each register number whose bit is set in COMPONENTS, mark
                      :    it in cfun->machine->reg_is_wrapped_separately and set both
                      :    cfun->machine->use_fast_prologue_epilogue and
                      :    cfun->machine->frame.save_regs_using_mov.  Nothing is written
                      :    when COMPONENTS has no bit set.  */
   29156              : void
   29157        44085 : ix86_set_handled_components (sbitmap components)
   29158              : {
   29159      4099905 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   29160      4055820 :     if (bitmap_bit_p (components, regno))
   29161              :       {
   29162       104539 :         cfun->machine->reg_is_wrapped_separately[regno] = true;
   29163       104539 :         cfun->machine->use_fast_prologue_epilogue = true;  /* Function-wide flag; re-set on each hit.  */
   29164       104539 :         cfun->machine->frame.save_regs_using_mov = true;  /* Likewise.  */
   29165              :       }
   29166        44085 : }
   29167              : 
   29168              : #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS  /* Each #undef/#define pair below rebinds one TARGET_SHRINK_WRAP_* macro to its ix86_* function.  */
   29169              : #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS ix86_get_separate_components
   29170              : #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
   29171              : #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB ix86_components_for_bb
   29172              : #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
   29173              : #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS ix86_disqualify_components
   29174              : #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
   29175              : #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
   29176              :   ix86_emit_prologue_components
   29177              : #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
   29178              : #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
   29179              :   ix86_emit_epilogue_components
   29180              : #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
   29181              : #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS ix86_set_handled_components
   29182              : 
   29183              : struct gcc_target targetm = TARGET_INITIALIZER;  /* NOTE(review): TARGET_INITIALIZER presumably expands using the TARGET_* macros defined above -- confirm.  */
   29184              : 
   29185              : #include "gt-i386.h"
        

Generated by: LCOV version 2.4-beta

The LCOV profile was generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite was run with the built compiler.