LCOV - code coverage report
Current view: top level - gcc/config/i386 - i386.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 89.3 % 12992 11599
Test Date: 2026-04-20 14:57:17 Functions: 97.0 % 469 455
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Subroutines used for code generation on IA-32.
       2              :    Copyright (C) 1988-2026 Free Software Foundation, Inc.
       3              : 
       4              : This file is part of GCC.
       5              : 
       6              : GCC is free software; you can redistribute it and/or modify
       7              : it under the terms of the GNU General Public License as published by
       8              : the Free Software Foundation; either version 3, or (at your option)
       9              : any later version.
      10              : 
      11              : GCC is distributed in the hope that it will be useful,
      12              : but WITHOUT ANY WARRANTY; without even the implied warranty of
      13              : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14              : GNU General Public License for more details.
      15              : 
      16              : You should have received a copy of the GNU General Public License
      17              : along with GCC; see the file COPYING3.  If not see
      18              : <http://www.gnu.org/licenses/>.  */
      19              : 
      20              : #define INCLUDE_STRING
      21              : #define IN_TARGET_CODE 1
      22              : 
      23              : #include "config.h"
      24              : #include "system.h"
      25              : #include "coretypes.h"
      26              : #include "backend.h"
      27              : #include "rtl.h"
      28              : #include "tree.h"
      29              : #include "memmodel.h"
      30              : #include "gimple.h"
      31              : #include "cfghooks.h"
      32              : #include "cfgloop.h"
      33              : #include "df.h"
      34              : #include "tm_p.h"
      35              : #include "stringpool.h"
      36              : #include "expmed.h"
      37              : #include "optabs.h"
      38              : #include "regs.h"
      39              : #include "emit-rtl.h"
      40              : #include "recog.h"
      41              : #include "cgraph.h"
      42              : #include "diagnostic.h"
      43              : #include "cfgbuild.h"
      44              : #include "alias.h"
      45              : #include "fold-const.h"
      46              : #include "attribs.h"
      47              : #include "calls.h"
      48              : #include "stor-layout.h"
      49              : #include "varasm.h"
      50              : #include "output.h"
      51              : #include "insn-attr.h"
      52              : #include "flags.h"
      53              : #include "except.h"
      54              : #include "explow.h"
      55              : #include "expr.h"
      56              : #include "cfgrtl.h"
      57              : #include "common/common-target.h"
      58              : #include "langhooks.h"
      59              : #include "reload.h"
      60              : #include "gimplify.h"
      61              : #include "dwarf2.h"
      62              : #include "tm-constrs.h"
      63              : #include "cselib.h"
      64              : #include "sched-int.h"
      65              : #include "opts.h"
      66              : #include "tree-pass.h"
      67              : #include "context.h"
      68              : #include "pass_manager.h"
      69              : #include "target-globals.h"
      70              : #include "gimple-iterator.h"
      71              : #include "gimple-fold.h"
      72              : #include "tree-vectorizer.h"
      73              : #include "shrink-wrap.h"
      74              : #include "builtins.h"
      75              : #include "rtl-iter.h"
      76              : #include "tree-iterator.h"
      77              : #include "dbgcnt.h"
      78              : #include "case-cfn-macros.h"
      79              : #include "dojump.h"
      80              : #include "fold-const-call.h"
      81              : #include "tree-vrp.h"
      82              : #include "tree-ssanames.h"
      83              : #include "selftest.h"
      84              : #include "selftest-rtl.h"
      85              : #include "print-rtl.h"
      86              : #include "intl.h"
      87              : #include "ifcvt.h"
      88              : #include "symbol-summary.h"
      89              : #include "sreal.h"
      90              : #include "ipa-cp.h"
      91              : #include "ipa-prop.h"
      92              : #include "ipa-fnsummary.h"
      93              : #include "wide-int-bitmask.h"
      94              : #include "tree-vector-builder.h"
      95              : #include "debug.h"
      96              : #include "dwarf2out.h"
      97              : #include "i386-options.h"
      98              : #include "i386-builtins.h"
      99              : #include "i386-expand.h"
     100              : #include "i386-features.h"
     101              : #include "function-abi.h"
     102              : #include "rtl-error.h"
     103              : #include "gimple-pretty-print.h"
     104              : 
     105              : /* This file should be included last.  */
     106              : #include "target-def.h"
     107              : 
/* Forward declarations for static helpers defined later in this file.  */
static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
static void ix86_emit_restore_reg_using_pop (rtx, bool = false);


/* Default stack-probe limit when the target configuration does not
   provide one; -1 presumably means "no limit" — confirm against the
   stack-checking code that consumes it.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.
   QI/HI/SI/DImode map to 0..3; any other (wider) mode falls back
   to index 4.  */
#define MODE_INDEX(mode)                                        \
  ((mode) == QImode ? 0                                         \
   : (mode) == HImode ? 1                                       \
   : (mode) == SImode ? 2                                       \
   : (mode) == DImode ? 3                                       \
   : 4)
     123              : 
     124              : 
/* Cost table selected by -mtune.  */
const struct processor_costs *ix86_tune_cost = NULL;

/* Cost table in effect; set by -mtune, or overridden by -Os.  */
const struct processor_costs *ix86_cost = NULL;

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.
   The initializer macros come from i386.h.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
     140              : 
/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  Entries must stay in
   hard-register-number order (see the groups below).  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer, flags, fpsr, frame */
  NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  /* REX registers */
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  /* AVX-512 SSE registers */
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
  /* Mask registers.  */
  ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
  MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
  /* REX2 registers */
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
};
     181              : 
/* The "default" register map used in 32bit mode: maps GCC hard
   register numbers to debugger (DWARF) register numbers.  Registers
   unavailable in 32-bit mode are INVALID_REGNUM.  */

unsigned int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 2, 1, 3, 6, 7, 4, 5,
  /* fp regs */
  12, 13, 14, 15, 16, 17, 18, 19,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE */
  21, 22, 23, 24, 25, 26, 27, 28,
  /* MMX */
  29, 30, 31, 32, 33, 34, 35, 36,
  /* extended integer registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* extended sse registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 16-23 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 24-31 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* Mask registers */
  93, 94, 95, 96, 97, 98, 99, 100
};
     212              : 
/* The "default" register map used in 64bit mode: maps GCC hard
   register numbers to debugger (DWARF) register numbers.  */

unsigned int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs */
  0, 1, 2, 3, 4, 5, 6, 7,
  /* fp regs */
  33, 34, 35, 36, 37, 38, 39, 40,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE */
  17, 18, 19, 20, 21, 22, 23, 24,
  /* MMX */
  41, 42, 43, 44, 45, 46, 47, 48,
  /* extended integer registers */
  8, 9, 10, 11, 12, 13, 14, 15,
  /* extended SSE registers */
  25, 26, 27, 28, 29, 30, 31, 32,
  /* AVX-512 registers 16-23 */
  67, 68, 69, 70, 71, 72, 73, 74,
  /* AVX-512 registers 24-31 */
  75, 76, 77, 78, 79, 80, 81, 82,
  /* Mask registers */
  118, 119, 120, 121, 122, 123, 124, 125,
  /* REX2 extended integer registers */
  130, 131, 132, 133, 134, 135, 136, 137,
  138, 139, 140, 141, 142, 143, 144, 145
};
     242              : 
/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
        0 for %eax (gcc regno = 0)
        1 for %ecx (gcc regno = 2)
        2 for %edx (gcc regno = 1)
        3 for %ebx (gcc regno = 3)
        4 for %esp (gcc regno = 7)
        5 for %ebp (gcc regno = 6)
        6 for %esi (gcc regno = 4)
        7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believed these numbers have these meanings.
        8  for %eip    (no gcc equivalent)
        9  for %eflags (gcc regno = 17)
        10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 was so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I had does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seemed to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still printed garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB printed for various FP stack regs
   when doing an `x' command were all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
        11 for %st(0) (gcc regno = 8)
        12 for %st(1) (gcc regno = 9)
        13 for %st(2) (gcc regno = 10)
        14 for %st(3) (gcc regno = 11)
        15 for %st(4) (gcc regno = 12)
        16 for %st(5) (gcc regno = 13)
        17 for %st(6) (gcc regno = 14)
        18 for %st(7) (gcc regno = 15)
*/
unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
{
  /* general regs: note %esi/%edi (6/7 order) differ from the
     32-bit default map above.  */
  0, 2, 1, 3, 6, 7, 5, 4,
  /* fp regs */
  11, 12, 13, 14, 15, 16, 17, 18,
  /* arg, flags, fpsr, frame */
  IGNORED_DWARF_REGNUM, 9,
  IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
  /* SSE registers */
  21, 22, 23, 24, 25, 26, 27, 28,
  /* MMX registers */
  29, 30, 31, 32, 33, 34, 35, 36,
  /* extended integer registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* extended sse registers */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 16-23 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* AVX-512 registers 24-31 */
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
  /* Mask registers */
  93, 94, 95, 96, 97, 98, 99, 100
};
     325              : 
/* Define parameter passing and return registers.  */

/* Integer argument registers, System V AMD64 ABI order.  */
static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

/* Integer argument registers for the Microsoft x64 calling convention.  */
static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

/* Similar as Clang's preserve_none function parameter passing.
   NB: Use DI_REG and SI_REG, see ix86_function_arg_regno_p.  */

static int const x86_64_preserve_none_int_parameter_registers[6] =
{
  R12_REG, R13_REG, R14_REG, R15_REG, DI_REG, SI_REG
};

/* Integer value-return registers.  NOTE(review): DI_REG/SI_REG here
   look intended for larger aggregate returns — confirm against the
   return-value classification code.  */
static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
     350              : 
/* Define the structure for the machine field in struct function.
   Linked-list node caching a stack slot; GTY(()) makes it visible to
   GCC's garbage collector.  */

struct GTY(()) stack_local_entry {
  unsigned short mode;          /* machine mode of the slot, narrowed to short */
  unsigned short n;             /* slot number — presumably distinguishes
                                   multiple slots of the same mode; confirm
                                   against the code that allocates these */
  rtx rtl;                      /* the cached stack-slot RTL */
  struct stack_local_entry *next;
};
     359              : 
/* Which cpu are we scheduling for.  */
enum attr_cpu ix86_schedule;

/* Which cpu are we optimizing for.  */
enum processor_type ix86_tune;

/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if processor has SSE prefetch instruction.  */
unsigned char ix86_prefetch_sse;

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Alignment for incoming stack boundary in bits specified at
   command line.  */
unsigned int ix86_user_incoming_stack_boundary;

/* Default alignment for incoming stack boundary in bits.  */
unsigned int ix86_default_incoming_stack_boundary;

/* Alignment for incoming stack boundary in bits.  */
unsigned int ix86_incoming_stack_boundary;

/* True if there is no direct access to extern symbols.  */
bool ix86_has_no_direct_extern_access;

/* Calling-ABI-specific va_list type nodes (System V vs. Microsoft).  */
tree sysv_va_list_type_node;
tree ms_va_list_type_node;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

/* Fence to use after loop using movnt (non-temporal stores).  */
tree x86_mfence;
     398              : 
/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSEHF_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

/* Maximum number of eightbyte classes an argument can occupy —
   presumably bounds the classification arrays; confirm at the call
   sites of the classifier.  */
#define MAX_CLASSES 8

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* True once ext_80387_constants_table has been initialized.  */
static bool ext_80387_constants_init;
     428              : 
/* More forward declarations for static functions defined below.  */
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (machine_mode,
                                                const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (void);
static tree ix86_canonical_va_list_type (tree);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);

static bool ix86_can_inline_p (tree, tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);
/* x86 condition codes in instruction-encoding order (O, NO, B, NB, ...).  */
typedef enum ix86_flags_cc
{
  X86_CCO = 0, X86_CCNO, X86_CCB, X86_CCNB,
  X86_CCE, X86_CCNE, X86_CCBE, X86_CCNBE,
  X86_CCS, X86_CCNS, X86_CCP, X86_CCNP,
  X86_CCL, X86_CCNL, X86_CCLE, X86_CCNLE
} ix86_cc;

/* Default-flags-value ("dfv") annotation strings, one per ix86_cc
   value above — apparently used when printing APX CCMP/CTEST
   instructions; confirm at the operand-printing call site.  */
static const char *ix86_ccmp_dfv_mapping[] =
{
  "{dfv=of}", "{dfv=}", "{dfv=cf}", "{dfv=}",
  "{dfv=zf}", "{dfv=}", "{dfv=cf, zf}", "{dfv=}",
  "{dfv=sf}", "{dfv=}", "{dfv=cf}", "{dfv=}",
  "{dfv=sf}", "{dfv=sf, of}", "{dfv=sf, of, zf}", "{dfv=sf, of}"
};


/* Whether -mtune= or -march= were specified.  */
int ix86_tune_defaulted;
int ix86_arch_specified;
     463              : 
     464              : /* Return true if a red-zone is in use.  We can't use red-zone when
     465              :    there are local indirect jumps, like "indirect_jump" or "tablejump",
     466              :    which jumps to another place in the function, since "call" in the
     467              :    indirect thunk pushes the return address onto stack, destroying
     468              :    red-zone.
     469              : 
     470              :    NB: Don't use red-zone for functions with no_caller_saved_registers
     471              :    and 32 GPRs or 16 XMM registers since 128-byte red-zone is too small
     472              :    for 31 GPRs or 15 GPRs + 16 XMM registers.
     473              : 
     474              :    TODO: If we can reserve the first 2 WORDs, for PUSH and, another
     475              :    for CALL, in red-zone, we can allow local indirect jumps with
     476              :    indirect thunk.  */
     477              : 
     478              : bool
     479      9889340 : ix86_using_red_zone (void)
     480              : {
     481      9889340 :   return (TARGET_RED_ZONE
     482      8947516 :           && !TARGET_64BIT_MS_ABI
     483      8645038 :           && ((!TARGET_APX_EGPR && !TARGET_SSE)
     484      8622039 :               || (cfun->machine->call_saved_registers
     485      8622039 :                   != TYPE_NO_CALLER_SAVED_REGISTERS))
     486     18534317 :           && (!cfun->machine->has_local_indirect_jump
     487        47544 :               || cfun->machine->indirect_branch_type == indirect_branch_keep));
     488              : }
     489              : 
     490              : /* Return true, if profiling code should be emitted before
     491              :    prologue. Otherwise it returns false.
     492              :    Note: For x86 with "hotfix" it is sorried.  */
     493              : static bool
     494      4492288 : ix86_profile_before_prologue (void)
     495              : {
     496      4492288 :   return flag_fentry != 0;
     497              : }
     498              : 
/* Update register usage after having seen the compiler flags.
   Adjusts the global fixed/call-used register sets and accessible_reg_set
   to match the selected ISA (64-bit, MMX, SSE, AVX-512, APX) and ABI.
   Order matters: call_used_regs is finalized before CLOBBERED_REGS is
   rebuilt from it.  */

static void
ix86_conditional_register_usage (void)
{
  int i, c_mask;

  /* If there are no caller-saved registers, preserve all registers
     except fixed_regs and registers used for function return value,
     since aggregate_value_p checks call_used_regs[regno] on return
     value.  */
  if (cfun
      && (cfun->machine->call_saved_registers
          == TYPE_NO_CALLER_SAVED_REGISTERS))
    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
      if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
        call_used_regs[i] = 0;

  /* For 32-bit targets, disable the REX registers (r8-r15, xmm8-xmm31),
     which only exist in 64-bit mode.  */
  if (! TARGET_64BIT)
    {
      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
        CLEAR_HARD_REG_BIT (accessible_reg_set, i);
      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
        CLEAR_HARD_REG_BIT (accessible_reg_set, i);
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
        CLEAR_HARD_REG_BIT (accessible_reg_set, i);
    }

  /*  See the definition of CALL_USED_REGISTERS in i386.h.  */
  c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);

  CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);

  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    {
      /* Set/reset conditionally defined registers from
         CALL_USED_REGISTERS initializer: entries > 1 encode a mask
         that is resolved against the active ABI here.  */
      if (call_used_regs[i] > 1)
        call_used_regs[i] = !!(call_used_regs[i] & c_mask);

      /* Calculate registers of CLOBBERED_REGS register set
         as call used registers from GENERAL_REGS register set.  */
      if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
          && call_used_regs[i])
        SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
    }

  /* If MMX is disabled, disable the registers.  */
  if (! TARGET_MMX)
    accessible_reg_set &= ~reg_class_contents[MMX_REGS];

  /* If SSE is disabled, disable the registers.  */
  if (! TARGET_SSE)
    accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];

  /* If the FPU is disabled, disable the registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];

  /* If AVX512F is disabled, disable the registers (xmm16-xmm31 and
     the mask registers).  */
  if (! TARGET_AVX512F)
    {
      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
        CLEAR_HARD_REG_BIT (accessible_reg_set, i);

      accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
    }

  /* If APX is disabled, disable the REX2 (r16-r31) registers.  */
  if (! (TARGET_APX_EGPR && TARGET_64BIT))
    {
      for (i = FIRST_REX2_INT_REG; i <= LAST_REX2_INT_REG; i++)
        CLEAR_HARD_REG_BIT (accessible_reg_set, i);
    }
}
     575              : 
     576              : /* Canonicalize a comparison from one we don't have to one we do have.  */
     577              : 
/* Canonicalize the comparison *CODE applied to *OP0 and *OP1 into an
   equivalent form that matches an existing insn pattern.  All three
   transformations below swap the operands and the condition, so they are
   only attempted when OP0_PRESERVE_VALUE is false (i.e. the caller does
   not require the value of *OP0 to stay in place).  */

static void
ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
                              bool op0_preserve_value)
{
  /* The order of operands in x87 ficom compare is forced by combine in
     simplify_comparison () function.  Float operator is treated as RTX_OBJ
     with a precedence over other operators and is always put in the first
     place.  Swap condition and operands to match ficom instruction.  */
  if (!op0_preserve_value
      && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
    {
      enum rtx_code scode = swap_condition ((enum rtx_code) *code);

      /* We are called only for compares that are split to SAHF instruction.
         Ensure that we have setcc/jcc insn for the swapped condition.  */
      if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
        {
          std::swap (*op0, *op1);
          *code = (int) scode;
          return;
        }
    }

  /* SUB (a, b) underflows precisely when a < b.  Convert
     (compare (minus (a b)) a) to (compare (a b))
     to match *sub<mode>_3 pattern.  */
  if (!op0_preserve_value
      && (*code == GTU || *code == LEU)
      && GET_CODE (*op0) == MINUS
      && rtx_equal_p (XEXP (*op0, 0), *op1))
    {
      /* Replace the MINUS by its own operands; the condition is swapped
         because the operand order changes from (a-b, a) to (a, b).  */
      *op1 = XEXP (*op0, 1);
      *op0 = XEXP (*op0, 0);
      *code = (int) swap_condition ((enum rtx_code) *code);
      return;
    }

  /* Swap operands of GTU comparison to canonicalize
     addcarry/subborrow comparison.  */
  if (!op0_preserve_value
      && *code == GTU
      && GET_CODE (*op0) == PLUS
      && ix86_carry_flag_operator (XEXP (*op0, 0), VOIDmode)
      && GET_CODE (XEXP (*op0, 1)) == ZERO_EXTEND
      && GET_CODE (*op1) == ZERO_EXTEND)
    {
      std::swap (*op0, *op1);
      *code = (int) swap_condition ((enum rtx_code) *code);
      return;
    }
}
     629              : 
     630              : /* Hook to determine if one function can safely inline another.  */
     631              : 
/* Return true if CALLER may inline CALLEE, based on their
   target-specific option nodes.  The checks are ordered: ISA flags
   first, then non-ISA target flags, then fpmath, then arch/tune/
   branch-cost; the first mismatch rejects the inline (except where
   always_inline relaxes a check).  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* Changes of those flags can be tolerated for always inlines.  Let's
     hope the user knows what he is doing.  */
  unsigned HOST_WIDE_INT always_inline_safe_mask
         = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
            | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
            | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
            | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
            | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
            | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
            | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);


  /* A function without an explicit target attribute uses the default
     target options.  */
  if (!callee_tree)
    callee_tree = target_option_default_node;
  if (!caller_tree)
    caller_tree = target_option_default_node;
  /* Identical option nodes: trivially inlinable.  */
  if (callee_tree == caller_tree)
    return true;

  struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  bool ret = false;
  /* always_inline relaxes several of the checks below.  */
  bool always_inline
    = (DECL_DISREGARD_INLINE_LIMITS (callee)
       && lookup_attribute ("always_inline",
                            DECL_ATTRIBUTES (callee)));

  /* If callee only uses GPRs, ignore MASK_80387.  */
  if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
    always_inline_safe_mask |= MASK_80387;

  cgraph_node *callee_node = cgraph_node::get (callee);
  /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
     function can inline a SSE2 function but a SSE2 function can't inline
     a SSE4 function.  */
  if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
       != callee_opts->x_ix86_isa_flags)
      || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
          != callee_opts->x_ix86_isa_flags2))
    ret = false;

  /* See if we have the same non-isa options.  For always_inline callees
     only the flags outside ALWAYS_INLINE_SAFE_MASK must match.  */
  else if ((!always_inline
            && caller_opts->x_target_flags != callee_opts->x_target_flags)
           || (caller_opts->x_target_flags & ~always_inline_safe_mask)
               != (callee_opts->x_target_flags & ~always_inline_safe_mask))
    ret = false;

  else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
           /* If the callee doesn't use FP expressions differences in
              ix86_fpmath can be ignored.  We are called from FEs
              for multi-versioning call optimization, so beware of
              ipa_fn_summaries not available.  */
           && (! ipa_fn_summaries
               || ipa_fn_summaries->get (callee_node) == NULL
               || ipa_fn_summaries->get (callee_node)->fp_expressions))
    ret = false;

  /* At this point we cannot identify whether arch or tune setting
     comes from target attribute or not.  So the most conservative way
     is to allow the callee that uses default arch and tune string to
     be inlined.  */
  else if (!strcmp (callee_opts->x_ix86_arch_string, "x86-64")
           && !strcmp (callee_opts->x_ix86_tune_string, "generic"))
    ret = true;

  /* See if arch, tune, etc. are the same.  As previous ISA flags already
     checks if callee's ISA is subset of caller's, do not block
     always_inline attribute for callee even it has different arch.  */
  else if (!always_inline && caller_opts->arch != callee_opts->arch)
    ret = false;

  else if (!always_inline && caller_opts->tune != callee_opts->tune)
    ret = false;

  else if (!always_inline
           && caller_opts->branch_cost != callee_opts->branch_cost)
    ret = false;

  /* Everything relevant matched: allow the inline.  */
  else
    ret = true;

  return ret;
}
     722              : 
     723              : /* Return true if this goes in large data/bss.  */
     724              : 
     725              : static bool
     726     80664491 : ix86_in_large_data_p (tree exp)
     727              : {
     728     80664491 :   if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC
     729     80664253 :       && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC)
     730              :     return false;
     731              : 
     732         1147 :   if (exp == NULL_TREE)
     733              :     return false;
     734              : 
     735              :   /* Functions are never large data.  */
     736         1147 :   if (TREE_CODE (exp) == FUNCTION_DECL)
     737              :     return false;
     738              : 
     739              :   /* Automatic variables are never large data.  */
     740          279 :   if (VAR_P (exp) && !is_global_var (exp))
     741              :     return false;
     742              : 
     743          279 :   if (VAR_P (exp) && DECL_SECTION_NAME (exp))
     744              :     {
     745           51 :       const char *section = DECL_SECTION_NAME (exp);
     746           51 :       if (strcmp (section, ".ldata") == 0
     747           51 :           || startswith (section, ".ldata.")
     748           51 :           || strcmp (section, ".lbss") == 0
     749           51 :           || startswith (section, ".lbss.")
     750           99 :           || startswith (section, ".gnu.linkonce.lb."))
     751              :         return true;
     752              :       return false;
     753              :     }
     754              :   else
     755              :     {
     756          228 :       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
     757              : 
     758              :       /* If this is an incomplete type with size 0, then we can't put it
     759              :          in data because it might be too big when completed.  Also,
     760              :          int_size_in_bytes returns -1 if size can vary or is larger than
     761              :          an integer in which case also it is safer to assume that it goes in
     762              :          large data.  */
     763          228 :       if (size <= 0 || size > ix86_section_threshold)
     764              :         return true;
     765              :     }
     766              : 
     767              :   return false;
     768              : }
     769              : 
     770              : /* i386-specific section flag to mark large sections.  */
     771              : #define SECTION_LARGE SECTION_MACH_DEP
     772              : 
     773              : /* Switch to the appropriate section for output of DECL.
     774              :    DECL is either a `VAR_DECL' node or a constant of some sort.
     775              :    RELOC indicates whether forming the initial value of DECL requires
     776              :    link-time relocations.  */
     777              : 
/* Switch to the appropriate section for output of DECL, choosing a
   large-model section (.ldata*, .lbss, .lrodata) when DECL qualifies as
   large data, and deferring to the generic ELF selection otherwise.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations; ALIGN is the required alignment.  */

ATTRIBUTE_UNUSED static section *
x86_64_elf_select_section (tree decl, int reloc,
                           unsigned HOST_WIDE_INT align)
{
  if (ix86_in_large_data_p (decl))
    {
      const char *sname = NULL;
      unsigned int flags = SECTION_WRITE | SECTION_LARGE;
      /* Map the generic section category to the corresponding
         large-model section name, adjusting FLAGS where needed.  */
      switch (categorize_decl_for_section (decl, reloc))
        {
        case SECCAT_DATA:
          sname = ".ldata";
          break;
        case SECCAT_DATA_REL:
          sname = ".ldata.rel";
          break;
        case SECCAT_DATA_REL_LOCAL:
          sname = ".ldata.rel.local";
          break;
        case SECCAT_DATA_REL_RO:
          sname = ".ldata.rel.ro";
          break;
        case SECCAT_DATA_REL_RO_LOCAL:
          sname = ".ldata.rel.ro.local";
          break;
        case SECCAT_BSS:
          sname = ".lbss";
          flags |= SECTION_BSS;
          break;
        case SECCAT_RODATA:
        case SECCAT_RODATA_MERGE_STR:
        case SECCAT_RODATA_MERGE_STR_INIT:
        case SECCAT_RODATA_MERGE_CONST:
          sname = ".lrodata";
          flags &= ~SECTION_WRITE;
          break;
        case SECCAT_SRODATA:
        case SECCAT_SDATA:
        case SECCAT_SBSS:
          gcc_unreachable ();
        case SECCAT_TEXT:
        case SECCAT_TDATA:
        case SECCAT_TBSS:
          /* We don't split these for medium model.  Place them into
             default sections and hope for best.  */
          break;
        }
      if (sname)
        {
          /* We might get called with string constants, but get_named_section
             doesn't like them as they are not DECLs.  Also, we need to set
             flags in that case.  */
          if (!DECL_P (decl))
            return get_section (sname, flags, NULL);
          return get_named_section (decl, sname, reloc);
        }
    }
  return default_elf_select_section (decl, reloc, align);
}
     837              : 
     838              : /* Select a set of attributes for section NAME based on the properties
     839              :    of DECL and whether or not RELOC indicates that DECL's initializer
     840              :    might contain runtime relocations.  */
     841              : 
     842              : static unsigned int ATTRIBUTE_UNUSED
     843     67294513 : x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
     844              : {
     845     67294513 :   unsigned int flags = default_section_type_flags (decl, name, reloc);
     846              : 
     847     67294513 :   if (ix86_in_large_data_p (decl))
     848           10 :     flags |= SECTION_LARGE;
     849              : 
     850     67294513 :   if (decl == NULL_TREE
     851          375 :       && (strcmp (name, ".ldata.rel.ro") == 0
     852          375 :           || strcmp (name, ".ldata.rel.ro.local") == 0))
     853            0 :     flags |= SECTION_RELRO;
     854              : 
     855     67294513 :   if (strcmp (name, ".lbss") == 0
     856     67294509 :       || startswith (name, ".lbss.")
     857    134589019 :       || startswith (name, ".gnu.linkonce.lb."))
     858              :     {
     859            7 :       flags |= SECTION_BSS;
     860              :       /* Clear SECTION_NOTYPE so .lbss etc. are marked @nobits in
     861              :          default_elf_asm_named_section.  */
     862            7 :       flags &= ~SECTION_NOTYPE;
     863              :     }
     864              : 
     865     67294513 :   return flags;
     866              : }
     867              : 
     868              : /* Build up a unique section name, expressed as a
     869              :    STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
     870              :    RELOC indicates whether the initial value of EXP requires
     871              :    link-time relocations.  */
     872              : 
/* Build up a unique section name for DECL and assign it via
   set_decl_section_name.  Large-model data gets a ".l"-prefixed
   (or ".gnu.linkonce.l"-prefixed, for one-only) section; everything
   else falls through to the generic implementation.  RELOC indicates
   whether the initial value of DECL requires link-time relocations.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if (ix86_in_large_data_p (decl))
    {
      const char *prefix = NULL;
      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
      bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;

      /* Pick the large-section prefix from the generic category.  */
      switch (categorize_decl_for_section (decl, reloc))
        {
        case SECCAT_DATA:
        case SECCAT_DATA_REL:
        case SECCAT_DATA_REL_LOCAL:
        case SECCAT_DATA_REL_RO:
        case SECCAT_DATA_REL_RO_LOCAL:
          prefix = one_only ? ".ld" : ".ldata";
          break;
        case SECCAT_BSS:
          prefix = one_only ? ".lb" : ".lbss";
          break;
        case SECCAT_RODATA:
        case SECCAT_RODATA_MERGE_STR:
        case SECCAT_RODATA_MERGE_STR_INIT:
        case SECCAT_RODATA_MERGE_CONST:
          prefix = one_only ? ".lr" : ".lrodata";
          break;
        case SECCAT_SRODATA:
        case SECCAT_SDATA:
        case SECCAT_SBSS:
          gcc_unreachable ();
        case SECCAT_TEXT:
        case SECCAT_TDATA:
        case SECCAT_TBSS:
          /* We don't split these for medium model.  Place them into
             default sections and hope for best.  */
          break;
        }
      if (prefix)
        {
          const char *name, *linkonce;
          char *string;

          name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
          name = targetm.strip_name_encoding (name);

          /* If we're using one_only, then there needs to be a .gnu.linkonce
             prefix to the section name.  */
          linkonce = one_only ? ".gnu.linkonce" : "";

          string = ACONCAT ((linkonce, prefix, ".", name, NULL));

          set_decl_section_name (decl, string);
          return;
        }
    }
  default_unique_section (decl, reloc);
}
     931              : 
     932              : /* Return true if TYPE has no_callee_saved_registers or preserve_none
     933              :    attribute.  */
     934              : 
     935              : bool
     936      7532982 : ix86_type_no_callee_saved_registers_p (const_tree type)
     937              : {
     938     15065964 :   return (lookup_attribute ("no_callee_saved_registers",
     939      7532982 :                             TYPE_ATTRIBUTES (type)) != NULL
     940     15065833 :           || lookup_attribute ("preserve_none",
     941      7532851 :                                TYPE_ATTRIBUTES (type)) != NULL);
     942              : }
     943              : 
     944              : #ifdef COMMON_ASM_OP
     945              : 
     946              : #ifndef LARGECOMM_SECTION_ASM_OP
     947              : #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
     948              : #endif
     949              : 
     950              : /* This says how to output assembler code to declare an
     951              :    uninitialized external linkage data object.
     952              : 
     953              :    For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for
     954              :    large objects.  */
     955              : void
     956       170828 : x86_elf_aligned_decl_common (FILE *file, tree decl,
     957              :                         const char *name, unsigned HOST_WIDE_INT size,
     958              :                         unsigned align)
     959              : {
     960       170828 :   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
     961       170822 :        || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
     962            7 :       && size > (unsigned int)ix86_section_threshold)
     963              :     {
     964            1 :       switch_to_section (get_named_section (decl, ".lbss", 0));
     965            1 :       fputs (LARGECOMM_SECTION_ASM_OP, file);
     966              :     }
     967              :   else
     968       170827 :     fputs (COMMON_ASM_OP, file);
     969       170828 :   assemble_name (file, name);
     970       170828 :   fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
     971              :            size, align / BITS_PER_UNIT);
     972       170828 : }
     973              : #endif
     974              : 
     975              : /* Utility function for targets to use in implementing
     976              :    ASM_OUTPUT_ALIGNED_BSS.  */
     977              : 
     978              : void
     979       767737 : x86_output_aligned_bss (FILE *file, tree decl, const char *name,
     980              :                         unsigned HOST_WIDE_INT size, unsigned align)
     981              : {
     982       767737 :   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
     983       767727 :        || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
     984           42 :       && size > (unsigned int)ix86_section_threshold)
     985            3 :     switch_to_section (get_named_section (decl, ".lbss", 0));
     986              :   else
     987       767734 :     switch_to_section (bss_section);
     988       921743 :   ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
     989              : #ifdef ASM_DECLARE_OBJECT_NAME
     990       767737 :   last_assemble_variable_decl = decl;
     991       767737 :   ASM_DECLARE_OBJECT_NAME (file, name, decl);
     992              : #else
     993              :   /* Standard thing is just output label for the object.  */
     994              :   ASM_OUTPUT_LABEL (file, name);
     995              : #endif /* ASM_DECLARE_OBJECT_NAME */
     996       767737 :   ASM_OUTPUT_SKIP (file, size ? size : 1);
     997       767737 : }
     998              : 
     999              : /* Decide whether we must probe the stack before any space allocation
    1000              :    on this target.  It's essentially TARGET_STACK_PROBE except when
    1001              :    -fstack-check causes the stack to be already probed differently.  */
    1002              : 
    1003              : bool
    1004       869089 : ix86_target_stack_probe (void)
    1005              : {
    1006              :   /* Do not probe the stack twice if static stack checking is enabled.  */
    1007       869089 :   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    1008              :     return false;
    1009              : 
    1010       869089 :   return TARGET_STACK_PROBE;
    1011              : }
    1012              : 
    1013              : /* Decide whether we can make a sibling call to a function.  DECL is the
    1014              :    declaration of the function being targeted by the call and EXP is the
    1015              :    CALL_EXPR representing the call.  */
    1016              : 
    1017              : static bool
    1018       138975 : ix86_function_ok_for_sibcall (tree decl, tree exp)
    1019              : {
    1020       138975 :   tree type, decl_or_type;
    1021       138975 :   rtx a, b;
    1022       138975 :   bool bind_global = decl && !targetm.binds_local_p (decl);
    1023              : 
    1024       138975 :   if (ix86_function_naked (current_function_decl))
    1025              :     return false;
    1026              : 
    1027              :   /* Sibling call isn't OK if there are no caller-saved registers
    1028              :      since all registers must be preserved before return.  */
    1029       138973 :   if (cfun->machine->call_saved_registers
    1030       138973 :       == TYPE_NO_CALLER_SAVED_REGISTERS)
    1031              :     return false;
    1032              : 
    1033              :   /* If we are generating position-independent code, we cannot sibcall
    1034              :      optimize direct calls to global functions, as the PLT requires
    1035              :      %ebx be live. (Darwin does not have a PLT.)  */
    1036       138944 :   if (!TARGET_MACHO
    1037       138944 :       && !TARGET_64BIT
    1038        11329 :       && flag_pic
    1039         8405 :       && flag_plt
    1040         8405 :       && bind_global)
    1041              :     return false;
    1042              : 
    1043              :   /* If we need to align the outgoing stack, then sibcalling would
    1044              :      unalign the stack, which may break the called function.  */
    1045       134298 :   if (ix86_minimum_incoming_stack_boundary (true)
    1046       134298 :       < PREFERRED_STACK_BOUNDARY)
    1047              :     return false;
    1048              : 
    1049       133717 :   if (decl)
    1050              :     {
    1051       122807 :       decl_or_type = decl;
    1052       122807 :       type = TREE_TYPE (decl);
    1053              :     }
    1054              :   else
    1055              :     {
    1056              :       /* We're looking at the CALL_EXPR, we need the type of the function.  */
    1057        10910 :       type = CALL_EXPR_FN (exp);                /* pointer expression */
    1058        10910 :       type = TREE_TYPE (type);                  /* pointer type */
    1059        10910 :       type = TREE_TYPE (type);                  /* function type */
    1060        10910 :       decl_or_type = type;
    1061              :     }
    1062              : 
    1063              :   /* Sibling call isn't OK if callee has no callee-saved registers
    1064              :      and the calling function has callee-saved registers.  */
    1065       133717 :   if ((cfun->machine->call_saved_registers
    1066       133717 :        != TYPE_NO_CALLEE_SAVED_REGISTERS)
    1067       133717 :       && cfun->machine->call_saved_registers != TYPE_PRESERVE_NONE
    1068       133717 :       && ix86_type_no_callee_saved_registers_p (type))
    1069              :     return false;
    1070              : 
    1071              :   /* If outgoing reg parm stack space changes, we cannot do sibcall.  */
    1072       133701 :   if ((OUTGOING_REG_PARM_STACK_SPACE (type)
    1073       133701 :        != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
    1074       266655 :       || (REG_PARM_STACK_SPACE (decl_or_type)
    1075       132954 :           != REG_PARM_STACK_SPACE (current_function_decl)))
    1076              :     {
    1077          747 :       maybe_complain_about_tail_call (exp,
    1078              :                                       "inconsistent size of stack space"
    1079              :                                       " allocated for arguments which are"
    1080              :                                       " passed in registers");
    1081          747 :       return false;
    1082              :     }
    1083              : 
    1084              :   /* Check that the return value locations are the same.  Like
    1085              :      if we are returning floats on the 80387 register stack, we cannot
    1086              :      make a sibcall from a function that doesn't return a float to a
    1087              :      function that does or, conversely, from a function that does return
    1088              :      a float to a function that doesn't; the necessary stack adjustment
    1089              :      would not be executed.  This is also the place we notice
    1090              :      differences in the return value ABI.  Note that it is ok for one
    1091              :      of the functions to have void return type as long as the return
    1092              :      value of the other is passed in a register.  */
    1093       132954 :   a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
    1094       132954 :   b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
    1095       132954 :                            cfun->decl, false);
    1096       132954 :   if (STACK_REG_P (a) || STACK_REG_P (b))
    1097              :     {
    1098         1020 :       if (!rtx_equal_p (a, b))
    1099              :         return false;
    1100              :     }
    1101       131934 :   else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    1102              :     ;
    1103        24852 :   else if (!rtx_equal_p (a, b))
    1104              :     return false;
    1105              : 
    1106       132573 :   if (TARGET_64BIT)
    1107              :     {
    1108              :       /* The SYSV ABI has more call-clobbered registers;
    1109              :          disallow sibcalls from MS to SYSV.  */
    1110       125890 :       if (cfun->machine->call_abi == MS_ABI
    1111       125890 :           && ix86_function_type_abi (type) == SYSV_ABI)
    1112              :         return false;
    1113              :     }
    1114              :   else
    1115              :     {
    1116              :       /* If this call is indirect, we'll need to be able to use a
    1117              :          call-clobbered register for the address of the target function.
    1118              :          Make sure that all such registers are not used for passing
    1119              :          parameters.  Note that DLLIMPORT functions and call to global
    1120              :          function via GOT slot are indirect.  */
    1121         6683 :       if (!decl
    1122         4768 :           || (bind_global && flag_pic && !flag_plt)
    1123              :           || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
    1124         4768 :           || flag_force_indirect_call)
    1125              :         {
    1126              :           /* Check if regparm >= 3 since arg_reg_available is set to
    1127              :              false if regparm == 0.  If regparm is 1 or 2, there is
    1128              :              always a call-clobbered register available.
    1129              : 
    1130              :              ??? The symbol indirect call doesn't need a call-clobbered
    1131              :              register.  But we don't know if this is a symbol indirect
    1132              :              call or not here.  */
    1133         1915 :           if (ix86_function_regparm (type, decl) >= 3
    1134         1915 :               && !cfun->machine->arg_reg_available)
    1135              :             return false;
    1136              :         }
    1137              :     }
    1138              : 
    1139       132573 :   if (decl && ix86_use_pseudo_pic_reg ())
    1140              :     {
    1141              :       /* When PIC register is used, it must be restored after ifunc
    1142              :          function returns.  */
    1143         2059 :        cgraph_node *node = cgraph_node::get (decl);
    1144         2059 :        if (node && node->ifunc_resolver)
    1145              :          return false;
    1146              :     }
    1147              : 
    1148              :   /* Disable sibcall if callee has indirect_return attribute and
    1149              :      caller doesn't since callee will return to the caller's caller
    1150              :      via an indirect jump.  */
    1151       132573 :   if (((flag_cf_protection & (CF_RETURN | CF_BRANCH))
    1152              :        == (CF_RETURN | CF_BRANCH))
    1153        53690 :       && lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (type))
    1154       132577 :       && !lookup_attribute ("indirect_return",
    1155            4 :                             TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))))
    1156              :     return false;
    1157              : 
    1158              :   /* Otherwise okay.  That also includes certain types of indirect calls.  */
    1159              :   return true;
    1160              : }
    1161              : 
    1162              : /* This function determines from TYPE the calling-convention.  */
    1163              : 
    1164              : unsigned int
    1165      6196930 : ix86_get_callcvt (const_tree type)
    1166              : {
    1167      6196930 :   unsigned int ret = 0;
    1168      6196930 :   bool is_stdarg;
    1169      6196930 :   tree attrs;
    1170              : 
    1171      6196930 :   if (TARGET_64BIT)
    1172              :     return IX86_CALLCVT_CDECL;
    1173              : 
    1174      3269720 :   attrs = TYPE_ATTRIBUTES (type);
    1175      3269720 :   if (attrs != NULL_TREE)
    1176              :     {
    1177        67147 :       if (lookup_attribute ("cdecl", attrs))
    1178              :         ret |= IX86_CALLCVT_CDECL;
    1179        67147 :       else if (lookup_attribute ("stdcall", attrs))
    1180              :         ret |= IX86_CALLCVT_STDCALL;
    1181        67147 :       else if (lookup_attribute ("fastcall", attrs))
    1182              :         ret |= IX86_CALLCVT_FASTCALL;
    1183        67138 :       else if (lookup_attribute ("thiscall", attrs))
    1184              :         ret |= IX86_CALLCVT_THISCALL;
    1185              : 
    1186              :       /* Regparam isn't allowed for thiscall and fastcall.  */
    1187              :       if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
    1188              :         {
    1189        67138 :           if (lookup_attribute ("regparm", attrs))
    1190        15828 :             ret |= IX86_CALLCVT_REGPARM;
    1191        67138 :           if (lookup_attribute ("sseregparm", attrs))
    1192            0 :             ret |= IX86_CALLCVT_SSEREGPARM;
    1193              :         }
    1194              : 
    1195        67147 :       if (IX86_BASE_CALLCVT(ret) != 0)
    1196            9 :         return ret;
    1197              :     }
    1198              : 
    1199      3269711 :   is_stdarg = stdarg_p (type);
    1200      3269711 :   if (TARGET_RTD && !is_stdarg)
    1201            0 :     return IX86_CALLCVT_STDCALL | ret;
    1202              : 
    1203      3269711 :   if (ret != 0
    1204      3269711 :       || is_stdarg
    1205      3244885 :       || TREE_CODE (type) != METHOD_TYPE
    1206      3406704 :       || ix86_function_type_abi (type) != MS_ABI)
    1207      3269711 :     return IX86_CALLCVT_CDECL | ret;
    1208              : 
    1209              :   return IX86_CALLCVT_THISCALL;
    1210              : }
    1211              : 
    1212              : /* Return 0 if the attributes for two types are incompatible, 1 if they
    1213              :    are compatible, and 2 if they are nearly compatible (which causes a
    1214              :    warning to be generated).  */
    1215              : 
    1216              : static int
    1217      1481426 : ix86_comp_type_attributes (const_tree type1, const_tree type2)
    1218              : {
    1219      1481426 :   unsigned int ccvt1, ccvt2;
    1220              : 
    1221      1481426 :   if (TREE_CODE (type1) != FUNCTION_TYPE
    1222      1481426 :       && TREE_CODE (type1) != METHOD_TYPE)
    1223              :     return 1;
    1224              : 
    1225      1474729 :   ccvt1 = ix86_get_callcvt (type1);
    1226      1474729 :   ccvt2 = ix86_get_callcvt (type2);
    1227      1474729 :   if (ccvt1 != ccvt2)
    1228              :     return 0;
    1229      2927330 :   if (ix86_function_regparm (type1, NULL)
    1230      1463665 :       != ix86_function_regparm (type2, NULL))
    1231              :     return 0;
    1232              : 
    1233      1425904 :   if (ix86_type_no_callee_saved_registers_p (type1)
    1234       712952 :       != ix86_type_no_callee_saved_registers_p (type2))
    1235              :     return 0;
    1236              : 
     1237              :   /* preserve_none attribute uses a different calling convention and
     1238              :      is only for 64-bit.  */
    1239       712826 :   if (TARGET_64BIT
    1240      1425592 :       && (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type1))
    1241       712766 :           != lookup_attribute ("preserve_none",
    1242       712766 :                                TYPE_ATTRIBUTES (type2))))
    1243              :     return 0;
    1244              : 
    1245              :   return 1;
    1246              : }
    1247              : 
    1248              : /* Return the regparm value for a function with the indicated TYPE and DECL.
    1249              :    DECL may be NULL when calling function indirectly
    1250              :    or considering a libcall.  */
    1251              : 
    1252              : static int
    1253      4198840 : ix86_function_regparm (const_tree type, const_tree decl)
    1254              : {
    1255      4198840 :   tree attr;
    1256      4198840 :   int regparm;
    1257      4198840 :   unsigned int ccvt;
    1258              : 
    1259      4198840 :   if (TARGET_64BIT)
    1260      2927210 :     return (ix86_function_type_abi (type) == SYSV_ABI
    1261      2927210 :             ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
    1262      1271630 :   ccvt = ix86_get_callcvt (type);
    1263      1271630 :   regparm = ix86_regparm;
    1264              : 
    1265      1271630 :   if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    1266              :     {
    1267         2020 :       attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
    1268         2020 :       if (attr)
    1269              :         {
    1270         2020 :           regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
    1271         2020 :           return regparm;
    1272              :         }
    1273              :     }
    1274      1269610 :   else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    1275              :     return 2;
    1276      1269610 :   else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    1277              :     return 1;
    1278              : 
    1279              :   /* Use register calling convention for local functions when possible.  */
    1280      1269610 :   if (decl
    1281      1205476 :       && TREE_CODE (decl) == FUNCTION_DECL)
    1282              :     {
    1283      1195381 :       cgraph_node *target = cgraph_node::get (decl);
    1284      1195381 :       if (target)
    1285      1187865 :         target = target->function_symbol ();
    1286              : 
     1287              :       /* Caller and callee must agree on the calling convention, so
     1288              :          checking just the caller's `optimize' attribute here means that
     1289              :          with __attribute__((optimize (...))) the caller could use the
     1290              :          regparm convention and the callee not, or vice versa.  Instead
     1291              :          look at whether the callee is optimized or not.  */
    1292      1187865 :       if (target && opt_for_fn (target->decl, optimize)
    1293      2374838 :           && !(profile_flag && !flag_fentry))
    1294              :         {
    1295      1186973 :           if (target->local && target->can_change_signature)
    1296              :             {
    1297       140024 :               int local_regparm, globals = 0, regno;
    1298              : 
    1299              :               /* Make sure no regparm register is taken by a
    1300              :                  fixed register variable.  */
    1301       140024 :               for (local_regparm = 0; local_regparm < REGPARM_MAX;
    1302              :                    local_regparm++)
    1303       105018 :                 if (fixed_regs[local_regparm])
    1304              :                   break;
    1305              : 
    1306              :               /* We don't want to use regparm(3) for nested functions as
    1307              :                  these use a static chain pointer in the third argument.  */
    1308        35006 :               if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
    1309              :                 local_regparm = 2;
    1310              : 
    1311              :               /* Save a register for the split stack.  */
    1312        35006 :               if (flag_split_stack)
    1313              :                 {
    1314        20760 :                   if (local_regparm == 3)
    1315              :                     local_regparm = 2;
    1316          707 :                   else if (local_regparm == 2
    1317          707 :                            && DECL_STATIC_CHAIN (target->decl))
    1318              :                     local_regparm = 1;
    1319              :                 }
    1320              : 
     1321              :       /* Each fixed register usage increases register pressure,
     1322              :          so fewer registers should be used for argument passing.
     1323              :          This functionality can be overridden by an explicit
     1324              :          regparm value.  */
    1325       245042 :               for (regno = AX_REG; regno <= DI_REG; regno++)
    1326       210036 :                 if (fixed_regs[regno])
    1327            0 :                   globals++;
    1328              : 
    1329        35006 :               local_regparm
    1330        35006 :                 = globals < local_regparm ? local_regparm - globals : 0;
    1331              : 
    1332        35006 :               if (local_regparm > regparm)
    1333      4198840 :                 regparm = local_regparm;
    1334              :             }
    1335              :         }
    1336              :     }
    1337              : 
    1338              :   return regparm;
    1339              : }
    1340              : 
    1341              : /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
    1342              :    DFmode (2) arguments in SSE registers for a function with the
    1343              :    indicated TYPE and DECL.  DECL may be NULL when calling function
    1344              :    indirectly or considering a libcall.  Return -1 if any FP parameter
     1345              :    should be rejected by error.  This is used in situations where we
     1346              :    imply the SSE calling convention but the function is called from
     1347              :    another function with SSE disabled.  Otherwise return 0.  */
    1348              : 
    1349              : static int
    1350      1077243 : ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
    1351              : {
    1352      1077243 :   gcc_assert (!TARGET_64BIT);
    1353              : 
    1354              :   /* Use SSE registers to pass SFmode and DFmode arguments if requested
    1355              :      by the sseregparm attribute.  */
    1356      1077243 :   if (TARGET_SSEREGPARM
    1357      1077243 :       || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    1358              :     {
    1359            0 :       if (!TARGET_SSE)
    1360              :         {
    1361            0 :           if (warn)
    1362              :             {
    1363            0 :               if (decl)
    1364            0 :                 error ("calling %qD with attribute sseregparm without "
    1365              :                        "SSE/SSE2 enabled", decl);
    1366              :               else
    1367            0 :                 error ("calling %qT with attribute sseregparm without "
    1368              :                        "SSE/SSE2 enabled", type);
    1369              :             }
    1370            0 :           return 0;
    1371              :         }
    1372              : 
    1373              :       return 2;
    1374              :     }
    1375              : 
    1376      1077243 :   if (!decl)
    1377              :     return 0;
    1378              : 
    1379       978874 :   cgraph_node *target = cgraph_node::get (decl);
    1380       978874 :   if (target)
    1381       971365 :     target = target->function_symbol ();
    1382              : 
    1383              :   /* For local functions, pass up to SSE_REGPARM_MAX SFmode
    1384              :      (and DFmode for SSE2) arguments in SSE registers.  */
    1385       971365 :   if (target
    1386              :       /* TARGET_SSE_MATH */
    1387       971365 :       && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
    1388         1296 :       && opt_for_fn (target->decl, optimize)
    1389       972661 :       && !(profile_flag && !flag_fentry))
    1390              :     {
    1391         1296 :       if (target->local && target->can_change_signature)
    1392              :         {
    1393              :           /* Refuse to produce wrong code when local function with SSE enabled
    1394              :              is called from SSE disabled function.
    1395              :              FIXME: We need a way to detect these cases cross-ltrans partition
    1396              :              and avoid using SSE calling conventions on local functions called
    1397              :              from function with SSE disabled.  For now at least delay the
    1398              :              warning until we know we are going to produce wrong code.
    1399              :              See PR66047  */
    1400            0 :           if (!TARGET_SSE && warn)
    1401              :             return -1;
    1402            0 :           return TARGET_SSE2_P (target_opts_for_fn (target->decl)
    1403            0 :                                 ->x_ix86_isa_flags) ? 2 : 1;
    1404              :         }
    1405              :     }
    1406              : 
    1407              :   return 0;
    1408              : }
    1409              : 
    1410              : /* Return true if EAX is live at the start of the function.  Used by
    1411              :    ix86_expand_prologue to determine if we need special help before
    1412              :    calling allocate_stack_worker.  */
    1413              : 
    1414              : static bool
    1415         7090 : ix86_eax_live_at_start_p (void)
    1416              : {
    1417              :   /* Cheat.  Don't bother working forward from ix86_function_regparm
    1418              :      to the function type to whether an actual argument is located in
    1419              :      eax.  Instead just look at cfg info, which is still close enough
    1420              :      to correct at this point.  This gives false positives for broken
    1421              :      functions that might use uninitialized data that happens to be
    1422              :      allocated in eax, but who cares?  */
    1423         7090 :   return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
    1424              : }
    1425              : 
    1426              : static bool
    1427       159954 : ix86_keep_aggregate_return_pointer (tree fntype)
    1428              : {
    1429       159954 :   tree attr;
    1430              : 
    1431       159954 :   if (!TARGET_64BIT)
    1432              :     {
    1433       159954 :       attr = lookup_attribute ("callee_pop_aggregate_return",
    1434       159954 :                                TYPE_ATTRIBUTES (fntype));
    1435       159954 :       if (attr)
    1436            0 :         return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
    1437              : 
    1438              :       /* For 32-bit MS-ABI the default is to keep aggregate
    1439              :          return pointer.  */
    1440       159954 :       if (ix86_function_type_abi (fntype) == MS_ABI)
    1441              :         return true;
    1442              :     }
    1443              :   return KEEP_AGGREGATE_RETURN_POINTER != 0;
    1444              : }
    1445              : 
    1446              : /* Value is the number of bytes of arguments automatically
    1447              :    popped when returning from a subroutine call.
    1448              :    FUNDECL is the declaration node of the function (as a tree),
    1449              :    FUNTYPE is the data type of the function (as a tree),
    1450              :    or for a library call it is an identifier node for the subroutine name.
    1451              :    SIZE is the number of bytes of arguments passed on the stack.
    1452              : 
    1453              :    On the 80386, the RTD insn may be used to pop them if the number
    1454              :      of args is fixed, but if the number is variable then the caller
    1455              :      must pop them all.  RTD can't be used for library calls now
    1456              :      because the library is compiled with the Unix compiler.
    1457              :    Use of RTD is a selectable option, since it is incompatible with
    1458              :    standard Unix calling sequences.  If the option is not selected,
    1459              :    the caller must always pop the args.
    1460              : 
    1461              :    The attribute stdcall is equivalent to RTD on a per module basis.  */
    1462              : 
    1463              : static poly_int64
    1464      7582995 : ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
    1465              : {
    1466      7582995 :   unsigned int ccvt;
    1467              : 
    1468              :   /* None of the 64-bit ABIs pop arguments.  */
    1469      7582995 :   if (TARGET_64BIT)
    1470      6709772 :     return 0;
    1471              : 
    1472       873223 :   ccvt = ix86_get_callcvt (funtype);
    1473              : 
    1474       873223 :   if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
    1475              :                | IX86_CALLCVT_THISCALL)) != 0
    1476       873223 :       && ! stdarg_p (funtype))
    1477            3 :     return size;
    1478              : 
    1479              :   /* Lose any fake structure return argument if it is passed on the stack.  */
    1480       873220 :   if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
    1481       873220 :       && !ix86_keep_aggregate_return_pointer (funtype))
    1482              :     {
    1483       159954 :       int nregs = ix86_function_regparm (funtype, fundecl);
    1484       159954 :       if (nregs == 0)
    1485       458907 :         return GET_MODE_SIZE (Pmode);
    1486              :     }
    1487              : 
    1488       720251 :   return 0;
    1489              : }
    1490              : 
    1491              : /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */
    1492              : 
    1493              : static bool
    1494     10029983 : ix86_legitimate_combined_insn (rtx_insn *insn)
    1495              : {
    1496     10029983 :   int i;
    1497              : 
    1498              :   /* Check operand constraints in case hard registers were propagated
    1499              :      into insn pattern.  This check prevents combine pass from
    1500              :      generating insn patterns with invalid hard register operands.
    1501              :      These invalid insns can eventually confuse reload to error out
    1502              :      with a spill failure.  See also PRs 46829 and 46843.  */
    1503              : 
    1504     10029983 :   gcc_assert (INSN_CODE (insn) >= 0);
    1505              : 
    1506     10029983 :   extract_insn (insn);
    1507     10029983 :   preprocess_constraints (insn);
    1508              : 
    1509     10029983 :   int n_operands = recog_data.n_operands;
    1510     10029983 :   int n_alternatives = recog_data.n_alternatives;
    1511     34249944 :   for (i = 0; i < n_operands; i++)
    1512              :     {
    1513     24223486 :       rtx op = recog_data.operand[i];
    1514     24223486 :       machine_mode mode = GET_MODE (op);
    1515     24223486 :       const operand_alternative *op_alt;
    1516     24223486 :       int offset = 0;
    1517     24223486 :       bool win;
    1518     24223486 :       int j;
    1519              : 
    1520              :       /* A unary operator may be accepted by the predicate, but it
    1521              :          is irrelevant for matching constraints.  */
    1522     24223486 :       if (UNARY_P (op))
    1523        48612 :         op = XEXP (op, 0);
    1524              : 
    1525     24223486 :       if (SUBREG_P (op))
    1526              :         {
    1527       876585 :           if (REG_P (SUBREG_REG (op))
    1528       876585 :               && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
    1529           55 :             offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
    1530           55 :                                           GET_MODE (SUBREG_REG (op)),
    1531           55 :                                           SUBREG_BYTE (op),
    1532           55 :                                           GET_MODE (op));
    1533       876585 :           op = SUBREG_REG (op);
    1534              :         }
    1535              : 
    1536     24223486 :       if (!(REG_P (op) && HARD_REGISTER_P (op)))
    1537     23919306 :         continue;
    1538              : 
    1539       304180 :       op_alt = recog_op_alt;
    1540              : 
    1541              :       /* Operand has no constraints, anything is OK.  */
    1542       304180 :       win = !n_alternatives;
    1543              : 
    1544       304180 :       alternative_mask preferred = get_preferred_alternatives (insn);
    1545       833301 :       for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
    1546              :         {
    1547       525517 :           if (!TEST_BIT (preferred, j))
    1548       137151 :             continue;
    1549       388366 :           if (op_alt[i].anything_ok
    1550       202847 :               || (op_alt[i].matches != -1
    1551        33822 :                   && operands_match_p
    1552        33822 :                   (recog_data.operand[i],
    1553        33822 :                    recog_data.operand[op_alt[i].matches]))
    1554       587101 :               || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
    1555              :             {
    1556              :               win = true;
    1557              :               break;
    1558              :             }
    1559              :         }
    1560              : 
    1561       304180 :       if (!win)
    1562              :         return false;
    1563              :     }
    1564              : 
    1565              :   return true;
    1566              : }
    1567              : 
    1568              : /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
    1569              : 
    1570              : static unsigned HOST_WIDE_INT
    1571         4616 : ix86_asan_shadow_offset (void)
    1572              : {
    1573         4616 :   return SUBTARGET_SHADOW_OFFSET;
    1574              : }
    1575              : 
    1576              : /* Argument support functions.  */
    1577              : 
    1578              : /* Return true when register may be used to pass function parameters.  */
    1579              : bool
    1580   1479502617 : ix86_function_arg_regno_p (int regno)
    1581              : {
    1582   1479502617 :   int i;
    1583   1479502617 :   enum calling_abi call_abi;
    1584   1479502617 :   const int *parm_regs;
    1585              : 
    1586   1476056754 :   if (TARGET_SSE && SSE_REGNO_P (regno)
    1587   2446768039 :       && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
    1588              :     return true;
    1589              : 
    1590   1360045431 :    if (!TARGET_64BIT)
    1591    129167428 :      return (regno < REGPARM_MAX
    1592    129167428 :              || (TARGET_MMX && MMX_REGNO_P (regno)
    1593     11609184 :                  && regno < FIRST_MMX_REG + MMX_REGPARM_MAX));
    1594              : 
    1595              :   /* TODO: The function should depend on current function ABI but
    1596              :      builtins.cc would need updating then. Therefore we use the
    1597              :      default ABI.  */
    1598   1230878003 :   call_abi = ix86_cfun_abi ();
    1599              : 
    1600              :   /* RAX is used as hidden argument to va_arg functions.  */
    1601   1230878003 :   if (call_abi == SYSV_ABI && regno == AX_REG)
    1602              :     return true;
    1603              : 
    1604   1216654770 :   if (cfun
    1605   1216654438 :       && cfun->machine->call_saved_registers == TYPE_PRESERVE_NONE)
    1606              :     parm_regs = x86_64_preserve_none_int_parameter_registers;
    1607   1216636510 :   else if (call_abi == MS_ABI)
    1608              :     parm_regs = x86_64_ms_abi_int_parameter_registers;
    1609              :   else
    1610   1180674222 :     parm_regs = x86_64_int_parameter_registers;
    1611              : 
    1612  16283290598 :   for (i = 0; i < (call_abi == MS_ABI
    1613   8141645299 :                    ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    1614   7012011130 :     if (regno == parm_regs[i])
    1615              :       return true;
    1616              :   return false;
    1617              : }
    1618              : 
    1619              : /* Return if we do not know how to pass ARG solely in registers.  */
    1620              : 
    1621              : static bool
    1622    404046318 : ix86_must_pass_in_stack (const function_arg_info &arg)
    1623              : {
    1624    404046318 :   if (must_pass_in_stack_var_size_or_pad (arg))
    1625              :     return true;
    1626              : 
    1627              :   /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
    1628              :      The layout_type routine is crafty and tries to trick us into passing
    1629              :      currently unsupported vector types on the stack by using TImode.  */
    1630      1771411 :   return (!TARGET_64BIT && arg.mode == TImode
    1631    404046281 :           && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
    1632              : }
    1633              : 
    1634              : /* It returns the size, in bytes, of the area reserved for arguments passed
    1635              :    in registers for the function represented by fndecl dependent to the used
    1636              :    abi format.  */
    1637              : int
    1638     10691520 : ix86_reg_parm_stack_space (const_tree fndecl)
    1639              : {
    1640     10691520 :   enum calling_abi call_abi = SYSV_ABI;
    1641     10691520 :   if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    1642     10380011 :     call_abi = ix86_function_abi (fndecl);
    1643              :   else
    1644       311509 :     call_abi = ix86_function_type_abi (fndecl);
    1645     10691520 :   if (TARGET_64BIT && call_abi == MS_ABI)
    1646       119238 :     return 32;
    1647              :   return 0;
    1648              : }
    1649              : 
    1650              : /* We add this as a workaround in order to use libc_has_function
    1651              :    hook in i386.md.  */
    1652              : bool
    1653            0 : ix86_libc_has_function (enum function_class fn_class)
    1654              : {
    1655            0 :   return targetm.libc_has_function (fn_class, NULL_TREE);
    1656              : }
    1657              : 
    1658              : /* Returns value SYSV_ABI, MS_ABI dependent on fntype,
    1659              :    specifying the call abi used.  */
    1660              : enum calling_abi
    1661    439778280 : ix86_function_type_abi (const_tree fntype)
    1662              : {
    1663    439778280 :   enum calling_abi abi = ix86_abi;
    1664              : 
    1665    439778280 :   if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
    1666              :     return abi;
    1667              : 
    1668     17603693 :   if (abi == SYSV_ABI
    1669     17603693 :       && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
    1670              :     {
    1671      2606722 :       static int warned;
    1672      2606722 :       if (TARGET_X32 && !warned)
    1673              :         {
    1674            1 :           error ("X32 does not support %<ms_abi%> attribute");
    1675            1 :           warned = 1;
    1676              :         }
    1677              : 
    1678              :       abi = MS_ABI;
    1679              :     }
    1680     14996971 :   else if (abi == MS_ABI
    1681     14996971 :            && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
    1682              :     abi = SYSV_ABI;
    1683              : 
    1684              :   return abi;
    1685              : }
    1686              : 
    1687              : enum calling_abi
    1688    217245494 : ix86_function_abi (const_tree fndecl)
    1689              : {
    1690    217245494 :   return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
    1691              : }
    1692              : 
    1693              : /* Returns value SYSV_ABI, MS_ABI dependent on cfun,
    1694              :    specifying the call abi used.  */
    1695              : enum calling_abi
    1696   2080594938 : ix86_cfun_abi (void)
    1697              : {
    1698   2080594938 :   return cfun ? cfun->machine->call_abi : ix86_abi;
    1699              : }
    1700              : 
    1701              : bool
    1702      5028361 : ix86_function_ms_hook_prologue (const_tree fn)
    1703              : {
    1704      5028361 :   if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    1705              :     {
    1706            8 :       if (decl_function_context (fn) != NULL_TREE)
    1707            0 :         error_at (DECL_SOURCE_LOCATION (fn),
    1708              :                   "%<ms_hook_prologue%> attribute is not compatible "
    1709              :                   "with nested function");
    1710              :       else
    1711              :         return true;
    1712              :     }
    1713              :   return false;
    1714              : }
    1715              : 
    1716              : bool
    1717    115107867 : ix86_function_naked (const_tree fn)
    1718              : {
    1719    115107867 :   if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
    1720              :     return true;
    1721              : 
    1722              :   return false;
    1723              : }
    1724              : 
/* Write the extra assembler code needed to declare a function properly.
   OUT_FILE is the assembly output stream, FNAME the label name, DECL the
   FUNCTION_DECL being emitted.  For functions carrying the
   "ms_hook_prologue" attribute, surround the label with the byte patterns
   hot-patching tools expect.  */

void
ix86_asm_output_function_label (FILE *out_file, const char *fname,
                                tree decl)
{
  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);

  /* Record that the label was emitted so later passes know the function
     body has started.  */
  if (cfun)
    cfun->machine->function_label_emitted = true;

  if (is_ms_hook)
    {
      /* Emit filler words (0xcc = int3 bytes) BEFORE the label: 32 bytes
         in 64-bit mode, 16 bytes in 32-bit mode, written one 4-byte word
         per directive.  */
      int i, filler_count = (TARGET_64BIT ? 32 : 16);
      unsigned int filler_cc = 0xcccccccc;

      for (i = 0; i < filler_count; i += 4)
        fprintf (out_file, ASM_LONG " %#x\n", filler_cc);
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (out_file);
#endif

  assemble_function_label_raw (out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  The exact
     instruction encodings are emitted as raw bytes so the prologue size
     and layout are fixed regardless of assembler choices.  */
  if (is_ms_hook)
    {
      if (TARGET_64BIT)
        {
          /* leaq [%rsp + 0], %rsp  */
          fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
                 out_file);
        }
      else
        {
          /* movl.s %edi, %edi
             push   %ebp
             movl.s %esp, %ebp */
          fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", out_file);
        }
    }
}
    1769              : 
    1770              : /* Output a user-defined label.  In AT&T syntax, registers are prefixed
    1771              :    with %, so labels require no punctuation.  In Intel syntax, registers
    1772              :    are unprefixed, so labels may clash with registers or other operators,
    1773              :    and require quoting.  */
    1774              : void
    1775     35036178 : ix86_asm_output_labelref (FILE *file, const char *prefix, const char *label)
    1776              : {
    1777     35036178 :   if (ASSEMBLER_DIALECT == ASM_ATT)
    1778     35035078 :     fprintf (file, "%s%s", prefix, label);
    1779              :   else
    1780         1100 :     fprintf (file, "\"%s%s\"", prefix, label);
    1781     35036178 : }
    1782              : 
    1783              : /* Implementation of call abi switching target hook. Specific to FNDECL
    1784              :    the specific call register sets are set.  See also
    1785              :    ix86_conditional_register_usage for more details.  */
    1786              : void
    1787    196749138 : ix86_call_abi_override (const_tree fndecl)
    1788              : {
    1789    196749138 :   cfun->machine->call_abi = ix86_function_abi (fndecl);
    1790    196749138 : }
    1791              : 
    1792              : /* Return 1 if pseudo register should be created and used to hold
    1793              :    GOT address for PIC code.  */
    1794              : bool
    1795    169930737 : ix86_use_pseudo_pic_reg (void)
    1796              : {
    1797    169930737 :   if ((TARGET_64BIT
    1798    158882977 :        && (ix86_cmodel == CM_SMALL_PIC
    1799              :            || TARGET_PECOFF))
    1800    164000143 :       || !flag_pic)
    1801    165131920 :     return false;
    1802              :   return true;
    1803              : }
    1804              : 
/* Initialize large model PIC register.  Emits the instruction sequence
   that materializes the GOT base address into pic_offset_table_rtx,
   using the hard register TMP_REGNO as scratch.  Only valid for 64-bit
   code (large PIC model), hence the Pmode == DImode assertion.  */

static void
ix86_init_large_pic_reg (unsigned int tmp_regno)
{
  rtx_code_label *label;
  rtx tmp_reg;

  gcc_assert (Pmode == DImode);
  /* Emit a local label at the current point; its address gives us a
     known RIP value to compute the GOT address from.  */
  label = gen_label_rtx ();
  emit_label (label);
  LABEL_PRESERVE_P (label) = 1;
  tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
  /* The scratch must not alias the PIC register we are initializing.  */
  gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
  /* Load the label's runtime address, load the GOT's offset relative to
     the label into TMP_REG, then add — presumably yielding the absolute
     GOT base (see the set_rip_rex64 / set_got_offset_rex64 patterns in
     i386.md for the exact semantics).  */
  emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
                                label));
  emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
  emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
  /* Turn the label into a deleted-label note (keeping its name) so later
     passes do not treat it as a live jump target.  */
  const char *name = LABEL_NAME (label);
  PUT_CODE (label, NOTE);
  NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
  NOTE_DELETED_LABEL_NAME (label) = name;
}
    1828              : 
/* Create and initialize PIC register if required.  The initialization
   sequence is built in a detached insn sequence and then inserted on the
   entry edge of the function, so it executes exactly once before any
   other code.  */
static void
ix86_init_pic_reg (void)
{
  edge entry_edge;
  rtx_insn *seq;

  /* Nothing to do unless this function actually uses a pseudo PIC
     register (see ix86_use_pseudo_pic_reg).  */
  if (!ix86_use_pseudo_pic_reg ())
    return;

  start_sequence ();

  if (TARGET_64BIT)
    {
      /* Large-model PIC needs the multi-insn RIP-relative sequence;
         other 64-bit models load the GOT with a single pattern.  */
      if (ix86_cmodel == CM_LARGE_PIC)
        ix86_init_large_pic_reg (R11_REG);
      else
        emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
    }
  else
    {
      /*  If there is future mcount call in the function it is more profitable
          to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
      rtx reg = crtl->profile
                ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
                : pic_offset_table_rtx;
      rtx_insn *insn = emit_insn (gen_set_got (reg));
      /* Mark frame-related so the CFI machinery accounts for the
         get-PC thunk's call/return.  */
      RTX_FRAME_RELATED_P (insn) = 1;
      if (crtl->profile)
        emit_move_insn (pic_offset_table_rtx, reg);
      add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
    }

  seq = end_sequence ();

  /* Splice the sequence onto the single edge out of the entry block.  */
  entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  insert_insn_on_edge (seq, entry_edge);
  commit_one_edge_insertion (entry_edge);
}
    1868              : 
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.

   FNDECL is the callee's decl when known, LIBNAME the SYMBOL_REF of a
   library routine (or 0), and CALLER is nonzero when CUM describes the
   caller side of the call.  Fills in the register budgets (integer, SSE,
   MMX), the callee's ABI, varargs information and the empty-class ABI
   warning state.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
                      tree fntype,      /* tree ptr for function decl */
                      rtx libname,      /* SYMBOL_REF of library name or 0 */
                      tree fndecl,
                      int caller)
{
  struct cgraph_node *local_info_node = NULL;
  struct cgraph_node *target = NULL;

  /* Set silent_p to false to raise an error for invalid calls when
     expanding function body.  */
  cfun->machine->silent_p = false;

  memset (cum, 0, sizeof (*cum));

  /* Type whose attributes decide whether the preserve_none ABI applies:
     the ultimate callee's type when we can resolve it, else FNTYPE.  */
  tree preserve_none_type;
  if (fndecl)
    {
      target = cgraph_node::get (fndecl);
      if (target)
        {
          /* Follow aliases/thunks to the real function symbol so the
             ABI is taken from the actual implementation.  */
          target = target->function_symbol ();
          local_info_node = cgraph_node::local_info_node (target->decl);
          cum->call_abi = ix86_function_abi (target->decl);
          preserve_none_type = TREE_TYPE (target->decl);
        }
      else
        {
          cum->call_abi = ix86_function_abi (fndecl);
          preserve_none_type = TREE_TYPE (fndecl);
        }
    }
  else
    {
      cum->call_abi = ix86_function_type_abi (fntype);
      preserve_none_type = fntype;
    }
  cum->preserve_none_abi
    = (preserve_none_type
       && (lookup_attribute ("preserve_none",
                             TYPE_ATTRIBUTES (preserve_none_type))
           != nullptr));

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    {
      cum->nregs = (cum->call_abi == SYSV_ABI
                   ? X86_64_REGPARM_MAX
                   : X86_64_MS_REGPARM_MAX);
    }
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
        {
          cum->sse_nregs = (cum->call_abi == SYSV_ABI
                           ? X86_64_SSE_REGPARM_MAX
                           : X86_64_MS_SSE_REGPARM_MAX);
        }
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  /* Arm all the one-shot psABI warnings; they are cleared below for
     32-bit varargs functions where no registers are used anyway.  */
  cum->warn_avx512f = true;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive
     helping K&R code.
     FIXME: once typesytem is fixed, we won't need this code anymore.  */
  if (local_info_node && local_info_node->local
      && local_info_node->can_change_signature)
    fntype = TREE_TYPE (target->decl);
  cum->stdarg = stdarg_p (fntype);
  /* An unprototyped function, or an explicit stdarg one, may take
     variable arguments; for libcalls (no FNTYPE) assume fixed args.  */
  cum->maybe_vaarg = (fntype
                      ? (!prototype_p (fntype) || stdarg_p (fntype))
                      : !libname);

  cum->decl = fndecl;

  /* -Wabi empty-class warning: only needed when a non-empty argument
     follows an empty one in the parameter list.  */
  cum->warn_empty = !warn_abi || cum->stdarg;
  if (!cum->warn_empty && fntype)
    {
      function_args_iterator iter;
      tree argtype;
      bool seen_empty_type = false;
      FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
        {
          if (argtype == error_mark_node || VOID_TYPE_P (argtype))
            break;
          if (TYPE_EMPTY_P (argtype))
            seen_empty_type = true;
          else if (seen_empty_type)
            {
              cum->warn_empty = true;
              break;
            }
        }
    }

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
         in registers in 32-bit mode. */
      if (stdarg_p (fntype))
        {
          cum->nregs = 0;
          /* Since in 32-bit, variable arguments are always passed on
             stack, there is scratch register available for indirect
             sibcall.  */
          cfun->machine->arg_reg_available = true;
          cum->sse_nregs = 0;
          cum->mmx_nregs = 0;
          cum->warn_avx512f = false;
          cum->warn_avx = false;
          cum->warn_sse = false;
          cum->warn_mmx = false;
          return;
        }

      /* Use ecx and edx registers if function has fastcall attribute,
         else look for regparm information.  */
      if (fntype)
        {
          unsigned int ccvt = ix86_get_callcvt (fntype);
          if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
            {
              cum->nregs = 1;
              cum->fastcall = 1; /* Same first register as in fastcall.  */
            }
          else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
            {
              cum->nregs = 2;
              cum->fastcall = 1;
            }
          else
            cum->nregs = ix86_function_regparm (fntype, fndecl);
        }

      /* Set up the number of SSE registers used for passing SFmode
         and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    }

  cfun->machine->arg_reg_available = (cum->nregs > 0);
}
    2026              : 
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The midde-end can't deal with the vector types > 16 bytes.  In this
   case, we return the original mode and warn ABI change if CUM isn't
   NULL.

   If INT_RETURN is true, warn ABI change if the vector mode isn't
   available for function return value.  */

static machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
                   bool in_return)
{
  machine_mode mode = TYPE_MODE (type);

  /* Only vector types that the middle-end lowered to a non-vector mode
     need fixing up; everything else keeps TYPE_MODE.  */
  if (VECTOR_TYPE_P (type) && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32 || size == 64)
          /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
          && TYPE_VECTOR_SUBPARTS (type) > 1)
        {
          machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

          /* There are no XFmode vector modes ...  */
          if (innermode == XFmode)
            return mode;

          /* ... and no decimal float vector modes.  */
          if (DECIMAL_FLOAT_MODE_P (innermode))
            return mode;

          /* Seed the mode search at the first vector mode of the right
             family (float vs. integer).  */
          if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type)))
            mode = MIN_MODE_VECTOR_FLOAT;
          else
            mode = MIN_MODE_VECTOR_INT;

          /* Get the mode which has this inner mode and number of units.  */
          FOR_EACH_MODE_FROM (mode, mode)
            if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
                && GET_MODE_INNER (mode) == innermode)
              {
                /* The warning flags below are static so each psABI
                   change is diagnosed at most once per compilation.  */
                if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
                  {
                    static bool warnedavx512f;
                    static bool warnedavx512f_ret;

                    if (cum && cum->warn_avx512f && !warnedavx512f)
                      {
                        if (warning (OPT_Wpsabi, "AVX512F vector argument "
                                     "without AVX512F enabled changes the ABI"))
                          warnedavx512f = true;
                      }
                    else if (in_return && !warnedavx512f_ret)
                      {
                        if (warning (OPT_Wpsabi, "AVX512F vector return "
                                     "without AVX512F enabled changes the ABI"))
                          warnedavx512f_ret = true;
                      }

                    /* ISA not available: keep the middle-end's mode.  */
                    return TYPE_MODE (type);
                  }
                else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
                  {
                    static bool warnedavx;
                    static bool warnedavx_ret;

                    if (cum && cum->warn_avx && !warnedavx)
                      {
                        if (warning (OPT_Wpsabi, "AVX vector argument "
                                     "without AVX enabled changes the ABI"))
                          warnedavx = true;
                      }
                    else if (in_return && !warnedavx_ret)
                      {
                        if (warning (OPT_Wpsabi, "AVX vector return "
                                     "without AVX enabled changes the ABI"))
                          warnedavx_ret = true;
                      }

                    /* ISA not available: keep the middle-end's mode.  */
                    return TYPE_MODE (type);
                  }
                else if (((size == 8 && TARGET_64BIT) || size == 16)
                         && !TARGET_SSE
                         && !TARGET_IAMCU)
                  {
                    static bool warnedsse;
                    static bool warnedsse_ret;

                    if (cum && cum->warn_sse && !warnedsse)
                      {
                        if (warning (OPT_Wpsabi, "SSE vector argument "
                                     "without SSE enabled changes the ABI"))
                          warnedsse = true;
                      }
                    else if (!TARGET_64BIT && in_return && !warnedsse_ret)
                      {
                        if (warning (OPT_Wpsabi, "SSE vector return "
                                     "without SSE enabled changes the ABI"))
                          warnedsse_ret = true;
                      }
                    /* Note: unlike the AVX cases above, control falls
                       through to "return mode" below.  */
                  }
                else if ((size == 8 && !TARGET_64BIT)
                         && (!cfun
                             || cfun->machine->func_type == TYPE_NORMAL)
                         && !TARGET_MMX
                         && !TARGET_IAMCU)
                  {
                    static bool warnedmmx;
                    static bool warnedmmx_ret;

                    if (cum && cum->warn_mmx && !warnedmmx)
                      {
                        if (warning (OPT_Wpsabi, "MMX vector argument "
                                     "without MMX enabled changes the ABI"))
                          warnedmmx = true;
                      }
                    else if (in_return && !warnedmmx_ret)
                      {
                        if (warning (OPT_Wpsabi, "MMX vector return "
                                     "without MMX enabled changes the ABI"))
                          warnedmmx_ret = true;
                      }
                  }
                /* Found the matching vector mode; use it.  */
                return mode;
              }

          /* A (supported-size, supported-inner-mode) pair must always
             have a matching vector mode.  */
          gcc_unreachable ();
        }
    }

  return mode;
}
    2168              : 
    2169              : /* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
    2170              :    this may not agree with the mode that the type system has chosen for the
    2171              :    register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
    2172              :    go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
    2173              : 
    2174              : static rtx
    2175     36445491 : gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
    2176              :                      unsigned int regno)
    2177              : {
    2178     36445491 :   rtx tmp;
    2179              : 
    2180     36445491 :   if (orig_mode != BLKmode)
    2181     36445463 :     tmp = gen_rtx_REG (orig_mode, regno);
    2182              :   else
    2183              :     {
    2184           28 :       tmp = gen_rtx_REG (mode, regno);
    2185           28 :       tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
    2186           28 :       tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    2187              :     }
    2188              : 
    2189     36445491 :   return tmp;
    2190              : }
    2191              : 
    2192              : /* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
    2193              :    of this code is to classify each 8bytes of incoming argument by the register
    2194              :    class and assign registers accordingly.  */
    2195              : 
    2196              : /* Return the union class of CLASS1 and CLASS2.
    2197              :    See the x86-64 PS ABI for details.  */
    2198              : 
    2199              : static enum x86_64_reg_class
    2200     53221695 : merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
    2201              : {
    2202              :   /* Rule #1: If both classes are equal, this is the resulting class.  */
    2203     51981684 :   if (class1 == class2)
    2204              :     return class1;
    2205              : 
    2206              :   /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
    2207              :      the other class.  */
    2208     45830529 :   if (class1 == X86_64_NO_CLASS)
    2209              :     return class2;
    2210     46650466 :   if (class2 == X86_64_NO_CLASS)
    2211              :     return class1;
    2212              : 
    2213              :   /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
    2214      1661326 :   if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    2215              :     return X86_64_MEMORY_CLASS;
    2216              : 
    2217              :   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
    2218      1515290 :   if ((class1 == X86_64_INTEGERSI_CLASS
    2219       190314 :        && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS))
    2220      1514084 :       || (class2 == X86_64_INTEGERSI_CLASS
    2221       924097 :           && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS)))
    2222              :     return X86_64_INTEGERSI_CLASS;
    2223      1510268 :   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
    2224       381589 :       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    2225              :     return X86_64_INTEGER_CLASS;
    2226              : 
    2227              :   /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
    2228              :      MEMORY is used.  */
    2229        90921 :   if (class1 == X86_64_X87_CLASS
    2230              :       || class1 == X86_64_X87UP_CLASS
    2231        90921 :       || class1 == X86_64_COMPLEX_X87_CLASS
    2232              :       || class2 == X86_64_X87_CLASS
    2233        90016 :       || class2 == X86_64_X87UP_CLASS
    2234        59748 :       || class2 == X86_64_COMPLEX_X87_CLASS)
    2235        31173 :     return X86_64_MEMORY_CLASS;
    2236              : 
    2237              :   /* Rule #6: Otherwise class SSE is used.  */
    2238              :   return X86_64_SSE_CLASS;
    2239              : }
    2240              : 
    2241              : /* Classify the argument of type TYPE and mode MODE.
    2242              :    CLASSES will be filled by the register class used to pass each word
    2243              :    of the operand.  The number of words is returned.  In case the parameter
    2244              :    should be passed in memory, 0 is returned. As a special case for zero
    2245              :    sized containers, classes[0] will be NO_CLASS and 1 is returned.
    2246              : 
    2247              :    BIT_OFFSET is used internally for handling records and specifies offset
    2248              :    of the offset in bits modulo 512 to avoid overflow cases.
    2249              : 
    2250              :    See the x86-64 PS ABI for details.
    2251              : */
    2252              : 
    2253              : static int
    2254    389944759 : classify_argument (machine_mode mode, const_tree type,
    2255              :                    enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset,
    2256              :                    int &zero_width_bitfields)
    2257              : {
    2258    389944759 :   HOST_WIDE_INT bytes
    2259    773771951 :     = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
    2260    389944759 :   int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
    2261              : 
    2262              :   /* Variable sized entities are always passed/returned in memory.  */
    2263    389944759 :   if (bytes < 0)
    2264              :     return 0;
    2265              : 
    2266    389943596 :   if (mode != VOIDmode)
    2267              :     {
    2268              :       /* The value of "named" doesn't matter.  */
    2269    388863941 :       function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
    2270    388863941 :       if (targetm.calls.must_pass_in_stack (arg))
    2271           37 :         return 0;
    2272              :     }
    2273              : 
    2274    389943559 :   if (type && (AGGREGATE_TYPE_P (type)
    2275    354934079 :                || (TREE_CODE (type) == BITINT_TYPE && words > 1)))
    2276              :     {
    2277     36108717 :       int i;
    2278     36108717 :       tree field;
    2279     36108717 :       enum x86_64_reg_class subclasses[MAX_CLASSES];
    2280              : 
    2281              :       /* On x86-64 we pass structures larger than 64 bytes on the stack.  */
    2282     36108717 :       if (bytes > 64)
    2283              :         return 0;
    2284              : 
    2285     90113897 :       for (i = 0; i < words; i++)
    2286     54827433 :         classes[i] = X86_64_NO_CLASS;
    2287              : 
    2288              :       /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
    2289              :          signalize memory class, so handle it as special case.  */
    2290     35286464 :       if (!words)
    2291              :         {
    2292        83094 :           classes[0] = X86_64_NO_CLASS;
    2293        83094 :           return 1;
    2294              :         }
    2295              : 
    2296              :       /* Classify each field of record and merge classes.  */
    2297     35203370 :       switch (TREE_CODE (type))
    2298              :         {
    2299     33178692 :         case RECORD_TYPE:
    2300              :           /* And now merge the fields of structure.  */
    2301    907547220 :           for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    2302              :             {
    2303    874880636 :               if (TREE_CODE (field) == FIELD_DECL)
    2304              :                 {
    2305     48420898 :                   int num;
    2306              : 
    2307     48420898 :                   if (TREE_TYPE (field) == error_mark_node)
    2308            4 :                     continue;
    2309              : 
    2310              :                   /* Bitfields are always classified as integer.  Handle them
    2311              :                      early, since later code would consider them to be
    2312              :                      misaligned integers.  */
    2313     48420894 :                   if (DECL_BIT_FIELD (field))
    2314              :                     {
    2315      1249272 :                       if (integer_zerop (DECL_SIZE (field)))
    2316              :                         {
    2317        12902 :                           if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
    2318         8048 :                             continue;
    2319         4854 :                           if (zero_width_bitfields != 2)
    2320              :                             {
    2321         4320 :                               zero_width_bitfields = 1;
    2322         4320 :                               continue;
    2323              :                             }
    2324              :                         }
    2325      1236904 :                       for (i = (int_bit_position (field)
    2326      1236904 :                                 + (bit_offset % 64)) / 8 / 8;
    2327      2476915 :                            i < ((int_bit_position (field) + (bit_offset % 64))
    2328      2476915 :                                 + tree_to_shwi (DECL_SIZE (field))
    2329      2476915 :                                 + 63) / 8 / 8; i++)
    2330      1240011 :                         classes[i]
    2331      2480022 :                           = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
    2332              :                     }
    2333              :                   else
    2334              :                     {
    2335     47171622 :                       int pos;
    2336              : 
    2337     47171622 :                       type = TREE_TYPE (field);
    2338              : 
    2339              :                       /* Flexible array member is ignored.  */
    2340     47171622 :                       if (TYPE_MODE (type) == BLKmode
    2341       647933 :                           && TREE_CODE (type) == ARRAY_TYPE
    2342       170640 :                           && TYPE_SIZE (type) == NULL_TREE
    2343         2013 :                           && TYPE_DOMAIN (type) != NULL_TREE
    2344     47172870 :                           && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
    2345              :                               == NULL_TREE))
    2346              :                         {
    2347         1248 :                           static bool warned;
    2348              : 
    2349         1248 :                           if (!warned && warn_psabi)
    2350              :                             {
    2351            3 :                               warned = true;
    2352            3 :                               inform (input_location,
    2353              :                                       "the ABI of passing struct with"
    2354              :                                       " a flexible array member has"
    2355              :                                       " changed in GCC 4.4");
    2356              :                             }
    2357         1248 :                           continue;
    2358         1248 :                         }
    2359     47170374 :                       num = classify_argument (TYPE_MODE (type), type,
    2360              :                                                subclasses,
    2361     47170374 :                                                (int_bit_position (field)
    2362     47170374 :                                                 + bit_offset) % 512,
    2363              :                                                zero_width_bitfields);
    2364     47170374 :                       if (!num)
    2365              :                         return 0;
    2366     46658266 :                       pos = (int_bit_position (field)
    2367     46658266 :                              + (bit_offset % 64)) / 8 / 8;
    2368     96682082 :                       for (i = 0; i < num && (i + pos) < words; i++)
    2369     50023816 :                         classes[i + pos]
    2370     50023816 :                           = merge_classes (subclasses[i], classes[i + pos]);
    2371              :                     }
    2372              :                 }
    2373              :             }
    2374              :           break;
    2375              : 
    2376       448631 :         case ARRAY_TYPE:
    2377              :           /* Arrays are handled as small records.  */
    2378       448631 :           {
    2379       448631 :             int num;
    2380       448631 :             num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
    2381       448631 :                                      TREE_TYPE (type), subclasses, bit_offset,
    2382              :                                      zero_width_bitfields);
    2383       448631 :             if (!num)
    2384              :               return 0;
    2385              : 
    2386              :             /* The partial classes are now full classes.  */
    2387       433165 :             if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
    2388        13863 :               subclasses[0] = X86_64_SSE_CLASS;
    2389       433165 :             if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2)
    2390         5126 :               subclasses[0] = X86_64_SSE_CLASS;
    2391       433165 :             if (subclasses[0] == X86_64_INTEGERSI_CLASS
    2392       165804 :                 && !((bit_offset % 64) == 0 && bytes == 4))
    2393       133652 :               subclasses[0] = X86_64_INTEGER_CLASS;
    2394              : 
    2395      1335417 :             for (i = 0; i < words; i++)
    2396       902252 :               classes[i] = subclasses[i % num];
    2397              : 
    2398              :             break;
    2399              :           }
    2400       274201 :         case UNION_TYPE:
    2401       274201 :         case QUAL_UNION_TYPE:
    2402              :           /* Unions are similar to RECORD_TYPE but offset is always 0.
    2403              :              */
    2404      3042251 :           for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    2405              :             {
    2406      2803315 :               if (TREE_CODE (field) == FIELD_DECL)
    2407              :                 {
    2408      1232903 :                   int num;
    2409              : 
    2410      1232903 :                   if (TREE_TYPE (field) == error_mark_node)
    2411           10 :                     continue;
    2412              : 
    2413      1232893 :                   num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
    2414      1232893 :                                            TREE_TYPE (field), subclasses,
    2415              :                                            bit_offset, zero_width_bitfields);
    2416      1232893 :                   if (!num)
    2417              :                     return 0;
    2418      3155496 :                   for (i = 0; i < num && i < words; i++)
    2419      1957868 :                     classes[i] = merge_classes (subclasses[i], classes[i]);
    2420              :                 }
    2421              :             }
    2422              :           break;
    2423              : 
    2424      1301846 :         case BITINT_TYPE:
    2425              :           /* _BitInt(N) for N > 64 is passed as structure containing
    2426              :              (N + 63) / 64 64-bit elements.  */
    2427      1301846 :           if (words > 2)
    2428              :             return 0;
    2429        74994 :           classes[0] = classes[1] = X86_64_INTEGER_CLASS;
    2430        74994 :           return 2;
    2431              : 
    2432            0 :         default:
    2433            0 :           gcc_unreachable ();
    2434              :         }
    2435              : 
    2436     33338685 :       if (words > 2)
    2437              :         {
    2438              :           /* When size > 16 bytes, if the first one isn't
    2439              :              X86_64_SSE_CLASS or any other ones aren't
    2440              :              X86_64_SSEUP_CLASS, everything should be passed in
    2441              :              memory.  */
    2442      1653388 :           if (classes[0] != X86_64_SSE_CLASS)
    2443              :             return 0;
    2444              : 
    2445       197324 :           for (i = 1; i < words; i++)
    2446       179135 :             if (classes[i] != X86_64_SSEUP_CLASS)
    2447              :               return 0;
    2448              :         }
    2449              : 
    2450              :       /* Final merger cleanup.  */
    2451     73759682 :       for (i = 0; i < words; i++)
    2452              :         {
    2453              :           /* If one class is MEMORY, everything should be passed in
    2454              :              memory.  */
    2455     42089026 :           if (classes[i] == X86_64_MEMORY_CLASS)
    2456              :             return 0;
    2457              : 
    2458              :           /* The X86_64_SSEUP_CLASS should be always preceded by
    2459              :              X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
    2460     42058566 :           if (classes[i] == X86_64_SSEUP_CLASS
    2461       207097 :               && classes[i - 1] != X86_64_SSE_CLASS
    2462        76550 :               && classes[i - 1] != X86_64_SSEUP_CLASS)
    2463              :             {
    2464              :               /* The first one should never be X86_64_SSEUP_CLASS.  */
    2465         1916 :               gcc_assert (i != 0);
    2466         1916 :               classes[i] = X86_64_SSE_CLASS;
    2467              :             }
    2468              : 
    2469              :           /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
    2470              :              everything should be passed in memory.  */
    2471     42058566 :           if (classes[i] == X86_64_X87UP_CLASS
    2472       180307 :               && (classes[i - 1] != X86_64_X87_CLASS))
    2473              :             {
    2474         2370 :               static bool warned;
    2475              : 
    2476              :               /* The first one should never be X86_64_X87UP_CLASS.  */
    2477         2370 :               gcc_assert (i != 0);
    2478         2370 :               if (!warned && warn_psabi)
    2479              :                 {
    2480            1 :                   warned = true;
    2481            1 :                   inform (input_location,
    2482              :                           "the ABI of passing union with %<long double%>"
    2483              :                           " has changed in GCC 4.4");
    2484              :                 }
    2485         2370 :               return 0;
    2486              :             }
    2487              :         }
    2488              :       return words;
    2489              :     }
    2490              : 
    2491              :   /* Compute alignment needed.  We align all types to natural boundaries with
    2492              :      exception of XFmode that is aligned to 64bits.  */
    2493    353834842 :   if (mode != VOIDmode && mode != BLKmode)
    2494              :     {
    2495    352291174 :       int mode_alignment = GET_MODE_BITSIZE (mode);
    2496              : 
    2497    352291174 :       if (mode == XFmode)
    2498              :         mode_alignment = 128;
    2499    345346774 :       else if (mode == XCmode)
    2500       554977 :         mode_alignment = 256;
    2501    352291174 :       if (COMPLEX_MODE_P (mode))
    2502      2317754 :         mode_alignment /= 2;
    2503              :       /* Misaligned fields are always returned in memory.  */
    2504    352291174 :       if (bit_offset % mode_alignment)
    2505              :         return 0;
    2506              :     }
    2507              : 
    2508              :   /* for V1xx modes, just use the base mode */
    2509    353827209 :   if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
    2510    447294695 :       && GET_MODE_UNIT_SIZE (mode) == bytes)
    2511         6469 :     mode = GET_MODE_INNER (mode);
    2512              : 
    2513              :   /* Classification of atomic types.  */
    2514    353827209 :   switch (mode)
    2515              :     {
    2516       208054 :     case E_SDmode:
    2517       208054 :     case E_DDmode:
    2518       208054 :       classes[0] = X86_64_SSE_CLASS;
    2519       208054 :       return 1;
    2520        99208 :     case E_TDmode:
    2521        99208 :       classes[0] = X86_64_SSE_CLASS;
    2522        99208 :       classes[1] = X86_64_SSEUP_CLASS;
    2523        99208 :       return 2;
    2524    230809608 :     case E_DImode:
    2525    230809608 :     case E_SImode:
    2526    230809608 :     case E_HImode:
    2527    230809608 :     case E_QImode:
    2528    230809608 :     case E_CSImode:
    2529    230809608 :     case E_CHImode:
    2530    230809608 :     case E_CQImode:
    2531    230809608 :       {
    2532    230809608 :         int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
    2533              : 
    2534              :         /* Analyze last 128 bits only.  */
    2535    230809608 :         size = (size - 1) & 0x7f;
    2536              : 
    2537    230809608 :         if (size < 32)
    2538              :           {
    2539    102742471 :             classes[0] = X86_64_INTEGERSI_CLASS;
    2540    102742471 :             return 1;
    2541              :           }
    2542    128067137 :         else if (size < 64)
    2543              :           {
    2544    118425610 :             classes[0] = X86_64_INTEGER_CLASS;
    2545    118425610 :             return 1;
    2546              :           }
    2547      9641527 :         else if (size < 64+32)
    2548              :           {
    2549      3860700 :             classes[0] = X86_64_INTEGER_CLASS;
    2550      3860700 :             classes[1] = X86_64_INTEGERSI_CLASS;
    2551      3860700 :             return 2;
    2552              :           }
    2553      5780827 :         else if (size < 64+64)
    2554              :           {
    2555      5780827 :             classes[0] = classes[1] = X86_64_INTEGER_CLASS;
    2556      5780827 :             return 2;
    2557              :           }
    2558              :         else
    2559              :           gcc_unreachable ();
    2560              :       }
    2561      2437946 :     case E_CDImode:
    2562      2437946 :     case E_TImode:
    2563      2437946 :       classes[0] = classes[1] = X86_64_INTEGER_CLASS;
    2564      2437946 :       return 2;
    2565            0 :     case E_COImode:
    2566            0 :     case E_OImode:
    2567              :       /* OImode shouldn't be used directly.  */
    2568            0 :       gcc_unreachable ();
    2569              :     case E_CTImode:
    2570              :       return 0;
    2571       822787 :     case E_HFmode:
    2572       822787 :     case E_BFmode:
    2573       822787 :       if (!(bit_offset % 64))
    2574       820237 :         classes[0] = X86_64_SSEHF_CLASS;
    2575              :       else
    2576         2550 :         classes[0] = X86_64_SSE_CLASS;
    2577              :       return 1;
    2578      9758741 :     case E_SFmode:
    2579      9758741 :       if (!(bit_offset % 64))
    2580      9705281 :         classes[0] = X86_64_SSESF_CLASS;
    2581              :       else
    2582        53460 :         classes[0] = X86_64_SSE_CLASS;
    2583              :       return 1;
    2584      4329993 :     case E_DFmode:
    2585      4329993 :       classes[0] = X86_64_SSEDF_CLASS;
    2586      4329993 :       return 1;
    2587      6943684 :     case E_XFmode:
    2588      6943684 :       classes[0] = X86_64_X87_CLASS;
    2589      6943684 :       classes[1] = X86_64_X87UP_CLASS;
    2590      6943684 :       return 2;
    2591      1282644 :     case E_TFmode:
    2592      1282644 :       classes[0] = X86_64_SSE_CLASS;
    2593      1282644 :       classes[1] = X86_64_SSEUP_CLASS;
    2594      1282644 :       return 2;
    2595        75166 :     case E_HCmode:
    2596        75166 :     case E_BCmode:
    2597        75166 :       classes[0] = X86_64_SSE_CLASS;
    2598        75166 :       if (!(bit_offset % 64))
    2599              :         return 1;
    2600              :       else
    2601              :         {
    2602           98 :           classes[1] = X86_64_SSEHF_CLASS;
    2603           98 :           return 2;
    2604              :         }
    2605       692191 :     case E_SCmode:
    2606       692191 :       classes[0] = X86_64_SSE_CLASS;
    2607       692191 :       if (!(bit_offset % 64))
    2608              :         return 1;
    2609              :       else
    2610              :         {
    2611         1119 :           static bool warned;
    2612              : 
    2613         1119 :           if (!warned && warn_psabi)
    2614              :             {
    2615            2 :               warned = true;
    2616            2 :               inform (input_location,
    2617              :                       "the ABI of passing structure with %<complex float%>"
    2618              :                       " member has changed in GCC 4.4");
    2619              :             }
    2620         1119 :           classes[1] = X86_64_SSESF_CLASS;
    2621         1119 :           return 2;
    2622              :         }
    2623       701894 :     case E_DCmode:
    2624       701894 :       classes[0] = X86_64_SSEDF_CLASS;
    2625       701894 :       classes[1] = X86_64_SSEDF_CLASS;
    2626       701894 :       return 2;
    2627       554977 :     case E_XCmode:
    2628       554977 :       classes[0] = X86_64_COMPLEX_X87_CLASS;
    2629       554977 :       return 1;
    2630              :     case E_TCmode:
    2631              :       /* This modes is larger than 16 bytes.  */
    2632              :       return 0;
    2633     25341644 :     case E_V8SFmode:
    2634     25341644 :     case E_V8SImode:
    2635     25341644 :     case E_V32QImode:
    2636     25341644 :     case E_V16HFmode:
    2637     25341644 :     case E_V16BFmode:
    2638     25341644 :     case E_V16HImode:
    2639     25341644 :     case E_V4DFmode:
    2640     25341644 :     case E_V4DImode:
    2641     25341644 :       classes[0] = X86_64_SSE_CLASS;
    2642     25341644 :       classes[1] = X86_64_SSEUP_CLASS;
    2643     25341644 :       classes[2] = X86_64_SSEUP_CLASS;
    2644     25341644 :       classes[3] = X86_64_SSEUP_CLASS;
    2645     25341644 :       return 4;
    2646     27476715 :     case E_V8DFmode:
    2647     27476715 :     case E_V16SFmode:
    2648     27476715 :     case E_V32HFmode:
    2649     27476715 :     case E_V32BFmode:
    2650     27476715 :     case E_V8DImode:
    2651     27476715 :     case E_V16SImode:
    2652     27476715 :     case E_V32HImode:
    2653     27476715 :     case E_V64QImode:
    2654     27476715 :       classes[0] = X86_64_SSE_CLASS;
    2655     27476715 :       classes[1] = X86_64_SSEUP_CLASS;
    2656     27476715 :       classes[2] = X86_64_SSEUP_CLASS;
    2657     27476715 :       classes[3] = X86_64_SSEUP_CLASS;
    2658     27476715 :       classes[4] = X86_64_SSEUP_CLASS;
    2659     27476715 :       classes[5] = X86_64_SSEUP_CLASS;
    2660     27476715 :       classes[6] = X86_64_SSEUP_CLASS;
    2661     27476715 :       classes[7] = X86_64_SSEUP_CLASS;
    2662     27476715 :       return 8;
    2663     37352466 :     case E_V4SFmode:
    2664     37352466 :     case E_V4SImode:
    2665     37352466 :     case E_V16QImode:
    2666     37352466 :     case E_V8HImode:
    2667     37352466 :     case E_V8HFmode:
    2668     37352466 :     case E_V8BFmode:
    2669     37352466 :     case E_V2DFmode:
    2670     37352466 :     case E_V2DImode:
    2671     37352466 :       classes[0] = X86_64_SSE_CLASS;
    2672     37352466 :       classes[1] = X86_64_SSEUP_CLASS;
    2673     37352466 :       return 2;
    2674      3265076 :     case E_V1TImode:
    2675      3265076 :     case E_V1DImode:
    2676      3265076 :     case E_V2SFmode:
    2677      3265076 :     case E_V2SImode:
    2678      3265076 :     case E_V4HImode:
    2679      3265076 :     case E_V4HFmode:
    2680      3265076 :     case E_V4BFmode:
    2681      3265076 :     case E_V2HFmode:
    2682      3265076 :     case E_V2BFmode:
    2683      3265076 :     case E_V8QImode:
    2684      3265076 :       classes[0] = X86_64_SSE_CLASS;
    2685      3265076 :       return 1;
    2686              :     case E_BLKmode:
    2687              :     case E_VOIDmode:
    2688              :       return 0;
    2689        44993 :     default:
    2690        44993 :       gcc_assert (VECTOR_MODE_P (mode));
    2691              : 
    2692        44993 :       if (bytes > 16)
    2693              :         return 0;
    2694              : 
    2695        60258 :       gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
    2696              : 
    2697        60258 :       if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
    2698        29689 :         classes[0] = X86_64_INTEGERSI_CLASS;
    2699              :       else
    2700          440 :         classes[0] = X86_64_INTEGER_CLASS;
    2701        30129 :       classes[1] = X86_64_INTEGER_CLASS;
    2702        30129 :       return 1 + (bytes > 8);
    2703              :     }
    2704              : }
    2705              : 
    2706              : /* Wrapper around classify_argument with the extra zero_width_bitfields
    2707              :    argument, to diagnose GCC 12.1 ABI differences for C.  */
    2708              : 
    2709              : static int
    2710    341092327 : classify_argument (machine_mode mode, const_tree type,
    2711              :                    enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
    2712              : {
    2713    341092327 :   int zero_width_bitfields = 0;
    2714    341092327 :   static bool warned = false;
    2715    341092327 :   int n = classify_argument (mode, type, classes, bit_offset,
    2716              :                              zero_width_bitfields);
    2717    341092327 :   if (!zero_width_bitfields || warned || !warn_psabi)
    2718              :     return n;
    2719          534 :   enum x86_64_reg_class alt_classes[MAX_CLASSES];
    2720          534 :   zero_width_bitfields = 2;
    2721          534 :   if (classify_argument (mode, type, alt_classes, bit_offset,
    2722              :                          zero_width_bitfields) != n)
    2723            0 :     zero_width_bitfields = 3;
    2724              :   else
    2725         1286 :     for (int i = 0; i < n; i++)
    2726          760 :       if (classes[i] != alt_classes[i])
    2727              :         {
    2728            8 :           zero_width_bitfields = 3;
    2729            8 :           break;
    2730              :         }
    2731          534 :   if (zero_width_bitfields == 3)
    2732              :     {
    2733            8 :       warned = true;
    2734            8 :       const char *url
    2735              :         = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
    2736              : 
    2737            8 :       inform (input_location,
    2738              :               "the ABI of passing C structures with zero-width bit-fields"
    2739              :               " has changed in GCC %{12.1%}", url);
    2740              :     }
    2741              :   return n;
    2742              : }
    2743              : 
    2744              : /* Examine the argument and return set number of register required in each
    2745              :    class.  Return true iff parameter should be passed in memory.  */
    2746              : 
    2747              : static bool
    2748    229531410 : examine_argument (machine_mode mode, const_tree type, int in_return,
    2749              :                   int *int_nregs, int *sse_nregs)
    2750              : {
    2751    229531410 :   enum x86_64_reg_class regclass[MAX_CLASSES];
    2752    229531410 :   int n = classify_argument (mode, type, regclass, 0);
    2753              : 
    2754    229531410 :   *int_nregs = 0;
    2755    229531410 :   *sse_nregs = 0;
    2756              : 
    2757    229531410 :   if (!n)
    2758              :     return true;
    2759    665215238 :   for (n--; n >= 0; n--)
    2760    440691553 :     switch (regclass[n])
    2761              :       {
    2762    152282519 :       case X86_64_INTEGER_CLASS:
    2763    152282519 :       case X86_64_INTEGERSI_CLASS:
    2764    152282519 :         (*int_nregs)++;
    2765    152282519 :         break;
    2766     74397332 :       case X86_64_SSE_CLASS:
    2767     74397332 :       case X86_64_SSEHF_CLASS:
    2768     74397332 :       case X86_64_SSESF_CLASS:
    2769     74397332 :       case X86_64_SSEDF_CLASS:
    2770     74397332 :         (*sse_nregs)++;
    2771     74397332 :         break;
    2772              :       case X86_64_NO_CLASS:
    2773              :       case X86_64_SSEUP_CLASS:
    2774              :         break;
    2775      9421846 :       case X86_64_X87_CLASS:
    2776      9421846 :       case X86_64_X87UP_CLASS:
    2777      9421846 :       case X86_64_COMPLEX_X87_CLASS:
    2778      9421846 :         if (!in_return)
    2779              :           return true;
    2780              :         break;
    2781            0 :       case X86_64_MEMORY_CLASS:
    2782            0 :         gcc_unreachable ();
    2783              :       }
    2784              : 
    2785              :   return false;
    2786              : }
    2787              : 
/* Construct container for the argument used by GCC interface.  See
   FUNCTION_ARG for the detailed description.

   MODE/ORIG_MODE and TYPE describe the argument; IN_RETURN is nonzero
   when building a return-value container.  NINTREGS/NSSEREGS are the
   numbers of integer/SSE registers still available, INTREG points to
   the list of integer register numbers to draw from, and SSE_REGNO is
   the index of the next SSE register.  Returns an rtx (a single REG,
   or a PARALLEL of EXPR_LISTs mapping registers to byte offsets), or
   NULL when the argument must be passed in memory.  */

static rtx
construct_container (machine_mode mode, machine_mode orig_mode,
		     const_tree type, int in_return, int nintregs, int nsseregs,
		     const int *intreg, int sse_regno)
{
  /* The following variables hold the static issued_error state.  Each
     diagnostic below is emitted at most once per compilation.  */
  static bool issued_sse_arg_error;
  static bool issued_sse_ret_error;
  static bool issued_x87_ret_error;

  machine_mode tmpmode;
  int bytes
    = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  enum x86_64_reg_class regclass[MAX_CLASSES];
  int n;
  int i;
  int nexps = 0;
  int needed_sseregs, needed_intregs;
  rtx exp[MAX_CLASSES];
  rtx ret;

  /* Split the argument into eightbyte classes.  Zero classes, a
     memory-class result, or running out of free registers all mean no
     register container can be built.  */
  n = classify_argument (mode, type, regclass, 0);
  if (!n)
    return NULL;
  if (examine_argument (mode, type, in_return, &needed_intregs,
			&needed_sseregs))
    return NULL;
  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    return NULL;

  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
     some less clueful developer tries to use floating-point anyway.  */
  if (needed_sseregs
      && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
    {
      /* Return early if we shouldn't raise an error for invalid
	 calls.  */
      if (cfun != NULL && cfun->machine->silent_p)
	return NULL;
      if (in_return)
	{
	  if (!issued_sse_ret_error)
	    {
	      if (VALID_SSE2_TYPE_MODE (mode))
		error ("SSE register return with SSE2 disabled");
	      else
		error ("SSE register return with SSE disabled");
	      issued_sse_ret_error = true;
	    }
	}
      else if (!issued_sse_arg_error)
	{
	  if (VALID_SSE2_TYPE_MODE (mode))
	    error ("SSE register argument with SSE2 disabled");
	  else
	    error ("SSE register argument with SSE disabled");
	  issued_sse_arg_error = true;
	}
      return NULL;
    }

  /* Likewise, error if the ABI requires us to return values in the
     x87 registers and the user specified -mno-80387.  */
  if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
    for (i = 0; i < n; i++)
      if (regclass[i] == X86_64_X87_CLASS
	  || regclass[i] == X86_64_X87UP_CLASS
	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
	{
	  /* Return early if we shouldn't raise an error for invalid
	     calls.  */
	  if (cfun != NULL && cfun->machine->silent_p)
	    return NULL;
	  if (!issued_x87_ret_error)
	    {
	      error ("x87 register return with x87 disabled");
	      issued_x87_ret_error = true;
	    }
	  return NULL;
	}

  /* First construct simple cases.  Avoid SCmode, since we want to use
     single register to pass this type.  */
  if (n == 1 && mode != SCmode && mode != HCmode)
    switch (regclass[0])
      {
      case X86_64_INTEGER_CLASS:
      case X86_64_INTEGERSI_CLASS:
	return gen_rtx_REG (mode, intreg[0]);
      case X86_64_SSE_CLASS:
      case X86_64_SSEHF_CLASS:
      case X86_64_SSESF_CLASS:
      case X86_64_SSEDF_CLASS:
	if (mode != BLKmode)
	  return gen_reg_or_parallel (mode, orig_mode,
				      GET_SSE_REGNO (sse_regno));
	break;
      case X86_64_X87_CLASS:
      case X86_64_COMPLEX_X87_CLASS:
	return gen_rtx_REG (mode, FIRST_STACK_REG);
      case X86_64_NO_CLASS:
	/* Zero sized array, struct or class.  */
	return NULL;
      default:
	gcc_unreachable ();
      }
  /* A 16/32/64-byte value whose first eightbyte is SSE and whose
     remaining eightbytes are all SSEUP lives entirely in one vector
     register.  */
  if (n == 2
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  if (n == 4
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  if (n == 8
      && regclass[0] == X86_64_SSE_CLASS
      && regclass[1] == X86_64_SSEUP_CLASS
      && regclass[2] == X86_64_SSEUP_CLASS
      && regclass[3] == X86_64_SSEUP_CLASS
      && regclass[4] == X86_64_SSEUP_CLASS
      && regclass[5] == X86_64_SSEUP_CLASS
      && regclass[6] == X86_64_SSEUP_CLASS
      && regclass[7] == X86_64_SSEUP_CLASS
      && mode != BLKmode)
    return gen_reg_or_parallel (mode, orig_mode,
				GET_SSE_REGNO (sse_regno));
  /* An x87 long double occupies two eightbytes but one stack reg.  */
  if (n == 2
      && regclass[0] == X86_64_X87_CLASS
      && regclass[1] == X86_64_X87UP_CLASS)
    return gen_rtx_REG (XFmode, FIRST_STACK_REG);

  if (n == 2
      && regclass[0] == X86_64_INTEGER_CLASS
      && regclass[1] == X86_64_INTEGER_CLASS
      && (mode == CDImode || mode == TImode || mode == BLKmode)
      && intreg[0] + 1 == intreg[1])
    {
      if (mode == BLKmode)
	{
	  /* Use TImode for BLKmode values in 2 integer registers.  */
	  exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (TImode, intreg[0]),
				      GEN_INT (0));
	  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
	  XVECEXP (ret, 0, 0) = exp[0];
	  return ret;
	}
      else
	return gen_rtx_REG (mode, intreg[0]);
    }

  /* Otherwise figure out the entries of the PARALLEL.  Each eightbyte
     (or run of eightbytes for wide vectors) becomes one EXPR_LIST
     pairing a register with its byte offset i*8.  */
  for (i = 0; i < n; i++)
    {
      int pos;

      switch (regclass[i])
	{
	  case X86_64_NO_CLASS:
	    break;
	  case X86_64_INTEGER_CLASS:
	  case X86_64_INTEGERSI_CLASS:
	    /* Merge TImodes on aligned occasions here too.  */
	    if (i * 8 + 8 > bytes)
	      {
		/* Partial trailing eightbyte: pick the smallest integer
		   mode holding the remaining bits.  */
		unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
		if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
		  /* We've requested 24 bytes we
		     don't have mode for.  Use DImode.  */
		  tmpmode = DImode;
	      }
	    else if (regclass[i] == X86_64_INTEGERSI_CLASS)
	      tmpmode = SImode;
	    else
	      tmpmode = DImode;
	    exp [nexps++]
	      = gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (tmpmode, *intreg),
				   GEN_INT (i*8));
	    intreg++;
	    break;
	  case X86_64_SSEHF_CLASS:
	    tmpmode = (mode == BFmode ? BFmode : HFmode);
	    exp [nexps++]
	      = gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (tmpmode,
						GET_SSE_REGNO (sse_regno)),
				   GEN_INT (i*8));
	    sse_regno++;
	    break;
	  case X86_64_SSESF_CLASS:
	    exp [nexps++]
	      = gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (SFmode,
						GET_SSE_REGNO (sse_regno)),
				   GEN_INT (i*8));
	    sse_regno++;
	    break;
	  case X86_64_SSEDF_CLASS:
	    exp [nexps++]
	      = gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (DFmode,
						GET_SSE_REGNO (sse_regno)),
				   GEN_INT (i*8));
	    sse_regno++;
	    break;
	  case X86_64_SSE_CLASS:
	    /* Remember the starting eightbyte; I may be bumped past the
	       SSEUP tails below, but the offset is taken from POS.  */
	    pos = i;
	    switch (n)
	      {
	      case 1:
		tmpmode = DImode;
		break;
	      case 2:
		if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
		  {
		    tmpmode = TImode;
		    i++;
		  }
		else
		  tmpmode = DImode;
		break;
	      case 4:
		gcc_assert (i == 0
			    && regclass[1] == X86_64_SSEUP_CLASS
			    && regclass[2] == X86_64_SSEUP_CLASS
			    && regclass[3] == X86_64_SSEUP_CLASS);
		tmpmode = OImode;
		i += 3;
		break;
	      case 8:
		gcc_assert (i == 0
			    && regclass[1] == X86_64_SSEUP_CLASS
			    && regclass[2] == X86_64_SSEUP_CLASS
			    && regclass[3] == X86_64_SSEUP_CLASS
			    && regclass[4] == X86_64_SSEUP_CLASS
			    && regclass[5] == X86_64_SSEUP_CLASS
			    && regclass[6] == X86_64_SSEUP_CLASS
			    && regclass[7] == X86_64_SSEUP_CLASS);
		tmpmode = XImode;
		i += 7;
		break;
	      default:
		gcc_unreachable ();
	      }
	    exp [nexps++]
	      = gen_rtx_EXPR_LIST (VOIDmode,
				   gen_rtx_REG (tmpmode,
						GET_SSE_REGNO (sse_regno)),
				   GEN_INT (pos*8));
	    sse_regno++;
	    break;
	  default:
	    gcc_unreachable ();
	}
    }

  /* Empty aligned struct, union or class.  */
  if (nexps == 0)
    return NULL;

  ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
  for (i = 0; i < nexps; i++)
    XVECEXP (ret, 0, i) = exp [i];
  return ret;
}
    3063              : 
/* Update the data in CUM to advance over an argument of mode MODE
   and data type TYPE.  (TYPE is null for libcalls where that information
   may not be available.)  32-bit ABI variant; BYTES/WORDS are the
   argument's promoted size in bytes and in words.

   Return a number of integer registers advanced over.  */

static int
function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
			 const_tree type, HOST_WIDE_INT bytes,
			 HOST_WIDE_INT words)
{
  int res = 0;
  bool error_p = false;

  if (TARGET_IAMCU)
    {
      /* Intel MCU psABI passes scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (!VECTOR_MODE_P (mode) && bytes <= 8)
	goto pass_in_reg;
      return res;
    }

  switch (mode)
    {
    default:
      break;

    case E_BLKmode:
      if (bytes < 0)
	break;
      /* FALLTHRU */

    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
      /* Integer-like arguments consume general registers; NREGS may go
	 negative here, which is clamped to zero below.  */
pass_in_reg:
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;
      if (cum->nregs >= 0)
	res = words;
      if (cum->nregs <= 0)
	{
	  /* Out of general registers: remaining arguments go on the
	     stack from here on.  */
	  cum->nregs = 0;
	  cfun->machine->arg_reg_available = false;
	  cum->regno = 0;
	}
      break;

    case E_OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case E_DFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 2)
	break;
      /* FALLTHRU */
    case E_SFmode:
      if (cum->float_in_sse == -1)
	error_p = true;
      if (cum->float_in_sse < 1)
	break;
      /* FALLTHRU */

    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V16SImode:
    case E_V8DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V8DFmode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_TImode:
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V8HFmode:
    case E_V8BFmode:
    case E_V4SFmode:
    case E_V2DFmode:
      /* SSE-sized scalars and vectors (but not aggregates) consume an
	 SSE register.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->sse_words += words;
	  cum->sse_nregs -= 1;
	  cum->sse_regno += 1;
	  if (cum->sse_nregs <= 0)
	    {
	      cum->sse_nregs = 0;
	      cum->sse_regno = 0;
	    }
	}
      break;

    case E_V8QImode:
    case E_V4HImode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2SImode:
    case E_V2SFmode:
    case E_V1TImode:
    case E_V1DImode:
      /* 64-bit vectors (but not aggregates) consume an MMX register.  */
      if (!type || !AGGREGATE_TYPE_P (type))
	{
	  cum->mmx_words += words;
	  cum->mmx_nregs -= 1;
	  cum->mmx_regno += 1;
	  if (cum->mmx_nregs <= 0)
	    {
	      cum->mmx_nregs = 0;
	      cum->mmx_regno = 0;
	    }
	}
      break;
    }
  if (error_p)
    {
      /* float_in_sse == -1 flags an SSE calling convention requested
	 while SSE/SSE2 is unavailable; diagnose once and fall back.  */
      cum->float_in_sse = 0;
      error ("calling %qD with SSE calling convention without "
	     "SSE/SSE2 enabled", cum->decl);
      sorry ("this is a GCC bug that can be worked around by adding "
	     "attribute used to function called");
    }

  return res;
}
    3202              : 
    3203              : static int
    3204     18989904 : function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
    3205              :                          const_tree type, HOST_WIDE_INT words, bool named)
    3206              : {
    3207     18989904 :   int int_nregs, sse_nregs;
    3208              : 
    3209              :   /* Unnamed 512 and 256bit vector mode parameters are passed on stack.  */
    3210     18989904 :   if (!named && (VALID_AVX512F_REG_MODE (mode)
    3211              :                  || VALID_AVX256_REG_MODE (mode)))
    3212              :     return 0;
    3213              : 
    3214     18989540 :   if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
    3215     18989540 :       && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    3216              :     {
    3217     16716605 :       cum->nregs -= int_nregs;
    3218     16716605 :       cum->sse_nregs -= sse_nregs;
    3219     16716605 :       cum->regno += int_nregs;
    3220     16716605 :       cum->sse_regno += sse_nregs;
    3221     16716605 :       return int_nregs;
    3222              :     }
    3223              :   else
    3224              :     {
    3225      2272935 :       int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
    3226      2272935 :       cum->words = ROUND_UP (cum->words, align);
    3227      2272935 :       cum->words += words;
    3228      2272935 :       return 0;
    3229              :     }
    3230              : }
    3231              : 
    3232              : static int
    3233       446989 : function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
    3234              :                             HOST_WIDE_INT words)
    3235              : {
    3236              :   /* Otherwise, this should be passed indirect.  */
    3237       446989 :   gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
    3238              : 
    3239       446989 :   cum->words += words;
    3240       446989 :   if (cum->nregs > 0)
    3241              :     {
    3242       289355 :       cum->nregs -= 1;
    3243       289355 :       cum->regno += 1;
    3244       289355 :       return 1;
    3245              :     }
    3246              :   return 0;
    3247              : }
    3248              : 
    3249              : /* Update the data in CUM to advance over argument ARG.  */
    3250              : 
    3251              : static void
    3252     21566447 : ix86_function_arg_advance (cumulative_args_t cum_v,
    3253              :                            const function_arg_info &arg)
    3254              : {
    3255     21566447 :   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
    3256     21566447 :   machine_mode mode = arg.mode;
    3257     21566447 :   HOST_WIDE_INT bytes, words;
    3258     21566447 :   int nregs;
    3259              : 
    3260              :   /* The argument of interrupt handler is a special case and is
    3261              :      handled in ix86_function_arg.  */
    3262     21566447 :   if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
    3263              :     return;
    3264              : 
    3265     21566080 :   bytes = arg.promoted_size_in_bytes ();
    3266     21566080 :   words = CEIL (bytes, UNITS_PER_WORD);
    3267              : 
    3268     21566080 :   if (arg.type)
    3269     21253939 :     mode = type_natural_mode (arg.type, NULL, false);
    3270              : 
    3271     21566080 :   if (TARGET_64BIT)
    3272              :     {
    3273     19436893 :       enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
    3274              : 
    3275     19436893 :       if (call_abi == MS_ABI)
    3276       446989 :         nregs = function_arg_advance_ms_64 (cum, bytes, words);
    3277              :       else
    3278     18989904 :         nregs = function_arg_advance_64 (cum, mode, arg.type, words,
    3279     18989904 :                                          arg.named);
    3280              :     }
    3281              :   else
    3282      2129187 :     nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);
    3283              : 
    3284     21566080 :   if (!nregs)
    3285              :     {
    3286              :       /* Track if there are outgoing arguments on stack.  */
    3287      5701794 :       if (cum->caller)
    3288      2714476 :         cfun->machine->outgoing_args_on_stack = true;
    3289              :     }
    3290              : }
    3291              : 
    3292              : /* Define where to put the arguments to a function.
    3293              :    Value is zero to push the argument on the stack,
    3294              :    or a hard register in which to store the argument.
    3295              : 
    3296              :    MODE is the argument's machine mode.
    3297              :    TYPE is the data type of the argument (as a tree).
    3298              :     This is null for libcalls where that information may
    3299              :     not be available.
    3300              :    CUM is a variable of type CUMULATIVE_ARGS which gives info about
    3301              :     the preceding args and about the function being called.
    3302              :    NAMED is nonzero if this argument is a named parameter
    3303              :     (otherwise it is an extra parameter matching an ellipsis).  */
    3304              : 
    3305              : static rtx
    3306      2556093 : function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
    3307              :                  machine_mode orig_mode, const_tree type,
    3308              :                  HOST_WIDE_INT bytes, HOST_WIDE_INT words)
    3309              : {
    3310      2556093 :   bool error_p = false;
    3311              : 
    3312              :   /* Avoid the AL settings for the Unix64 ABI.  */
    3313      2556093 :   if (mode == VOIDmode)
    3314       741825 :     return constm1_rtx;
    3315              : 
    3316      1814268 :   if (TARGET_IAMCU)
    3317              :     {
    3318              :       /* Intel MCU psABI passes scalars and aggregates no larger than 8
    3319              :          bytes in registers.  */
    3320            0 :       if (!VECTOR_MODE_P (mode) && bytes <= 8)
    3321            0 :         goto pass_in_reg;
    3322              :       return NULL_RTX;
    3323              :     }
    3324              : 
    3325      1814268 :   switch (mode)
    3326              :     {
    3327              :     default:
    3328              :       break;
    3329              : 
    3330        77859 :     case E_BLKmode:
    3331        77859 :       if (bytes < 0)
    3332              :         break;
    3333              :       /* FALLTHRU */
    3334      1780753 :     case E_DImode:
    3335      1780753 :     case E_SImode:
    3336      1780753 :     case E_HImode:
    3337      1780753 :     case E_QImode:
    3338        77859 : pass_in_reg:
    3339      1780753 :       if (words <= cum->nregs)
    3340              :         {
    3341        45286 :           int regno = cum->regno;
    3342              : 
    3343              :           /* Fastcall allocates the first two DWORD (SImode) or
    3344              :             smaller arguments to ECX and EDX if it isn't an
    3345              :             aggregate type .  */
    3346        45286 :           if (cum->fastcall)
    3347              :             {
    3348            6 :               if (mode == BLKmode
    3349            6 :                   || mode == DImode
    3350            6 :                   || (type && AGGREGATE_TYPE_P (type)))
    3351              :                 break;
    3352              : 
    3353              :               /* ECX not EAX is the first allocated register.  */
    3354            6 :               if (regno == AX_REG)
    3355        45286 :                 regno = CX_REG;
    3356              :             }
    3357        45286 :           return gen_rtx_REG (mode, regno);
    3358              :         }
    3359              :       break;
    3360              : 
    3361         3346 :     case E_DFmode:
    3362         3346 :       if (cum->float_in_sse == -1)
    3363            0 :         error_p = true;
    3364         3346 :       if (cum->float_in_sse < 2)
    3365              :         break;
    3366              :       /* FALLTHRU */
    3367          952 :     case E_SFmode:
    3368          952 :       if (cum->float_in_sse == -1)
    3369            0 :         error_p = true;
    3370          952 :       if (cum->float_in_sse < 1)
    3371              :         break;
    3372              :       /* FALLTHRU */
    3373           12 :     case E_TImode:
    3374              :       /* In 32bit, we pass TImode in xmm registers.  */
    3375           12 :     case E_V16QImode:
    3376           12 :     case E_V8HImode:
    3377           12 :     case E_V4SImode:
    3378           12 :     case E_V2DImode:
    3379           12 :     case E_V8HFmode:
    3380           12 :     case E_V8BFmode:
    3381           12 :     case E_V4SFmode:
    3382           12 :     case E_V2DFmode:
    3383           12 :       if (!type || !AGGREGATE_TYPE_P (type))
    3384              :         {
    3385           12 :           if (cum->sse_nregs)
    3386           12 :             return gen_reg_or_parallel (mode, orig_mode,
    3387           12 :                                         cum->sse_regno + FIRST_SSE_REG);
    3388              :         }
    3389              :       break;
    3390              : 
    3391            0 :     case E_OImode:
    3392            0 :     case E_XImode:
    3393              :       /* OImode and XImode shouldn't be used directly.  */
    3394            0 :       gcc_unreachable ();
    3395              : 
    3396            9 :     case E_V64QImode:
    3397            9 :     case E_V32HImode:
    3398            9 :     case E_V16SImode:
    3399            9 :     case E_V8DImode:
    3400            9 :     case E_V32HFmode:
    3401            9 :     case E_V32BFmode:
    3402            9 :     case E_V16SFmode:
    3403            9 :     case E_V8DFmode:
    3404            9 :     case E_V16HFmode:
    3405            9 :     case E_V16BFmode:
    3406            9 :     case E_V8SFmode:
    3407            9 :     case E_V8SImode:
    3408            9 :     case E_V32QImode:
    3409            9 :     case E_V16HImode:
    3410            9 :     case E_V4DFmode:
    3411            9 :     case E_V4DImode:
    3412            9 :       if (!type || !AGGREGATE_TYPE_P (type))
    3413              :         {
    3414            9 :           if (cum->sse_nregs)
    3415            9 :             return gen_reg_or_parallel (mode, orig_mode,
    3416            9 :                                         cum->sse_regno + FIRST_SSE_REG);
    3417              :         }
    3418              :       break;
    3419              : 
    3420            8 :     case E_V8QImode:
    3421            8 :     case E_V4HImode:
    3422            8 :     case E_V4HFmode:
    3423            8 :     case E_V4BFmode:
    3424            8 :     case E_V2SImode:
    3425            8 :     case E_V2SFmode:
    3426            8 :     case E_V1TImode:
    3427            8 :     case E_V1DImode:
    3428            8 :       if (!type || !AGGREGATE_TYPE_P (type))
    3429              :         {
    3430            8 :           if (cum->mmx_nregs)
    3431            8 :             return gen_reg_or_parallel (mode, orig_mode,
    3432            8 :                                         cum->mmx_regno + FIRST_MMX_REG);
    3433              :         }
    3434              :       break;
    3435              :     }
    3436         4298 :   if (error_p)
    3437              :     {
    3438            0 :       cum->float_in_sse = 0;
    3439            0 :       error ("calling %qD with SSE calling convention without "
    3440              :              "SSE/SSE2 enabled", cum->decl);
    3441            0 :       sorry ("this is a GCC bug that can be worked around by adding "
    3442              :              "attribute used to function called");
    3443              :     }
    3444              : 
    3445              :   return NULL_RTX;
    3446              : }
    3447              : 
    3448              : static rtx
    3449     18683482 : function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
    3450              :                  machine_mode orig_mode, const_tree type, bool named)
    3451              : {
    3452              :   /* Handle a hidden AL argument containing number of registers
    3453              :      for varargs x86-64 functions.  */
    3454     18683482 :   if (mode == VOIDmode)
    3455      5194820 :     return GEN_INT (cum->maybe_vaarg
    3456              :                     ? (cum->sse_nregs < 0
    3457              :                        ? X86_64_SSE_REGPARM_MAX
    3458              :                        : cum->sse_regno)
    3459              :                     : -1);
    3460              : 
    3461     13488662 :   switch (mode)
    3462              :     {
    3463              :     default:
    3464              :       break;
    3465              : 
    3466        90203 :     case E_V16HFmode:
    3467        90203 :     case E_V16BFmode:
    3468        90203 :     case E_V8SFmode:
    3469        90203 :     case E_V8SImode:
    3470        90203 :     case E_V32QImode:
    3471        90203 :     case E_V16HImode:
    3472        90203 :     case E_V4DFmode:
    3473        90203 :     case E_V4DImode:
    3474        90203 :     case E_V32HFmode:
    3475        90203 :     case E_V32BFmode:
    3476        90203 :     case E_V16SFmode:
    3477        90203 :     case E_V16SImode:
    3478        90203 :     case E_V64QImode:
    3479        90203 :     case E_V32HImode:
    3480        90203 :     case E_V8DFmode:
    3481        90203 :     case E_V8DImode:
    3482              :       /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
    3483        90203 :       if (!named)
    3484              :         return NULL;
    3485              :       break;
    3486              :     }
    3487              : 
    3488     13488298 :   const int *parm_regs;
    3489     13488298 :   if (cum->preserve_none_abi)
    3490              :     parm_regs = x86_64_preserve_none_int_parameter_registers;
    3491              :   else
    3492     13488183 :     parm_regs = x86_64_int_parameter_registers;
    3493              : 
    3494     13488298 :   return construct_container (mode, orig_mode, type, 0, cum->nregs,
    3495     13488298 :                               cum->sse_nregs,
    3496     13488298 :                               &parm_regs[cum->regno],
    3497     13488298 :                               cum->sse_regno);
    3498              : }
    3499              : 
    3500              : static rtx
    3501       296338 : function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
    3502              :                     machine_mode orig_mode, bool named, const_tree type,
    3503              :                     HOST_WIDE_INT bytes)
    3504              : {
    3505       296338 :   unsigned int regno;
    3506              : 
    3507              :   /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
    3508              :      We use value of -2 to specify that current function call is MSABI.  */
    3509       296338 :   if (mode == VOIDmode)
    3510        36293 :     return GEN_INT (-2);
    3511              : 
    3512              :   /* If we've run out of registers, it goes on the stack.  */
    3513       260045 :   if (cum->nregs == 0)
    3514              :     return NULL_RTX;
    3515              : 
    3516       176290 :   regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
    3517              : 
    3518              :   /* Only floating point modes less than 64 bits are passed in anything but
    3519              :      integer regs.  Larger floating point types are excluded as the Windows
    3520              :      ABI requires vreg args can be shadowed in GPRs (for red zone / varargs). */
    3521       176290 :   if (TARGET_SSE && (mode == HFmode || mode == SFmode || mode == DFmode))
    3522              :     {
    3523        38254 :       if (named)
    3524              :         {
    3525        38254 :           if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
    3526        37260 :             regno = cum->regno + FIRST_SSE_REG;
    3527              :         }
    3528              :       else
    3529              :         {
    3530            0 :           rtx t1, t2;
    3531              : 
    3532              :           /* Unnamed floating parameters are passed in both the
    3533              :              SSE and integer registers.  */
    3534            0 :           t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
    3535            0 :           t2 = gen_rtx_REG (mode, regno);
    3536            0 :           t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
    3537            0 :           t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
    3538            0 :           return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
    3539              :         }
    3540              :     }
    3541              :   /* Handle aggregated types passed in register.  */
    3542       176290 :   if (orig_mode == BLKmode)
    3543              :     {
    3544            0 :       if (bytes > 0 && bytes <= 8)
    3545            0 :         mode = (bytes > 4 ? DImode : SImode);
    3546            0 :       if (mode == BLKmode)
    3547            0 :         mode = DImode;
    3548              :     }
    3549              : 
    3550       176290 :   return gen_reg_or_parallel (mode, orig_mode, regno);
    3551              : }
    3552              : 
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register
   in which to store the argument.

   ARG describes the argument while CUM gives information about the
   preceding args and about the function being called.  */

static rtx
ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  machine_mode mode = arg.mode;
  HOST_WIDE_INT bytes, words;
  rtx reg;

  /* Interrupt and exception handlers take no normal arguments: their
     incoming values live at fixed offsets from the argument pointer,
     so fabricate the location here instead of using register passing.
     Only do this on the callee side (!cum->caller).  */
  if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
    {
      gcc_assert (arg.type != NULL_TREE);
      if (POINTER_TYPE_P (arg.type))
	{
	  /* This is the pointer argument.  */
	  gcc_assert (TYPE_MODE (arg.type) == ptr_mode);
	  /* It is at -WORD(AP) in the current frame in interrupt and
	     exception handlers.  */
	  reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
	}
      else
	{
	  /* Only an exception handler may take a second argument, and
	     it must be a word-mode integer (the error code).  */
	  gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
		      && TREE_CODE (arg.type) == INTEGER_TYPE
		      && TYPE_MODE (arg.type) == word_mode);
	  /* The error code is the word-mode integer argument at
	     -2 * WORD(AP) in the current frame of the exception
	     handler.  */
	  reg = gen_rtx_MEM (word_mode,
			     plus_constant (Pmode,
					    arg_pointer_rtx,
					    -2 * UNITS_PER_WORD));
	}
      return reg;
    }

  bytes = arg.promoted_size_in_bytes ();
  words = CEIL (bytes, UNITS_PER_WORD);

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (arg.type && VECTOR_TYPE_P (arg.type))
    mode = type_natural_mode (arg.type, cum, false);

  /* Dispatch to the ABI-specific worker: MS x64, SysV x86-64, or
     the 32-bit conventions.  */
  if (TARGET_64BIT)
    {
      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;

      if (call_abi == MS_ABI)
	reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
				  arg.type, bytes);
      else
	reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
    }
  else
    reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);

  /* Track if there are outgoing arguments on stack.  */
  if (reg == NULL_RTX && cum->caller)
    cfun->machine->outgoing_args_on_stack = true;

  return reg;
}
    3621              : 
    3622              : /* A C expression that indicates when an argument must be passed by
    3623              :    reference.  If nonzero for an argument, a copy of that argument is
    3624              :    made in memory and a pointer to the argument is passed instead of
    3625              :    the argument itself.  The pointer is passed in whatever way is
    3626              :    appropriate for passing a pointer to that type.  */
    3627              : 
    3628              : static bool
    3629     21487316 : ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
    3630              : {
    3631     21487316 :   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
    3632              : 
    3633     21487316 :   if (TARGET_64BIT)
    3634              :     {
    3635     19368389 :       enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
    3636              : 
    3637              :       /* See Windows x64 Software Convention.  */
    3638     19368389 :       if (call_abi == MS_ABI)
    3639              :         {
    3640       441390 :           HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);
    3641              : 
    3642       441390 :           if (tree type = arg.type)
    3643              :             {
    3644              :               /* Arrays are passed by reference.  */
    3645       441390 :               if (TREE_CODE (type) == ARRAY_TYPE)
    3646              :                 return true;
    3647              : 
    3648       441390 :               if (RECORD_OR_UNION_TYPE_P (type))
    3649              :                 {
    3650              :                   /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
    3651              :                      are passed by reference.  */
    3652        15022 :                   msize = int_size_in_bytes (type);
    3653              :                 }
    3654              :             }
    3655              : 
    3656              :           /* __m128 is passed by reference.  */
    3657       872851 :           return msize != 1 && msize != 2 && msize != 4 && msize != 8;
    3658              :         }
    3659     18926999 :       else if (arg.type && int_size_in_bytes (arg.type) == -1)
    3660              :         return true;
    3661              :     }
    3662              : 
    3663              :   return false;
    3664              : }
    3665              : 
    3666              : /* Return true when TYPE should be 128bit aligned for 32bit argument
    3667              :    passing ABI.  XXX: This function is obsolete and is only used for
    3668              :    checking psABI compatibility with previous versions of GCC.  */
    3669              : 
    3670              : static bool
    3671      1974463 : ix86_compat_aligned_value_p (const_tree type)
    3672              : {
    3673      1974463 :   machine_mode mode = TYPE_MODE (type);
    3674      1974463 :   if (((TARGET_SSE && SSE_REG_MODE_P (mode))
    3675      1974421 :        || mode == TDmode
    3676      1974421 :        || mode == TFmode
    3677              :        || mode == TCmode)
    3678      1974675 :       && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    3679              :     return true;
    3680      1974251 :   if (TYPE_ALIGN (type) < 128)
    3681              :     return false;
    3682              : 
    3683            0 :   if (AGGREGATE_TYPE_P (type))
    3684              :     {
    3685              :       /* Walk the aggregates recursively.  */
    3686            0 :       switch (TREE_CODE (type))
    3687              :         {
    3688            0 :         case RECORD_TYPE:
    3689            0 :         case UNION_TYPE:
    3690            0 :         case QUAL_UNION_TYPE:
    3691            0 :           {
    3692            0 :             tree field;
    3693              : 
    3694              :             /* Walk all the structure fields.  */
    3695            0 :             for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    3696              :               {
    3697            0 :                 if (TREE_CODE (field) == FIELD_DECL
    3698            0 :                     && ix86_compat_aligned_value_p (TREE_TYPE (field)))
    3699              :                   return true;
    3700              :               }
    3701              :             break;
    3702              :           }
    3703              : 
    3704            0 :         case ARRAY_TYPE:
    3705              :           /* Just for use if some languages passes arrays by value.  */
    3706            0 :           if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
    3707              :             return true;
    3708              :           break;
    3709              : 
    3710              :         default:
    3711              :           gcc_unreachable ();
    3712              :         }
    3713              :     }
    3714              :   return false;
    3715              : }
    3716              : 
    3717              : /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
    3718              :    XXX: This function is obsolete and is only used for checking psABI
    3719              :    compatibility with previous versions of GCC.  */
    3720              : 
    3721              : static unsigned int
    3722      5541689 : ix86_compat_function_arg_boundary (machine_mode mode,
    3723              :                                    const_tree type, unsigned int align)
    3724              : {
    3725              :   /* In 32bit, only _Decimal128 and __float128 are aligned to their
    3726              :      natural boundaries.  */
    3727      5541689 :   if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
    3728              :     {
    3729              :       /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
    3730              :          make an exception for SSE modes since these require 128bit
    3731              :          alignment.
    3732              : 
    3733              :          The handling here differs from field_alignment.  ICC aligns MMX
    3734              :          arguments to 4 byte boundaries, while structure fields are aligned
    3735              :          to 8 byte boundaries.  */
    3736      1986393 :       if (!type)
    3737              :         {
    3738        11930 :           if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
    3739      1986181 :             align = PARM_BOUNDARY;
    3740              :         }
    3741              :       else
    3742              :         {
    3743      1974463 :           if (!ix86_compat_aligned_value_p (type))
    3744      1986181 :             align = PARM_BOUNDARY;
    3745              :         }
    3746              :     }
    3747     10683489 :   if (align > BIGGEST_ALIGNMENT)
    3748           90 :     align = BIGGEST_ALIGNMENT;
    3749      5541689 :   return align;
    3750              : }
    3751              : 
    3752              : /* Return true when TYPE should be 128bit aligned for 32bit argument
    3753              :    passing ABI.  */
    3754              : 
    3755              : static bool
    3756      1977134 : ix86_contains_aligned_value_p (const_tree type)
    3757              : {
    3758      1977134 :   machine_mode mode = TYPE_MODE (type);
    3759              : 
    3760      1977134 :   if (mode == XFmode || mode == XCmode)
    3761              :     return false;
    3762              : 
    3763      1975000 :   if (TYPE_ALIGN (type) < 128)
    3764              :     return false;
    3765              : 
    3766         2883 :   if (AGGREGATE_TYPE_P (type))
    3767              :     {
    3768              :       /* Walk the aggregates recursively.  */
    3769            0 :       switch (TREE_CODE (type))
    3770              :         {
    3771            0 :         case RECORD_TYPE:
    3772            0 :         case UNION_TYPE:
    3773            0 :         case QUAL_UNION_TYPE:
    3774            0 :           {
    3775            0 :             tree field;
    3776              : 
    3777              :             /* Walk all the structure fields.  */
    3778            0 :             for (field = TYPE_FIELDS (type);
    3779            0 :                  field;
    3780            0 :                  field = DECL_CHAIN (field))
    3781              :               {
    3782            0 :                 if (TREE_CODE (field) == FIELD_DECL
    3783            0 :                     && ix86_contains_aligned_value_p (TREE_TYPE (field)))
    3784              :                   return true;
    3785              :               }
    3786              :             break;
    3787              :           }
    3788              : 
    3789            0 :         case ARRAY_TYPE:
    3790              :           /* Just for use if some languages passes arrays by value.  */
    3791            0 :           if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
    3792              :             return true;
    3793              :           break;
    3794              : 
    3795              :         default:
    3796              :           gcc_unreachable ();
    3797              :         }
    3798              :     }
    3799              :   else
    3800         2883 :     return TYPE_ALIGN (type) >= 128;
    3801              : 
    3802              :   return false;
    3803              : }
    3804              : 
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  Result is at least PARM_BOUNDARY; on
   ia32 most arguments are demoted back to PARM_BOUNDARY per the
   i386 psABI.  */

static unsigned int
ix86_function_arg_boundary (machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
	 the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
      /* Empty types take no slot, so the default boundary applies.  */
      if (TYPE_EMPTY_P (type))
	return PARM_BOUNDARY;
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      /* Latch so the psABI-change note below is emitted at most once
	 per compilation.  */
      static bool warned;
      /* Remember the pre-demotion alignment for the compat check.  */
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
	{
	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
	  if (!type)
	    {
	      if (mode == XFmode || mode == XCmode)
		align = PARM_BOUNDARY;
	    }
	  else if (!ix86_contains_aligned_value_p (type))
	    align = PARM_BOUNDARY;

	  /* Only 128-bit (or larger) alignment survives on ia32.  */
	  if (align < 128)
	    align = PARM_BOUNDARY;
	}

      /* Warn when this differs from what GCC before 4.6 would have
	 used, as computed by the obsolete compat routine.  */
      if (warn_psabi
	  && !warned
	  && align != ix86_compat_function_arg_boundary (mode, type,
							 saved_align))
	{
	  warned = true;
	  inform (input_location,
		  "the ABI for passing parameters with %d-byte"
		  " alignment has changed in GCC 4.6",
		  align / BITS_PER_UNIT);
	}
    }

  return align;
}
    3860              : 
    3861              : /* Return true if N is a possible register number of function value.  */
    3862              : 
    3863              : static bool
    3864      4662656 : ix86_function_value_regno_p (const unsigned int regno)
    3865              : {
    3866      4662656 :   switch (regno)
    3867              :     {
    3868              :     case AX_REG:
    3869              :       return true;
    3870       105159 :     case DX_REG:
    3871       105159 :       return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
    3872        98769 :     case DI_REG:
    3873        98769 :     case SI_REG:
    3874        98769 :       return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
    3875              : 
    3876              :       /* Complex values are returned in %st(0)/%st(1) pair.  */
    3877        24145 :     case ST0_REG:
    3878        24145 :     case ST1_REG:
    3879              :       /* TODO: The function should depend on current function ABI but
    3880              :        builtins.cc would need updating then. Therefore we use the
    3881              :        default ABI.  */
    3882        24145 :       if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
    3883              :         return false;
    3884        24145 :       return TARGET_FLOAT_RETURNS_IN_80387;
    3885              : 
    3886              :       /* Complex values are returned in %xmm0/%xmm1 pair.  */
    3887      1291922 :     case XMM0_REG:
    3888      1291922 :     case XMM1_REG:
    3889      1291922 :       return TARGET_SSE;
    3890              : 
    3891         9489 :     case MM0_REG:
    3892         9489 :       if (TARGET_MACHO || TARGET_64BIT)
    3893              :         return false;
    3894         2497 :       return TARGET_MMX;
    3895              :     }
    3896              : 
    3897              :   return false;
    3898              : }
    3899              : 
    3900              : /* Check whether the register REGNO should be zeroed on X86.
    3901              :    When ALL_SSE_ZEROED is true, all SSE registers have been zeroed
    3902              :    together, no need to zero it again.
    3903              :    When NEED_ZERO_MMX is true, MMX registers should be cleared.  */
    3904              : 
    3905              : static bool
    3906         1377 : zero_call_used_regno_p (const unsigned int regno,
    3907              :                         bool all_sse_zeroed,
    3908              :                         bool need_zero_mmx)
    3909              : {
    3910          835 :   return GENERAL_REGNO_P (regno)
    3911          819 :          || (!all_sse_zeroed && SSE_REGNO_P (regno))
    3912          439 :          || MASK_REGNO_P (regno)
    3913         1800 :          || (need_zero_mmx && MMX_REGNO_P (regno));
    3914              : }
    3915              : 
    3916              : /* Return the machine_mode that is used to zero register REGNO.  */
    3917              : 
    3918              : static machine_mode
    3919          954 : zero_call_used_regno_mode (const unsigned int regno)
    3920              : {
    3921              :   /* NB: We only need to zero the lower 32 bits for integer registers
    3922              :      and the lower 128 bits for vector registers since destination are
    3923              :      zero-extended to the full register width.  */
    3924          954 :   if (GENERAL_REGNO_P (regno))
    3925              :     return SImode;
    3926              :   else if (SSE_REGNO_P (regno))
    3927          380 :     return V4SFmode;
    3928              :   else if (MASK_REGNO_P (regno))
    3929              :     return HImode;
    3930              :   else if (MMX_REGNO_P (regno))
    3931            0 :     return V2SImode;
    3932              :   else
    3933            0 :     gcc_unreachable ();
    3934              : }
    3935              : 
    3936              : /* Generate a rtx to zero all vector registers together if possible,
    3937              :    otherwise, return NULL.  */
    3938              : 
    3939              : static rtx
    3940          131 : zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
    3941              : {
    3942          131 :   if (!TARGET_AVX)
    3943              :     return NULL;
    3944              : 
    3945          372 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    3946          368 :     if ((LEGACY_SSE_REGNO_P (regno)
    3947          336 :          || (TARGET_64BIT
    3948          336 :              && (REX_SSE_REGNO_P (regno)
    3949          304 :                  || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
    3950          432 :         && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
    3951              :       return NULL;
    3952              : 
    3953            4 :   return gen_avx_vzeroall ();
    3954              : }
    3955              : 
    3956              : /* Generate insns to zero all st registers together.
    3957              :    Return true when zeroing instructions are generated.
    3958              :    Assume the number of st registers that are zeroed is num_of_st,
    3959              :    we will emit the following sequence to zero them together:
    3960              :                   fldz;         \
    3961              :                   fldz;         \
    3962              :                   ...
    3963              :                   fldz;         \
    3964              :                   fstp %%st(0); \
    3965              :                   fstp %%st(0); \
    3966              :                   ...
    3967              :                   fstp %%st(0);
    3968              :    i.e., num_of_st fldz followed by num_of_st fstp to clear the stack
    3969              :    mark stack slots empty.
    3970              : 
    3971              :    How to compute the num_of_st:
    3972              :    There is no direct mapping from stack registers to hard register
    3973              :    numbers.  If one stack register needs to be cleared, we don't know
    3974              :    where in the stack the value remains.  So, if any stack register
    3975              :    needs to be cleared, the whole stack should be cleared.  However,
    3976              :    x87 stack registers that hold the return value should be excluded.
    3977              :    x87 returns in the top (two for complex values) register, so
    3978              :    num_of_st should be 7/6 when x87 returns, otherwise it will be 8.
    3979              :    return the value of num_of_st.  */
    3980              : 
    3981              : 
    3982              : static int
    3983          131 : zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
    3984              : {
    3985              : 
    3986              :   /* If the FPU is disabled, no need to zero all st registers.  */
    3987          131 :   if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    3988              :     return 0;
    3989              : 
    3990        10329 :   unsigned int num_of_st = 0;
    3991        10329 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    3992        10220 :     if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
    3993        10220 :         && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
    3994              :       {
    3995              :         num_of_st++;
    3996              :         break;
    3997              :       }
    3998              : 
    3999          130 :   if (num_of_st == 0)
    4000              :     return 0;
    4001              : 
    4002           21 :   bool return_with_x87 = false;
    4003           42 :   return_with_x87 = (crtl->return_rtx
    4004           21 :                      && (STACK_REG_P (crtl->return_rtx)));
    4005              : 
    4006           21 :   bool complex_return = false;
    4007           42 :   complex_return = (crtl->return_rtx
    4008           21 :                     && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));
    4009              : 
    4010           21 :   if (return_with_x87)
    4011            2 :     if (complex_return)
    4012              :       num_of_st = 6;
    4013              :     else
    4014            1 :       num_of_st = 7;
    4015              :   else
    4016              :     num_of_st = 8;
    4017              : 
    4018           21 :   rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
    4019          186 :   for (unsigned int i = 0; i < num_of_st; i++)
    4020          165 :     emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));
    4021              : 
    4022          186 :   for (unsigned int i = 0; i < num_of_st; i++)
    4023              :     {
    4024          165 :       rtx insn;
    4025          165 :       insn = emit_insn (gen_rtx_SET (st_reg, st_reg));
    4026          165 :       add_reg_note (insn, REG_DEAD, st_reg);
    4027              :     }
    4028           21 :   return num_of_st;
    4029              : }
    4030              : 
    4031              : 
/* When the routine exits in MMX mode, if any ST register needs
   to be zeroed, we should clear all MMX registers except the
   RET_MMX_REGNO that holds the return value.  */
    4035              : static bool
    4036            0 : zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs,
    4037              :                        unsigned int ret_mmx_regno)
    4038              : {
    4039            0 :   bool need_zero_all_mm = false;
    4040            0 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    4041            0 :     if (STACK_REGNO_P (regno)
    4042            0 :         && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
    4043              :       {
    4044              :         need_zero_all_mm = true;
    4045              :         break;
    4046              :       }
    4047              : 
    4048            0 :   if (!need_zero_all_mm)
    4049              :     return false;
    4050              : 
    4051              :   machine_mode mode = V2SImode;
    4052            0 :   for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
    4053            0 :     if (regno != ret_mmx_regno)
    4054              :       {
    4055            0 :         rtx reg = gen_rtx_REG (mode, regno);
    4056            0 :         emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode)));
    4057              :       }
    4058              :   return true;
    4059              : }
    4060              : 
/* TARGET_ZERO_CALL_USED_REGS.  */
/* Generate a sequence of instructions that zero registers specified by
   NEED_ZEROED_HARDREGS.  Return the ZEROED_HARDREGS that are actually
   zeroed (which may be a subset or, for the x87 stack, a superset-shaped
   remapping of the requested set).  */
static HARD_REG_SET
ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
{
  HARD_REG_SET zeroed_hardregs;
  bool all_sse_zeroed = false;
  int all_st_zeroed_num = 0;
  bool all_mm_zeroed = false;

  CLEAR_HARD_REG_SET (zeroed_hardregs);

  /* first, let's see whether we can zero all vector registers together.  */
  rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
  if (zero_all_vec_insn)
    {
      emit_insn (zero_all_vec_insn);
      all_sse_zeroed = true;
      /* NOTE(review): the combined zeroing insn presumably covers only
	 %xmm0-%xmm15; with AVX512F the extended registers %xmm16-%xmm31
	 are cleared individually here — confirm against
	 zero_all_vector_registers.  */
      if (TARGET_64BIT && TARGET_AVX512F)
	{
	  rtx zero = CONST0_RTX (V4SFmode);
	  for (unsigned int regno = XMM16_REG;
	       regno <= XMM31_REG;
	       regno++)
	    {
	      rtx reg = gen_rtx_REG (V4SFmode, regno);
	      emit_move_insn (reg, zero);
	    }
	}
    }

  /* mm/st registers are shared registers set, we should follow the following
     rules to clear them:
			MMX exit mode	      x87 exit mode
	-------------|----------------------|---------------
	uses x87 reg | clear all MMX	    | clear all x87
	uses MMX reg | clear individual MMX | clear all x87
	x87 + MMX    | clear all MMX	    | clear all x87

     first, we should decide which mode (MMX mode or x87 mode) the function
     exit with.  */

  bool exit_with_mmx_mode = (crtl->return_rtx
			     && (MMX_REG_P (crtl->return_rtx)));

  if (!exit_with_mmx_mode)
    /* x87 exit mode, we should zero all st registers together.  */
    {
      all_st_zeroed_num = zero_all_st_registers (need_zeroed_hardregs);

      if (all_st_zeroed_num > 0)
	for (unsigned int regno = FIRST_STACK_REG; regno <= LAST_STACK_REG; regno++)
	  /* x87 stack registers that hold the return value should be excluded.
	     x87 returns in the top (two for complex values) register.
	     num == 8 means nothing is returned in st regs; num == 7 excludes
	     the single return reg; num == 6 excludes the return reg pair.  */
	  if (all_st_zeroed_num == 8
	      || !((all_st_zeroed_num >= 6 && regno == REGNO (crtl->return_rtx))
		   || (all_st_zeroed_num == 6
		       && (regno == (REGNO (crtl->return_rtx) + 1)))))
	    SET_HARD_REG_BIT (zeroed_hardregs, regno);
    }
  else
    /* MMX exit mode, check whether we can zero all mm registers.  */
    {
      unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
      all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
					     exit_mmx_regno);
      if (all_mm_zeroed)
	for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
	  if (regno != exit_mmx_regno)
	    SET_HARD_REG_BIT (zeroed_hardregs, regno);
    }

  /* Now, generate instructions to zero all the other registers.  */

  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    {
      if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
	continue;
      if (!zero_call_used_regno_p (regno, all_sse_zeroed,
				   exit_with_mmx_mode && !all_mm_zeroed))
	continue;

      SET_HARD_REG_BIT (zeroed_hardregs, regno);

      machine_mode mode = zero_call_used_regno_mode (regno);

      rtx reg = gen_rtx_REG (mode, regno);
      rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));

      switch (mode)
	{
	case E_SImode:
	  /* Add an explicit FLAGS_REG clobber — presumably so the
	     zeroing can be emitted as a flag-clobbering xor rather
	     than a mov immediate.  */
	  if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
	    {
	      rtx clob = gen_rtx_CLOBBER (VOIDmode,
					  gen_rtx_REG (CCmode,
						       FLAGS_REG));
	      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
							   tmp,
							   clob));
	    }
	  /* FALLTHRU.  */

	case E_V4SFmode:
	case E_HImode:
	case E_V2SImode:
	  emit_insn (tmp);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  return zeroed_hardregs;
}
    4178              : 
    4179              : /* Define how to find the value returned by a function.
    4180              :    VALTYPE is the data type of the value (as a tree).
    4181              :    If the precise function being called is known, FUNC is its FUNCTION_DECL;
    4182              :    otherwise, FUNC is 0.  */
    4183              : 
/* Worker for the 32-bit (non-MS) ABI: select the hard register (or
   PARALLEL) in which a value of MODE is returned.  ORIG_MODE is the
   type's original mode; MODE its natural (classified) mode.  */
static rtx
function_value_32 (machine_mode orig_mode, machine_mode mode,
		   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0.   */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* 64-byte vector modes in %zmm0.   */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = AX_REG;

  /* Return __bf16/ _Float16/_Complex _Float16 by sse register.  */
  if (mode == HFmode || mode == BFmode)
    {
      if (!TARGET_SSE2)
	{
	  error ("SSE register return with SSE2 disabled");
	  regno = AX_REG;
	}
      else
	regno = FIRST_SSE_REG;
    }

  /* _Complex _Float16 is returned as one SImode-sized chunk, wrapped in
     a single-element PARALLEL at offset 0.  */
  if (mode == HCmode)
    {
      if (!TARGET_SSE2)
	error ("SSE register return with SSE2 disabled");

      rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1));
      XVECEXP (ret, 0, 0)
	= gen_rtx_EXPR_LIST (VOIDmode,
			     gen_rtx_REG (SImode,
					  TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
			     GEN_INT (0));
      return ret;
    }

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, fn, false);
      if (sse_level == -1)
	{
	  error ("calling %qD with SSE calling convention without "
		 "SSE/SSE2 enabled", fn);
	  sorry ("this is a GCC bug that can be worked around by adding "
		 "attribute used to function called");
	}
      else if ((sse_level >= 1 && mode == SFmode)
	       || (sse_level == 2 && mode == DFmode))
	regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}
    4266              : 
    4267              : static rtx
    4268     98122836 : function_value_64 (machine_mode orig_mode, machine_mode mode,
    4269              :                    const_tree valtype)
    4270              : {
    4271     98122836 :   rtx ret;
    4272              : 
    4273              :   /* Handle libcalls, which don't provide a type node.  */
    4274     98122836 :   if (valtype == NULL)
    4275              :     {
    4276       102030 :       unsigned int regno;
    4277              : 
    4278       102030 :       switch (mode)
    4279              :         {
    4280              :         case E_BFmode:
    4281              :         case E_HFmode:
    4282              :         case E_HCmode:
    4283              :         case E_SFmode:
    4284              :         case E_SCmode:
    4285              :         case E_DFmode:
    4286              :         case E_DCmode:
    4287              :         case E_TFmode:
    4288              :         case E_SDmode:
    4289              :         case E_DDmode:
    4290              :         case E_TDmode:
    4291              :           regno = FIRST_SSE_REG;
    4292              :           break;
    4293         1037 :         case E_XFmode:
    4294         1037 :         case E_XCmode:
    4295         1037 :           regno = FIRST_FLOAT_REG;
    4296         1037 :           break;
    4297              :         case E_TCmode:
    4298              :           return NULL;
    4299        56048 :         default:
    4300        56048 :           regno = AX_REG;
    4301              :         }
    4302              : 
    4303       102030 :       return gen_rtx_REG (mode, regno);
    4304              :     }
    4305     98020806 :   else if (POINTER_TYPE_P (valtype))
    4306              :     {
    4307              :       /* Pointers are always returned in word_mode.  */
    4308     16637513 :       mode = word_mode;
    4309              :     }
    4310              : 
    4311     98020806 :   ret = construct_container (mode, orig_mode, valtype, 1,
    4312              :                              X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
    4313              :                              x86_64_int_return_registers, 0);
    4314              : 
    4315              :   /* For zero sized structures, construct_container returns NULL, but we
    4316              :      need to keep rest of compiler happy by returning meaningful value.  */
    4317     98020806 :   if (!ret)
    4318       205265 :     ret = gen_rtx_REG (orig_mode, AX_REG);
    4319              : 
    4320              :   return ret;
    4321              : }
    4322              : 
    4323              : static rtx
    4324            0 : function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
    4325              :                       const_tree fntype, const_tree fn, const_tree valtype)
    4326              : {
    4327            0 :   unsigned int regno;
    4328              : 
    4329              :   /* Floating point return values in %st(0)
    4330              :      (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes).  */
    4331            0 :   if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
    4332            0 :            && (GET_MODE_SIZE (mode) > 8
    4333            0 :                || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
    4334              :   {
    4335            0 :     regno = FIRST_FLOAT_REG;
    4336            0 :     return gen_rtx_REG (orig_mode, regno);
    4337              :   }
    4338              :   else
    4339            0 :     return function_value_32(orig_mode, mode, fntype,fn);
    4340              : }
    4341              : 
    4342              : static rtx
    4343       767095 : function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
    4344              :                       const_tree valtype)
    4345              : {
    4346       767095 :   unsigned int regno = AX_REG;
    4347              : 
    4348       767095 :   if (TARGET_SSE)
    4349              :     {
    4350      1532736 :       switch (GET_MODE_SIZE (mode))
    4351              :         {
    4352        14003 :         case 16:
    4353        14003 :           if (valtype != NULL_TREE
    4354        14003 :               && !VECTOR_INTEGER_TYPE_P (valtype)
    4355         7146 :               && !INTEGRAL_TYPE_P (valtype)
    4356        21149 :               && !VECTOR_FLOAT_TYPE_P (valtype))
    4357              :             break;
    4358        14003 :           if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
    4359              :               && !COMPLEX_MODE_P (mode))
    4360       197656 :             regno = FIRST_SSE_REG;
    4361              :           break;
    4362       741063 :         case 8:
    4363       741063 :         case 4:
    4364       741063 :         case 2:
    4365       741063 :           if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
    4366              :             break;
    4367       723842 :           if (mode == HFmode || mode == SFmode || mode == DFmode)
    4368       197656 :             regno = FIRST_SSE_REG;
    4369              :           break;
    4370              :         default:
    4371              :           break;
    4372              :         }
    4373              :     }
    4374       767095 :   return gen_rtx_REG (orig_mode, regno);
    4375              : }
    4376              : 
    4377              : static rtx
    4378    102821041 : ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
    4379              :                        machine_mode orig_mode, machine_mode mode)
    4380              : {
    4381    102821041 :   const_tree fn, fntype;
    4382              : 
    4383    102821041 :   fn = NULL_TREE;
    4384    102821041 :   if (fntype_or_decl && DECL_P (fntype_or_decl))
    4385      3544799 :     fn = fntype_or_decl;
    4386      3544799 :   fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
    4387              : 
    4388    102821041 :   if (ix86_function_type_abi (fntype) == MS_ABI)
    4389              :     {
    4390       767095 :       if (TARGET_64BIT)
    4391       767095 :         return function_value_ms_64 (orig_mode, mode, valtype);
    4392              :       else
    4393            0 :         return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
    4394              :     }
    4395    102053946 :   else if (TARGET_64BIT)
    4396     98122836 :     return function_value_64 (orig_mode, mode, valtype);
    4397              :   else
    4398      3931110 :     return function_value_32 (orig_mode, mode, fntype, fn);
    4399              : }
    4400              : 
    4401              : static rtx
    4402    102715877 : ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
    4403              : {
    4404    102715877 :   machine_mode mode, orig_mode;
    4405              : 
    4406    102715877 :   orig_mode = TYPE_MODE (valtype);
    4407    102715877 :   mode = type_natural_mode (valtype, NULL, true);
    4408    102715877 :   return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
    4409              : }
    4410              : 
    4411              : /* Pointer function arguments and return values are promoted to
    4412              :    word_mode for normal functions.  */
    4413              : 
    4414              : static machine_mode
    4415     32041405 : ix86_promote_function_mode (const_tree type, machine_mode mode,
    4416              :                             int *punsignedp, const_tree fntype,
    4417              :                             int for_return)
    4418              : {
    4419     32041405 :   if (cfun->machine->func_type == TYPE_NORMAL
    4420     32040382 :       && type != NULL_TREE
    4421     32006821 :       && POINTER_TYPE_P (type))
    4422              :     {
    4423     16050995 :       *punsignedp = POINTERS_EXTEND_UNSIGNED;
    4424     16050995 :       return word_mode;
    4425              :     }
    4426     15990410 :   return default_promote_function_mode (type, mode, punsignedp, fntype,
    4427     15990410 :                                         for_return);
    4428              : }
    4429              : 
    4430              : /* Return true if a structure, union or array with MODE containing FIELD
    4431              :    should be accessed using BLKmode.  */
    4432              : 
    4433              : static bool
    4434    142590258 : ix86_member_type_forces_blk (const_tree field, machine_mode mode)
    4435              : {
    4436              :   /* Union with XFmode must be in BLKmode.  */
    4437    142590258 :   return (mode == XFmode
    4438    142728009 :           && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
    4439       130879 :               || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
    4440              : }
    4441              : 
    4442              : rtx
    4443       105164 : ix86_libcall_value (machine_mode mode)
    4444              : {
    4445       105164 :   return ix86_function_value_1 (NULL, NULL, mode, mode);
    4446              : }
    4447              : 
    4448              : /* Return true iff type is returned in memory.  */
    4449              : 
static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  const machine_mode mode = type_natural_mode (type, NULL, true);
  HOST_WIDE_INT size;

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
	{
	  size = int_size_in_bytes (type);

	  /* __m128 is returned in xmm0.  */
	  if ((!type || VECTOR_INTEGER_TYPE_P (type)
	       || INTEGRAL_TYPE_P (type)
	       || VECTOR_FLOAT_TYPE_P (type))
	      && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
	      && !COMPLEX_MODE_P (mode)
	      && (GET_MODE_SIZE (mode) == 16 || size == 16))
	    return false;

	  /* Otherwise, the size must be exactly in [1248]. */
	  return size != 1 && size != 2 && size != 4 && size != 8;
	}
      else
	{
	  int needed_intregs, needed_sseregs;

	  /* SysV: the decision is delegated to examine_argument.
	     NOTE(review): relies on examine_argument's boolean return
	     convention, which is defined elsewhere — confirm there.  */
	  return examine_argument (mode, type, 1,
				   &needed_intregs, &needed_sseregs);
	}
    }
  else
    {
      size = int_size_in_bytes (type);

      /* Intel MCU psABI returns scalars and aggregates no larger than 8
	 bytes in registers.  */
      if (TARGET_IAMCU)
	return VECTOR_MODE_P (mode) || size < 0 || size > 8;

      if (mode == BLKmode)
	return true;

      if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
	return false;

      if (VECTOR_MODE_P (mode) || mode == TImode)
	{
	  /* User-created vectors small enough to fit in EAX.  */
	  if (size < 8)
	    return false;

	  /* Unless ABI prescribes otherwise,
	     MMX/3dNow values are returned in MM0 if available.  */

	  if (size == 8)
	    return TARGET_VECT8_RETURNS || !TARGET_MMX;

	  /* SSE values are returned in XMM0 if available.  */
	  if (size == 16)
	    return !TARGET_SSE;

	  /* AVX values are returned in YMM0 if available.  */
	  if (size == 32)
	    return !TARGET_AVX;

	  /* AVX512F values are returned in ZMM0 if available.  */
	  if (size == 64)
	    return !TARGET_AVX512F;
	}

      /* Long double returns in %st(0) regardless of its 10/12-byte size.  */
      if (mode == XFmode)
	return false;

      /* Anything larger than 12 bytes (that survived the checks above)
	 is returned in memory.  */
      if (size > 12)
	return true;

      /* OImode shouldn't be used directly.  */
      gcc_assert (mode != OImode);

      return false;
    }
}
    4534              : 
    4535              : /* Implement TARGET_PUSH_ARGUMENT.  */
    4536              : 
    4537              : static bool
    4538      9345974 : ix86_push_argument (unsigned int npush)
    4539              : {
    4540              :   /* If SSE2 is available, use vector move to put large argument onto
    4541              :      stack.  NB:  In 32-bit mode, use 8-byte vector move.  */
    4542     11770644 :   return ((!TARGET_SSE2 || npush < (TARGET_64BIT ? 16 : 8))
    4543      9081101 :           && TARGET_PUSH_ARGS
    4544     18426977 :           && !ACCUMULATE_OUTGOING_ARGS);
    4545              : }
    4546              : 
    4547              : 
    4548              : /* Create the va_list data type.  */
    4549              : 
static tree
ix86_build_builtin_va_list_64 (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* Build the __va_list_tag RECORD_TYPE with the four SysV x86-64
     va_list members: gp_offset, fp_offset, overflow_arg_area and
     reg_save_area.  */
  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
			  TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("gp_offset"),
		      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("fp_offset"),
		      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("overflow_arg_area"),
		      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
		      FIELD_DECL, get_identifier ("reg_save_area"),
		      ptr_type_node);

  /* Record the two offset-counter fields in the globals consulted
     elsewhere in the compiler.  */
  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  /* Chain the fields in declaration order and attach them to the
     record before laying it out.  */
  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;
  TREE_PUBLIC (type_decl) = 1;

  layout_type (record);

  /* Tag the record so the ABI-specific 64-bit va_list types can be
     distinguished later.  */
  TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
					NULL_TREE, TYPE_ATTRIBUTES (record));

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
    4596              : 
    4597              : /* Setup the builtin va_list data type and for 64-bit the additional
    4598              :    calling convention specific va_list data types.  */
    4599              : 
    4600              : static tree
    4601       288469 : ix86_build_builtin_va_list (void)
    4602              : {
    4603       288469 :   if (TARGET_64BIT)
    4604              :     {
    4605              :       /* Initialize ABI specific va_list builtin types.
    4606              : 
    4607              :          In lto1, we can encounter two va_list types:
    4608              :          - one as a result of the type-merge across TUs, and
    4609              :          - the one constructed here.
    4610              :          These two types will not have the same TYPE_MAIN_VARIANT, and therefore
    4611              :          a type identity check in canonical_va_list_type based on
    4612              :          TYPE_MAIN_VARIANT (which we used to have) will not work.
    4613              :          Instead, we tag each va_list_type_node with its unique attribute, and
    4614              :          look for the attribute in the type identity check in
    4615              :          canonical_va_list_type.
    4616              : 
    4617              :          Tagging sysv_va_list_type_node directly with the attribute is
    4618              :          problematic since it's a array of one record, which will degrade into a
    4619              :          pointer to record when used as parameter (see build_va_arg comments for
    4620              :          an example), dropping the attribute in the process.  So we tag the
    4621              :          record instead.  */
    4622              : 
    4623              :       /* For SYSV_ABI we use an array of one record.  */
    4624       281305 :       sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
    4625              : 
    4626              :       /* For MS_ABI we use plain pointer to argument area.  */
    4627       281305 :       tree char_ptr_type = build_pointer_type (char_type_node);
    4628       281305 :       tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
    4629       281305 :                              TYPE_ATTRIBUTES (char_ptr_type));
    4630       281305 :       ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
    4631              : 
    4632       281305 :       return ((ix86_abi == MS_ABI)
    4633       281305 :               ? ms_va_list_type_node
    4634       281305 :               : sysv_va_list_type_node);
    4635              :     }
    4636              :   else
    4637              :     {
    4638              :       /* For i386 we use plain pointer to argument area.  */
    4639         7164 :       return build_pointer_type (char_type_node);
    4640              :     }
    4641              : }
    4642              : 
/* Worker function for TARGET_SETUP_INCOMING_VARARGS on 64-bit SysV.

   Emits the prologue code that dumps the named-register pass-through
   area: unused integer parameter registers are stored into the GPR
   portion of the register save area, and (conditionally, guarded at
   run time by AL) unused SSE parameter registers into the FPR portion.
   Also records the sizes of both portions in ix86_varargs_gpr_size /
   ix86_varargs_fpr_size for later use (e.g. by ix86_va_start).  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  /* Nothing to save: emit no code at all.  */
  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  /* The save area is addressed relative to the frame pointer.  */
  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  /* Only save the integer registers the va_list can actually reach,
     capped at the ABI maximum.  */
  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  /* Pick the parameter-register ordering for the active calling
     convention.  */
  const int *parm_regs;
  if (cum->preserve_none_abi)
    parm_regs = x86_64_preserve_none_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;

  /* Store each still-unconsumed integer parameter register into its
     slot in the GPR save area.  */
  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (word_mode,
                         plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem,
                      gen_rtx_REG (word_mode, parm_regs[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      machine_mode smode;
      rtx_code_label *label;
      rtx test;

      /* Now emit code to save SSE registers.  The AX parameter contains number
         of SSE parameter registers used to call this function, though all we
         actually check here is the zero/non-zero status.  */

      /* Skip the SSE stores entirely when AL is zero at run time.  */
      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
                                      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
         we used movdqa (i.e. TImode) instead?  Perhaps even better would
         be if we could determine the real mode of the data, via a hook
         into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      /* The 16-byte vector stores need a sufficiently aligned frame.  */
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
        crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
        max = X86_64_SSE_REGPARM_MAX;

      /* The FPR save area starts right after the GPR save area; each
         SSE register occupies a 16-byte slot.  */
      for (i = cum->sse_regno; i < max; ++i)
        {
          mem = plus_constant (Pmode, save_area,
                               i * 16 + ix86_varargs_gpr_size);
          mem = gen_rtx_MEM (smode, mem);
          MEM_NOTRAP_P (mem) = 1;
          set_mem_alias_set (mem, set);
          set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

          emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
        }

      emit_label (label);
    }
}
    4733              : 
    4734              : static void
    4735         5652 : setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
    4736              : {
    4737         5652 :   alias_set_type set = get_varargs_alias_set ();
    4738         5652 :   int i;
    4739              : 
    4740              :   /* Reset to zero, as there might be a sysv vaarg used
    4741              :      before.  */
    4742         5652 :   ix86_varargs_gpr_size = 0;
    4743         5652 :   ix86_varargs_fpr_size = 0;
    4744              : 
    4745        14154 :   for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
    4746              :     {
    4747         8502 :       rtx reg, mem;
    4748              : 
    4749         8502 :       mem = gen_rtx_MEM (Pmode,
    4750         8502 :                          plus_constant (Pmode, virtual_incoming_args_rtx,
    4751         8502 :                                         i * UNITS_PER_WORD));
    4752         8502 :       MEM_NOTRAP_P (mem) = 1;
    4753         8502 :       set_mem_alias_set (mem, set);
    4754              : 
    4755         8502 :       reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
    4756         8502 :       emit_move_insn (mem, reg);
    4757              :     }
    4758         5652 : }
    4759              : 
    4760              : static void
    4761        21520 : ix86_setup_incoming_varargs (cumulative_args_t cum_v,
    4762              :                              const function_arg_info &arg,
    4763              :                              int *, int no_rtl)
    4764              : {
    4765        21520 :   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
    4766        21520 :   CUMULATIVE_ARGS next_cum;
    4767        21520 :   tree fntype;
    4768              : 
    4769              :   /* This argument doesn't appear to be used anymore.  Which is good,
    4770              :      because the old code here didn't suppress rtl generation.  */
    4771        21520 :   gcc_assert (!no_rtl);
    4772              : 
    4773        21520 :   if (!TARGET_64BIT)
    4774          154 :     return;
    4775              : 
    4776        21366 :   fntype = TREE_TYPE (current_function_decl);
    4777              : 
    4778              :   /* For varargs, we do not want to skip the dummy va_dcl argument.
    4779              :      For stdargs, we do want to skip the last named argument.  */
    4780        21366 :   next_cum = *cum;
    4781        21366 :   if ((!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
    4782          122 :        || arg.type != NULL_TREE)
    4783        21391 :       && stdarg_p (fntype))
    4784        21269 :     ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
    4785              : 
    4786        21366 :   if (cum->call_abi == MS_ABI)
    4787         5652 :     setup_incoming_varargs_ms_64 (&next_cum);
    4788              :   else
    4789        15714 :     setup_incoming_varargs_64 (&next_cum);
    4790              : }
    4791              : 
    4792              : /* Checks if TYPE is of kind va_list char *.  */
    4793              : 
    4794              : static bool
    4795        73110 : is_va_list_char_pointer (tree type)
    4796              : {
    4797        73110 :   tree canonic;
    4798              : 
    4799              :   /* For 32-bit it is always true.  */
    4800        73110 :   if (!TARGET_64BIT)
    4801              :     return true;
    4802        72948 :   canonic = ix86_canonical_va_list_type (type);
    4803        72948 :   return (canonic == ms_va_list_type_node
    4804        72948 :           || (ix86_abi == MS_ABI && canonic == va_list_type_node));
    4805              : }
    4806              : 
/* Implement va_start.

   For the plain-pointer flavors of va_list the standard expansion is
   used.  For the SysV 64-bit record flavor, initialize all four fields:
   gp_offset / fp_offset from the number of named register arguments
   already consumed, overflow_arg_area from the incoming stack arguments,
   and reg_save_area from the save area laid down by the prologue
   (see setup_incoming_varargs_64).  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
         arguments using internal_arg_pointer, because they may be on
         the old stack.  The split stack prologue will arrange to
         leave a pointer to the old stack arguments in a scratch
         register, which we here copy to a pseudo-register.  The split
         stack prologue can't set the pseudo-register directly because
         it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
        {
          rtx reg;
          rtx_insn *seq;

          reg = gen_reg_rtx (Pmode);
          cfun->machine->split_stack_varargs_pointer = reg;

          /* Build the copy in its own sequence, then splice it in at
             the very start of the function.  */
          start_sequence ();
          emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
          seq = end_sequence ();

          push_topmost_sequence ();
          emit_insn_after (seq, entry_of_function ());
          pop_topmost_sequence ();
        }
    }

  /* Only 64bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
        std_expand_builtin_va_start (valist, nextarg);
      else
        {
          rtx va_r, next;

          /* valist = split_stack_varargs_pointer + arg_offset.  */
          va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
          next = expand_binop (ptr_mode, add_optab,
                               cfun->machine->split_stack_varargs_pointer,
                               crtl->args.arg_offset_rtx,
                               NULL_RTX, 0, OPTAB_LIB_WIDEN);
          convert_move (va_r, next, 0);
        }
      return;
    }

  /* Fetch the four fields of the SysV __va_list_tag record, in their
     declaration order (see ix86_build_builtin_va_list_64).  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
                f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
                f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
                f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
                f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  if (cfun->va_list_gpr_size)
    {
      /* gp_offset = n_gpr * 8: each GPR slot is 8 bytes.  */
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
                  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      /* fp_offset starts past the whole GPR area; each SSE slot is
         16 bytes.  */
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
                  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  if (words != 0)
    /* Skip the named arguments already passed on the stack.  */
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);

  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
         Prologue of the function save it right above stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      if (!ix86_varargs_gpr_size)
        /* No GPR area was allocated; bias the pointer back so that
           fp_offset (which assumes a full GPR area) still lands on the
           FPR save slots.  */
        t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);

      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
    4937              : 
    4938              : /* Implement va_arg.  */
    4939              : 
    4940              : static tree
    4941        52101 : ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
    4942              :                       gimple_seq *post_p)
    4943              : {
    4944        52101 :   static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
    4945        52101 :   tree f_gpr, f_fpr, f_ovf, f_sav;
    4946        52101 :   tree gpr, fpr, ovf, sav, t;
    4947        52101 :   int size, rsize;
    4948        52101 :   tree lab_false, lab_over = NULL_TREE;
    4949        52101 :   tree addr, t2;
    4950        52101 :   rtx container;
    4951        52101 :   int indirect_p = 0;
    4952        52101 :   tree ptrtype;
    4953        52101 :   machine_mode nat_mode;
    4954        52101 :   unsigned int arg_boundary;
    4955        52101 :   unsigned int type_align;
    4956              : 
    4957              :   /* Only 64bit target needs something special.  */
    4958        52101 :   if (is_va_list_char_pointer (TREE_TYPE (valist)))
    4959          260 :     return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
    4960              : 
    4961        51841 :   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
    4962        51841 :   f_fpr = DECL_CHAIN (f_gpr);
    4963        51841 :   f_ovf = DECL_CHAIN (f_fpr);
    4964        51841 :   f_sav = DECL_CHAIN (f_ovf);
    4965              : 
    4966        51841 :   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
    4967              :                 valist, f_gpr, NULL_TREE);
    4968              : 
    4969        51841 :   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
    4970        51841 :   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
    4971        51841 :   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
    4972              : 
    4973        51841 :   indirect_p = pass_va_arg_by_reference (type);
    4974        51841 :   if (indirect_p)
    4975          103 :     type = build_pointer_type (type);
    4976        51841 :   size = arg_int_size_in_bytes (type);
    4977        51841 :   rsize = CEIL (size, UNITS_PER_WORD);
    4978              : 
    4979        51841 :   nat_mode = type_natural_mode (type, NULL, false);
    4980        51841 :   switch (nat_mode)
    4981              :     {
    4982           28 :     case E_V16HFmode:
    4983           28 :     case E_V16BFmode:
    4984           28 :     case E_V8SFmode:
    4985           28 :     case E_V8SImode:
    4986           28 :     case E_V32QImode:
    4987           28 :     case E_V16HImode:
    4988           28 :     case E_V4DFmode:
    4989           28 :     case E_V4DImode:
    4990           28 :     case E_V32HFmode:
    4991           28 :     case E_V32BFmode:
    4992           28 :     case E_V16SFmode:
    4993           28 :     case E_V16SImode:
    4994           28 :     case E_V64QImode:
    4995           28 :     case E_V32HImode:
    4996           28 :     case E_V8DFmode:
    4997           28 :     case E_V8DImode:
    4998              :       /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
    4999           28 :       if (!TARGET_64BIT_MS_ABI)
    5000              :         {
    5001              :           container = NULL;
    5002              :           break;
    5003              :         }
    5004              :       /* FALLTHRU */
    5005              : 
    5006        51813 :     default:
    5007        51813 :       container = construct_container (nat_mode, TYPE_MODE (type),
    5008              :                                        type, 0, X86_64_REGPARM_MAX,
    5009              :                                        X86_64_SSE_REGPARM_MAX, intreg,
    5010              :                                        0);
    5011        51813 :       break;
    5012              :     }
    5013              : 
    5014              :   /* Pull the value out of the saved registers.  */
    5015              : 
    5016        51841 :   addr = create_tmp_var (ptr_type_node, "addr");
    5017        51841 :   type_align = TYPE_ALIGN (type);
    5018              : 
    5019        51841 :   if (container)
    5020              :     {
    5021        28750 :       int needed_intregs, needed_sseregs;
    5022        28750 :       bool need_temp;
    5023        28750 :       tree int_addr, sse_addr;
    5024              : 
    5025        28750 :       lab_false = create_artificial_label (UNKNOWN_LOCATION);
    5026        28750 :       lab_over = create_artificial_label (UNKNOWN_LOCATION);
    5027              : 
    5028        28750 :       examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
    5029              : 
    5030        28750 :       bool container_in_reg = false;
    5031        28750 :       if (REG_P (container))
    5032              :         container_in_reg = true;
    5033         1641 :       else if (GET_CODE (container) == PARALLEL
    5034         1641 :                && GET_MODE (container) == BLKmode
    5035          580 :                && XVECLEN (container, 0) == 1)
    5036              :         {
    5037              :           /* Check if it is a PARALLEL BLKmode container of an EXPR_LIST
    5038              :              expression in a TImode register.  In this case, temp isn't
    5039              :              needed.  Otherwise, the TImode variable will be put in the
    5040              :              GPR save area which guarantees only 8-byte alignment.   */
    5041          509 :           rtx x = XVECEXP (container, 0, 0);
    5042          509 :           if (GET_CODE (x) == EXPR_LIST
    5043          509 :               && REG_P (XEXP (x, 0))
    5044          509 :               && XEXP (x, 1) == const0_rtx)
    5045              :             container_in_reg = true;
    5046              :         }
    5047              : 
    5048          680 :       need_temp = (!container_in_reg
    5049         1150 :                    && ((needed_intregs && TYPE_ALIGN (type) > 64)
    5050          680 :                        || TYPE_ALIGN (type) > 128));
    5051              : 
    5052              :       /* In case we are passing structure, verify that it is consecutive block
    5053              :          on the register save area.  If not we need to do moves.  */
    5054          680 :       if (!need_temp && !container_in_reg)
    5055              :         {
    5056              :           /* Verify that all registers are strictly consecutive  */
    5057          966 :           if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
    5058              :             {
    5059              :               int i;
    5060              : 
    5061          815 :               for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
    5062              :                 {
    5063          529 :                   rtx slot = XVECEXP (container, 0, i);
    5064          529 :                   if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
    5065          529 :                       || INTVAL (XEXP (slot, 1)) != i * 16)
    5066              :                     need_temp = true;
    5067              :                 }
    5068              :             }
    5069              :           else
    5070              :             {
    5071              :               int i;
    5072              : 
    5073         1120 :               for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
    5074              :                 {
    5075          726 :                   rtx slot = XVECEXP (container, 0, i);
    5076          726 :                   if (REGNO (XEXP (slot, 0)) != (unsigned int) i
    5077          726 :                       || INTVAL (XEXP (slot, 1)) != i * 8)
    5078              :                     need_temp = true;
    5079              :                 }
    5080              :             }
    5081              :         }
    5082        28750 :       if (!need_temp)
    5083              :         {
    5084              :           int_addr = addr;
    5085              :           sse_addr = addr;
    5086              :         }
    5087              :       else
    5088              :         {
    5089          877 :           int_addr = create_tmp_var (ptr_type_node, "int_addr");
    5090          877 :           sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
    5091              :         }
    5092              : 
    5093              :       /* First ensure that we fit completely in registers.  */
    5094        28750 :       if (needed_intregs)
    5095              :         {
    5096        18033 :           t = build_int_cst (TREE_TYPE (gpr),
    5097        18033 :                              (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
    5098        18033 :           t = build2 (GE_EXPR, boolean_type_node, gpr, t);
    5099        18033 :           t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
    5100        18033 :           t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
    5101        18033 :           gimplify_and_add (t, pre_p);
    5102              :         }
    5103        28750 :       if (needed_sseregs)
    5104              :         {
    5105        11109 :           t = build_int_cst (TREE_TYPE (fpr),
    5106              :                              (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
    5107        11109 :                              + X86_64_REGPARM_MAX * 8);
    5108        11109 :           t = build2 (GE_EXPR, boolean_type_node, fpr, t);
    5109        11109 :           t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
    5110        11109 :           t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
    5111        11109 :           gimplify_and_add (t, pre_p);
    5112              :         }
    5113              : 
    5114              :       /* Compute index to start of area used for integer regs.  */
    5115        28750 :       if (needed_intregs)
    5116              :         {
    5117              :           /* int_addr = gpr + sav; */
    5118        18033 :           t = fold_build_pointer_plus (sav, gpr);
    5119        18033 :           gimplify_assign (int_addr, t, pre_p);
    5120              :         }
    5121        28750 :       if (needed_sseregs)
    5122              :         {
    5123              :           /* sse_addr = fpr + sav; */
    5124        11109 :           t = fold_build_pointer_plus (sav, fpr);
    5125        11109 :           gimplify_assign (sse_addr, t, pre_p);
    5126              :         }
    5127        28750 :       if (need_temp)
    5128              :         {
    5129          877 :           int i, prev_size = 0;
    5130          877 :           tree temp = create_tmp_var (type, "va_arg_tmp");
    5131          877 :           TREE_ADDRESSABLE (temp) = 1;
    5132              : 
    5133              :           /* addr = &temp; */
    5134          877 :           t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
    5135          877 :           gimplify_assign (addr, t, pre_p);
    5136              : 
    5137         2241 :           for (i = 0; i < XVECLEN (container, 0); i++)
    5138              :             {
    5139         1364 :               rtx slot = XVECEXP (container, 0, i);
    5140         1364 :               rtx reg = XEXP (slot, 0);
    5141         1364 :               machine_mode mode = GET_MODE (reg);
    5142         1364 :               tree piece_type;
    5143         1364 :               tree addr_type;
    5144         1364 :               tree daddr_type;
    5145         1364 :               tree src_addr, src;
    5146         1364 :               int src_offset;
    5147         1364 :               tree dest_addr, dest;
    5148         1364 :               int cur_size = GET_MODE_SIZE (mode);
    5149              : 
    5150         1364 :               gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
    5151         1364 :               prev_size = INTVAL (XEXP (slot, 1));
    5152         1364 :               if (prev_size + cur_size > size)
    5153              :                 {
    5154           30 :                   cur_size = size - prev_size;
    5155           30 :                   unsigned int nbits = cur_size * BITS_PER_UNIT;
    5156           30 :                   if (!int_mode_for_size (nbits, 1).exists (&mode))
    5157           10 :                     mode = QImode;
    5158              :                 }
    5159         1364 :               piece_type = lang_hooks.types.type_for_mode (mode, 1);
    5160         1364 :               if (mode == GET_MODE (reg))
    5161         1334 :                 addr_type = build_pointer_type (piece_type);
    5162              :               else
    5163           30 :                 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
    5164              :                                                          true);
    5165         1364 :               daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
    5166              :                                                         true);
    5167              : 
    5168         1364 :               if (SSE_REGNO_P (REGNO (reg)))
    5169              :                 {
    5170          534 :                   src_addr = sse_addr;
    5171          534 :                   src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
    5172              :                 }
    5173              :               else
    5174              :                 {
    5175          830 :                   src_addr = int_addr;
    5176          830 :                   src_offset = REGNO (reg) * 8;
    5177              :                 }
    5178         1364 :               src_addr = fold_convert (addr_type, src_addr);
    5179         1364 :               src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
    5180              : 
    5181         1364 :               dest_addr = fold_convert (daddr_type, addr);
    5182         1364 :               dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
    5183         2728 :               if (cur_size == GET_MODE_SIZE (mode))
    5184              :                 {
    5185         1354 :                   src = build_va_arg_indirect_ref (src_addr);
    5186         1354 :                   dest = build_va_arg_indirect_ref (dest_addr);
    5187              : 
    5188         1354 :                   gimplify_assign (dest, src, pre_p);
    5189              :                 }
    5190              :               else
    5191              :                 {
    5192           10 :                   tree copy
    5193           20 :                     = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
    5194              :                                        3, dest_addr, src_addr,
    5195           10 :                                        size_int (cur_size));
    5196           10 :                   gimplify_and_add (copy, pre_p);
    5197              :                 }
    5198         1364 :               prev_size += cur_size;
    5199              :             }
    5200              :         }
    5201              : 
    5202        28750 :       if (needed_intregs)
    5203              :         {
    5204        18033 :           t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
    5205        18033 :                       build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
    5206        18033 :           gimplify_assign (gpr, t, pre_p);
    5207              :           /* The GPR save area guarantees only 8-byte alignment.  */
    5208        18033 :           if (!need_temp)
    5209        17229 :             type_align = MIN (type_align, 64);
    5210              :         }
    5211              : 
    5212        28750 :       if (needed_sseregs)
    5213              :         {
    5214        11109 :           t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
    5215        11109 :                       build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
    5216        11109 :           gimplify_assign (unshare_expr (fpr), t, pre_p);
    5217              :         }
    5218              : 
    5219        28750 :       gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
    5220              : 
    5221        28750 :       gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    5222              :     }
    5223              : 
    5224              :   /* ... otherwise out of the overflow area.  */
    5225              : 
    5226              :   /* When we align parameter on stack for caller, if the parameter
    5227              :      alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
    5228              :      aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
    5229              :      here with caller.  */
    5230        51841 :   arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
    5231        51841 :   if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    5232              :     arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
    5233              : 
    5234              :   /* Care for on-stack alignment if needed.  */
    5235        51841 :   if (arg_boundary <= 64 || size == 0)
    5236        34804 :     t = ovf;
    5237              :  else
    5238              :     {
    5239        17037 :       HOST_WIDE_INT align = arg_boundary / 8;
    5240        17037 :       t = fold_build_pointer_plus_hwi (ovf, align - 1);
    5241        17037 :       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
    5242        17037 :                   build_int_cst (TREE_TYPE (t), -align));
    5243              :     }
    5244              : 
    5245        51841 :   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
    5246        51841 :   gimplify_assign (addr, t, pre_p);
    5247              : 
    5248        51841 :   t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
    5249        51841 :   gimplify_assign (unshare_expr (ovf), t, pre_p);
    5250              : 
    5251        51841 :   if (container)
    5252        28750 :     gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
    5253              : 
    5254        51841 :   type = build_aligned_type (type, type_align);
    5255        51841 :   ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
    5256        51841 :   addr = fold_convert (ptrtype, addr);
    5257              : 
    5258        51841 :   if (indirect_p)
    5259          103 :     addr = build_va_arg_indirect_ref (addr);
    5260        51841 :   return build_va_arg_indirect_ref (addr);
    5261              : }
    5262              : 
    5263              : /* Return true if OPNUM's MEM should be matched
    5264              :    in movabs* patterns.  */
    5265              : 
    5266              : bool
    5267          505 : ix86_check_movabs (rtx insn, int opnum)
    5268              : {
    5269          505 :   rtx set, mem;
    5270              : 
    5271          505 :   set = PATTERN (insn);
    5272          505 :   if (GET_CODE (set) == PARALLEL)
    5273            0 :     set = XVECEXP (set, 0, 0);
    5274          505 :   gcc_assert (GET_CODE (set) == SET);
    5275          505 :   mem = XEXP (set, opnum);
    5276          505 :   while (SUBREG_P (mem))
    5277            0 :     mem = SUBREG_REG (mem);
    5278          505 :   gcc_assert (MEM_P (mem));
    5279          505 :   return volatile_ok || !MEM_VOLATILE_P (mem);
    5280              : }
    5281              : 
    5282              : /* Return true if XVECEXP idx of INSN satisfies MOVS arguments.  */
    5283              : bool
    5284       221047 : ix86_check_movs (rtx insn, int idx)
    5285              : {
    5286       221047 :   rtx pat = PATTERN (insn);
    5287       221047 :   gcc_assert (GET_CODE (pat) == PARALLEL);
    5288              : 
    5289       221047 :   rtx set = XVECEXP (pat, 0, idx);
    5290       221047 :   gcc_assert (GET_CODE (set) == SET);
    5291              : 
    5292       221047 :   rtx dst = SET_DEST (set);
    5293       221047 :   gcc_assert (MEM_P (dst));
    5294              : 
    5295       221047 :   rtx src = SET_SRC (set);
    5296       221047 :   gcc_assert (MEM_P (src));
    5297              : 
    5298       221047 :   return (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst))
    5299       442094 :           && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src))
    5300            0 :               || Pmode == word_mode));
    5301              : }
    5302              : 
/* Return false if INSN contains a MEM with a non-default address space.  */
bool
ix86_check_no_addr_space (rtx insn)
{
  subrtx_var_iterator::array_type array;
  /* Walk every sub-rtx of the insn pattern looking for a memory
     reference whose address space is not the generic one.  */
  FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
    {
      rtx x = *iter;
      if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
        return false;
    }
  return true;
}
    5316              : 
    5317              : /* Initialize the table of extra 80387 mathematical constants.  */
    5318              : 
    5319              : static void
    5320         2346 : init_ext_80387_constants (void)
    5321              : {
    5322         2346 :   static const char * cst[5] =
    5323              :   {
    5324              :     "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    5325              :     "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    5326              :     "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    5327              :     "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    5328              :     "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
    5329              :   };
    5330         2346 :   int i;
    5331              : 
    5332        14076 :   for (i = 0; i < 5; i++)
    5333              :     {
    5334        11730 :       real_from_string (&ext_80387_constants_table[i], cst[i]);
    5335              :       /* Ensure each constant is rounded to XFmode precision.  */
    5336        11730 :       real_convert (&ext_80387_constants_table[i],
    5337        23460 :                     XFmode, &ext_80387_constants_table[i]);
    5338              :     }
    5339              : 
    5340         2346 :   ext_80387_constants_init = 1;
    5341         2346 : }
    5342              : 
    5343              : /* Return non-zero if the constant is something that
    5344              :    can be loaded with a special instruction.  */
    5345              : 
    5346              : int
    5347      5062375 : standard_80387_constant_p (rtx x)
    5348              : {
    5349      5062375 :   machine_mode mode = GET_MODE (x);
    5350              : 
    5351      5062375 :   const REAL_VALUE_TYPE *r;
    5352              : 
    5353      5062375 :   if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
    5354              :     return -1;
    5355              : 
    5356      4602731 :   if (x == CONST0_RTX (mode))
    5357              :     return 1;
    5358      2114661 :   if (x == CONST1_RTX (mode))
    5359              :     return 2;
    5360              : 
    5361      1227721 :   r = CONST_DOUBLE_REAL_VALUE (x);
    5362              : 
    5363              :   /* For XFmode constants, try to find a special 80387 instruction when
    5364              :      optimizing for size or on those CPUs that benefit from them.  */
    5365      1227721 :   if (mode == XFmode
    5366       790763 :       && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
    5367      2018484 :       && !flag_rounding_math)
    5368              :     {
    5369       782501 :       int i;
    5370              : 
    5371       782501 :       if (! ext_80387_constants_init)
    5372         2339 :         init_ext_80387_constants ();
    5373              : 
    5374      4684494 :       for (i = 0; i < 5; i++)
    5375      3910824 :         if (real_identical (r, &ext_80387_constants_table[i]))
    5376         8831 :           return i + 3;
    5377              :     }
    5378              : 
    5379              :   /* Load of the constant -0.0 or -1.0 will be split as
    5380              :      fldz;fchs or fld1;fchs sequence.  */
    5381      1218890 :   if (real_isnegzero (r))
    5382              :     return 8;
    5383      1202513 :   if (real_identical (r, &dconstm1))
    5384       301764 :     return 9;
    5385              : 
    5386              :   return 0;
    5387              : }
    5388              : 
    5389              : /* Return the opcode of the special instruction to be used to load
    5390              :    the constant X.  */
    5391              : 
    5392              : const char *
    5393        54508 : standard_80387_constant_opcode (rtx x)
    5394              : {
    5395        54508 :   switch (standard_80387_constant_p (x))
    5396              :     {
    5397              :     case 1:
    5398              :       return "fldz";
    5399        33957 :     case 2:
    5400        33957 :       return "fld1";
    5401            1 :     case 3:
    5402            1 :       return "fldlg2";
    5403           10 :     case 4:
    5404           10 :       return "fldln2";
    5405           12 :     case 5:
    5406           12 :       return "fldl2e";
    5407            2 :     case 6:
    5408            2 :       return "fldl2t";
    5409          192 :     case 7:
    5410          192 :       return "fldpi";
    5411            0 :     case 8:
    5412            0 :     case 9:
    5413            0 :       return "#";
    5414            0 :     default:
    5415            0 :       gcc_unreachable ();
    5416              :     }
    5417              : }
    5418              : 
    5419              : /* Return the CONST_DOUBLE representing the 80387 constant that is
    5420              :    loaded by the specified special instruction.  The argument IDX
    5421              :    matches the return value from standard_80387_constant_p.  */
    5422              : 
    5423              : rtx
    5424           24 : standard_80387_constant_rtx (int idx)
    5425              : {
    5426           24 :   int i;
    5427              : 
    5428           24 :   if (! ext_80387_constants_init)
    5429            7 :     init_ext_80387_constants ();
    5430              : 
    5431           24 :   switch (idx)
    5432              :     {
    5433           24 :     case 3:
    5434           24 :     case 4:
    5435           24 :     case 5:
    5436           24 :     case 6:
    5437           24 :     case 7:
    5438           24 :       i = idx - 3;
    5439           24 :       break;
    5440              : 
    5441            0 :     default:
    5442            0 :       gcc_unreachable ();
    5443              :     }
    5444              : 
    5445           24 :   return const_double_from_real_value (ext_80387_constants_table[i],
    5446           24 :                                        XFmode);
    5447              : }
    5448              : 
/* Return 1 if X is all bits 0, 2 if X is all bits 1
   and 3 if X is all bits 1 with zero extend
   in supported SSE/AVX vector mode.  */

int
standard_sse_constant_p (rtx x, machine_mode pred_mode)
{
  machine_mode mode;

  if (!TARGET_SSE)
    return 0;

  mode = GET_MODE (x);

  /* All-zeros constant: loadable with a register-clearing xor.  */
  if (x == const0_rtx || const0_operand (x, mode))
    return 1;

  /* All-ones constant, either as an integer vector or as a float
     vector whose bit pattern is all ones; the float form is checked
     only when X or the predicate has a float vector mode.  */
  if (x == constm1_rtx
      || vector_all_ones_operand (x, mode)
      || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
           || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT)
          && float_vector_all_ones_operand (x, mode)))
    {
      /* VOIDmode integer constant, get mode from the predicate.  */
      if (mode == VOIDmode)
        mode = pred_mode;

      /* An all-ones load needs ISA support for the vector width;
         otherwise fall through and return 0.  */
      switch (GET_MODE_SIZE (mode))
        {
        case 64:
          if (TARGET_AVX512F)
            return 2;
          break;
        case 32:
          if (TARGET_AVX2)
            return 2;
          break;
        case 16:
          if (TARGET_SSE2)
            return 2;
          break;
        case 0:
          /* VOIDmode */
          gcc_unreachable ();
        default:
          break;
        }
    }

  /* All-ones in the low half or quarter of the vector, with the
     upper part zero-extended.  */
  if (vector_all_ones_zero_extend_half_operand (x, mode)
      || vector_all_ones_zero_extend_quarter_operand (x, mode))
    return 3;

  return 0;
}
    5504              : 
/* Return the opcode of the special instruction to be used to load
   the constant operands[1] into operands[0].  */

const char *
standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
  machine_mode mode;
  rtx x = operands[1];

  gcc_assert (TARGET_SSE);

  mode = GET_MODE (x);

  if (x == const0_rtx || const0_operand (x, mode))
    {
      /* All-zeros: clear the register with a self-XOR.  The mnemonic
         depends on the insn's mode attribute; xmm16-xmm31/ymm16-ymm31
         destinations need an EVEX encoding, and without AVX512VL the
         clear is widened to the full zmm register (%g0).  */
      switch (get_attr_mode (insn))
        {
        case MODE_TI:
          if (!EXT_REX_SSE_REG_P (operands[0]))
            return "%vpxor\t%0, %d0";
          /* FALLTHRU */
        case MODE_XI:
        case MODE_OI:
          if (EXT_REX_SSE_REG_P (operands[0]))
            {
              if (TARGET_AVX512VL)
                return "vpxord\t%x0, %x0, %x0";
              else
                return "vpxord\t%g0, %g0, %g0";
            }
          return "vpxor\t%x0, %x0, %x0";

        case MODE_V2DF:
          if (!EXT_REX_SSE_REG_P (operands[0]))
            return "%vxorpd\t%0, %d0";
          /* FALLTHRU */
        case MODE_V8DF:
        case MODE_V4DF:
          /* EVEX vxorpd needs AVX512DQ; fall back to vpxorq without it.  */
          if (EXT_REX_SSE_REG_P (operands[0]))
            {
              if (TARGET_AVX512DQ)
                {
                  if (TARGET_AVX512VL)
                    return "vxorpd\t%x0, %x0, %x0";
                  else
                    return "vxorpd\t%g0, %g0, %g0";
                }
              else
                {
                  if (TARGET_AVX512VL)
                    return "vpxorq\t%x0, %x0, %x0";
                  else
                    return "vpxorq\t%g0, %g0, %g0";
                }
            }
          return "vxorpd\t%x0, %x0, %x0";

        case MODE_V4SF:
          if (!EXT_REX_SSE_REG_P (operands[0]))
            return "%vxorps\t%0, %d0";
          /* FALLTHRU */
        case MODE_V16SF:
        case MODE_V8SF:
          /* EVEX vxorps needs AVX512DQ; fall back to vpxord without it.  */
          if (EXT_REX_SSE_REG_P (operands[0]))
            {
              if (TARGET_AVX512DQ)
                {
                  if (TARGET_AVX512VL)
                    return "vxorps\t%x0, %x0, %x0";
                  else
                    return "vxorps\t%g0, %g0, %g0";
                }
              else
                {
                  if (TARGET_AVX512VL)
                    return "vpxord\t%x0, %x0, %x0";
                  else
                    return "vpxord\t%g0, %g0, %g0";
                }
            }
          return "vxorps\t%x0, %x0, %x0";

        default:
          gcc_unreachable ();
        }
    }
  else if (x == constm1_rtx
           || vector_all_ones_operand (x, mode)
           || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
               && float_vector_all_ones_operand (x, mode)))
    {
      /* All-ones: materialize with a compare-equal of the register
         against itself (or vpternlogd for EVEX-only registers).  */
      enum attr_mode insn_mode = get_attr_mode (insn);

      switch (insn_mode)
        {
        case MODE_XI:
        case MODE_V8DF:
        case MODE_V16SF:
          gcc_assert (TARGET_AVX512F);
          return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";

        case MODE_OI:
        case MODE_V4DF:
        case MODE_V8SF:
          gcc_assert (TARGET_AVX2);
          /* FALLTHRU */
        case MODE_TI:
        case MODE_V2DF:
        case MODE_V4SF:
          gcc_assert (TARGET_SSE2);
          if (EXT_REX_SSE_REG_P (operands[0]))
            {
              if (TARGET_AVX512VL)
                return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
              else
                return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
            }
          return (TARGET_AVX
                  ? "vpcmpeqd\t%0, %0, %0"
                  : "pcmpeqd\t%0, %0");

        default:
          gcc_unreachable ();
        }
   }
  else if (vector_all_ones_zero_extend_half_operand (x, mode))
    {
      /* All-ones in the low half, upper half zeroed: compare-equal on
         the half-width register implicitly zeroes the upper part.  */
      if (GET_MODE_SIZE (mode) == 64)
        {
          gcc_assert (TARGET_AVX512F);
          return "vpcmpeqd\t%t0, %t0, %t0";
        }
      else if (GET_MODE_SIZE (mode) == 32)
        {
          gcc_assert (TARGET_AVX);
          return "vpcmpeqd\t%x0, %x0, %x0";
        }
      gcc_unreachable ();
    }
  else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
    {
      gcc_assert (TARGET_AVX512F);
      return "vpcmpeqd\t%x0, %x0, %x0";
    }

  gcc_unreachable ();
}
    5652              : 
    5653              : /* Returns true if INSN can be transformed from a memory load
    5654              :    to a supported FP constant load.  */
    5655              : 
    5656              : bool
    5657      2135347 : ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
    5658              : {
    5659      2135347 :   rtx src = find_constant_src (insn);
    5660              : 
    5661      2135347 :   gcc_assert (REG_P (dst));
    5662              : 
    5663      2135347 :   if (src == NULL
    5664       593057 :       || (SSE_REGNO_P (REGNO (dst))
    5665       461573 :           && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
    5666       165170 :       || (!TARGET_AVX512VL
    5667       165109 :           && EXT_REX_SSE_REGNO_P (REGNO (dst))
    5668            0 :           && standard_sse_constant_p (src, GET_MODE (dst)) == 1)
    5669      2300517 :       || (STACK_REGNO_P (REGNO (dst))
    5670       131484 :            && standard_80387_constant_p (src) < 1))
    5671      2059112 :     return false;
    5672              : 
    5673              :   return true;
    5674              : }
    5675              : 
    5676              : /* Predicate for pre-reload splitters with associated instructions,
    5677              :    which can match any time before the split1 pass (usually combine),
    5678              :    then are unconditionally split in that pass and should not be
    5679              :    matched again afterwards.  */
    5680              : 
    5681              : bool
    5682     17724661 : ix86_pre_reload_split (void)
    5683              : {
    5684     17724661 :   return (can_create_pseudo_p ()
    5685     26884165 :           && !(cfun->curr_properties & PROP_rtl_split_insns));
    5686              : }
    5687              : 
    5688              : /* Return the opcode of the TYPE_SSEMOV instruction.  To move from
    5689              :    or to xmm16-xmm31/ymm16-ymm31 registers, we either require
    5690              :    TARGET_AVX512VL or it is a register to register move which can
    5691              :    be done with zmm register move. */
    5692              : 
    5693              : static const char *
    5694      4210827 : ix86_get_ssemov (rtx *operands, unsigned size,
    5695              :                  enum attr_mode insn_mode, machine_mode mode)
    5696              : {
    5697      4210827 :   char buf[128];
    5698      4210827 :   bool misaligned_p = (misaligned_operand (operands[0], mode)
    5699      4210827 :                        || misaligned_operand (operands[1], mode));
    5700      4210827 :   bool evex_reg_p = (size == 64
    5701      4124077 :                      || EXT_REX_SSE_REG_P (operands[0])
    5702      8334168 :                      || EXT_REX_SSE_REG_P (operands[1]));
    5703              : 
    5704      4210827 :   bool egpr_p = (TARGET_APX_EGPR
    5705      4210827 :                  && (x86_extended_rex2reg_mentioned_p (operands[0])
    5706          184 :                      || x86_extended_rex2reg_mentioned_p (operands[1])));
    5707          196 :   bool egpr_vl = egpr_p && TARGET_AVX512VL;
    5708              : 
    5709      4210827 :   machine_mode scalar_mode;
    5710              : 
    5711      4210827 :   const char *opcode = NULL;
    5712      4210827 :   enum
    5713              :     {
    5714              :       opcode_int,
    5715              :       opcode_float,
    5716              :       opcode_double
    5717      4210827 :     } type = opcode_int;
    5718              : 
    5719      4210827 :   switch (insn_mode)
    5720              :     {
    5721              :     case MODE_V16SF:
    5722              :     case MODE_V8SF:
    5723              :     case MODE_V4SF:
    5724              :       scalar_mode = E_SFmode;
    5725              :       type = opcode_float;
    5726              :       break;
    5727       209481 :     case MODE_V8DF:
    5728       209481 :     case MODE_V4DF:
    5729       209481 :     case MODE_V2DF:
    5730       209481 :       scalar_mode = E_DFmode;
    5731       209481 :       type = opcode_double;
    5732       209481 :       break;
    5733      1527005 :     case MODE_XI:
    5734      1527005 :     case MODE_OI:
    5735      1527005 :     case MODE_TI:
    5736      1527005 :       scalar_mode = GET_MODE_INNER (mode);
    5737              :       break;
    5738            0 :     default:
    5739            0 :       gcc_unreachable ();
    5740              :     }
    5741              : 
    5742              :   /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
    5743              :      we can only use zmm register move without memory operand.  */
    5744      4210827 :   if (evex_reg_p
    5745        88787 :       && !TARGET_AVX512VL
    5746      4261302 :       && GET_MODE_SIZE (mode) < 64)
    5747              :     {
    5748              :       /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
    5749              :          xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
    5750              :          AVX512VL is disabled, LRA can still generate reg to
    5751              :          reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
    5752              :          modes.  */
    5753            0 :       if (memory_operand (operands[0], mode)
    5754            0 :           || memory_operand (operands[1], mode))
    5755            0 :         gcc_unreachable ();
    5756            0 :       size = 64;
    5757            0 :       switch (type)
    5758              :         {
    5759            0 :         case opcode_int:
    5760            0 :           if (scalar_mode == E_HFmode || scalar_mode == E_BFmode)
    5761            0 :             opcode = (misaligned_p
    5762            0 :                       ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
    5763              :                       : "vmovdqa64");
    5764              :           else
    5765            0 :             opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
    5766              :           break;
    5767            0 :         case opcode_float:
    5768            0 :           opcode = misaligned_p ? "vmovups" : "vmovaps";
    5769              :           break;
    5770            0 :         case opcode_double:
    5771            0 :           opcode = misaligned_p ? "vmovupd" : "vmovapd";
    5772              :           break;
    5773              :         }
    5774              :     }
    5775      4210827 :   else if (SCALAR_FLOAT_MODE_P (scalar_mode))
    5776              :     {
    5777      2861110 :       switch (scalar_mode)
    5778              :         {
    5779        36736 :         case E_HFmode:
    5780        36736 :         case E_BFmode:
    5781        36736 :           if (evex_reg_p || egpr_vl)
    5782        11597 :             opcode = (misaligned_p
    5783          173 :                       ? (TARGET_AVX512BW
    5784              :                          ? "vmovdqu16"
    5785              :                          : "vmovdqu64")
    5786              :                       : "vmovdqa64");
    5787        25139 :           else if (egpr_p)
    5788       824418 :             opcode = (misaligned_p
    5789            0 :                       ? (TARGET_AVX512BW
    5790            0 :                          ? "vmovdqu16"
    5791              :                          : "%vmovups")
    5792              :                       : "%vmovaps");
    5793              :           else
    5794       434341 :             opcode = (misaligned_p
    5795        25139 :                       ? (TARGET_AVX512BW && evex_reg_p
    5796              :                          ? "vmovdqu16"
    5797              :                          : "%vmovdqu")
    5798              :                       : "%vmovdqa");
    5799              :           break;
    5800      2474341 :         case E_SFmode:
    5801      2474341 :           opcode = misaligned_p ? "%vmovups" : "%vmovaps";
    5802              :           break;
    5803       209481 :         case E_DFmode:
    5804       209481 :           opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
    5805              :           break;
    5806       140552 :         case E_TFmode:
    5807       140552 :           if (evex_reg_p || egpr_vl)
    5808           14 :             opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
    5809       140538 :           else if (egpr_p)
    5810            0 :             opcode = misaligned_p ? "%vmovups" : "%vmovaps";
    5811              :           else
    5812       140538 :             opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
    5813              :           break;
    5814            0 :         default:
    5815            0 :           gcc_unreachable ();
    5816              :         }
    5817              :     }
    5818      1349717 :   else if (SCALAR_INT_MODE_P (scalar_mode))
    5819              :     {
    5820      1349717 :       switch (scalar_mode)
    5821              :         {
    5822       111293 :         case E_QImode:
    5823       111293 :           if (evex_reg_p || egpr_vl)
    5824      4220730 :             opcode = (misaligned_p
    5825         9903 :                       ? (TARGET_AVX512BW
    5826         5074 :                          ? "vmovdqu8"
    5827              :                          : "vmovdqu64")
    5828              :                       : "vmovdqa64");
    5829       101390 :           else if (egpr_p)
    5830           30 :             opcode = (misaligned_p
    5831            0 :                       ? (TARGET_AVX512BW
    5832              :                          ? "vmovdqu8"
    5833              :                          : "%vmovups")
    5834              :                       : "%vmovaps");
    5835              :           else
    5836       101360 :             opcode = (misaligned_p
    5837              :                       ? (TARGET_AVX512BW && evex_reg_p
    5838              :                          ? "vmovdqu8"
    5839              :                          : "%vmovdqu")
    5840              :                       : "%vmovdqa");
    5841              :           break;
    5842        42979 :         case E_HImode:
    5843        42979 :           if (evex_reg_p || egpr_vl)
    5844         3770 :             opcode = (misaligned_p
    5845          294 :                       ? (TARGET_AVX512BW
    5846              :                          ? "vmovdqu16"
    5847              :                          : "vmovdqu64")
    5848              :                       : "vmovdqa64");
    5849        39209 :           else if (egpr_p)
    5850       824418 :             opcode = (misaligned_p
    5851           27 :                       ? (TARGET_AVX512BW
    5852            0 :                          ? "vmovdqu16"
    5853              :                          : "%vmovups")
    5854              :                       : "%vmovaps");
    5855              :           else
    5856       409202 :             opcode = (misaligned_p
    5857        39182 :                       ? (TARGET_AVX512BW && evex_reg_p
    5858              :                          ? "vmovdqu16"
    5859              :                          : "%vmovdqu")
    5860              :                       : "%vmovdqa");
    5861              :           break;
    5862       183581 :         case E_SImode:
    5863       183581 :           if (evex_reg_p || egpr_vl)
    5864         8291 :             opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
    5865       175290 :           else if (egpr_p)
    5866           14 :             opcode = misaligned_p ? "%vmovups" : "%vmovaps";
    5867              :           else
    5868       175276 :             opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
    5869              :           break;
    5870      1000056 :         case E_DImode:
    5871      1000056 :         case E_TImode:
    5872      1000056 :         case E_OImode:
    5873      1000056 :           if (evex_reg_p || egpr_vl)
    5874        18817 :             opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
    5875       981239 :           else if (egpr_p)
    5876           26 :             opcode = misaligned_p ? "%vmovups" : "%vmovaps";
    5877              :           else
    5878       981213 :             opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
    5879              :           break;
    5880        11808 :         case E_XImode:
    5881        49556 :           opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
    5882              :           break;
    5883            0 :         default:
    5884            0 :           gcc_unreachable ();
    5885              :         }
    5886              :     }
    5887              :   else
    5888            0 :     gcc_unreachable ();
    5889              : 
    5890      4210827 :   switch (size)
    5891              :     {
    5892        86750 :     case 64:
    5893        86750 :       snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
    5894              :                 opcode);
    5895        86750 :       break;
    5896        94153 :     case 32:
    5897        94153 :       snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
    5898              :                 opcode);
    5899        94153 :       break;
    5900      4029924 :     case 16:
    5901      4029924 :       snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
    5902              :                 opcode);
    5903      4029924 :       break;
    5904            0 :     default:
    5905            0 :       gcc_unreachable ();
    5906              :     }
    5907      4210827 :   output_asm_insn (buf, operands);
    5908      4210827 :   return "";
    5909              : }
    5910              : 
    5911              : /* Return the template of the TYPE_SSEMOV instruction to move
    5912              :    operands[1] into operands[0].  */
    5913              : 
    5914              : const char *
    5915      6575852 : ix86_output_ssemov (rtx_insn *insn, rtx *operands)
    5916              : {
    5917      6575852 :   machine_mode mode = GET_MODE (operands[0]);
    5918      6575852 :   if (get_attr_type (insn) != TYPE_SSEMOV
    5919      6575852 :       || mode != GET_MODE (operands[1]))
    5920            0 :     gcc_unreachable ();
    5921              : 
    5922      6575852 :   enum attr_mode insn_mode = get_attr_mode (insn);
    5923              : 
    5924      6575852 :   switch (insn_mode)
    5925              :     {
    5926        86750 :     case MODE_XI:
    5927        86750 :     case MODE_V8DF:
    5928        86750 :     case MODE_V16SF:
    5929        86750 :       return ix86_get_ssemov (operands, 64, insn_mode, mode);
    5930              : 
    5931        94153 :     case MODE_OI:
    5932        94153 :     case MODE_V4DF:
    5933        94153 :     case MODE_V8SF:
    5934        94153 :       return ix86_get_ssemov (operands, 32, insn_mode, mode);
    5935              : 
    5936      4029924 :     case MODE_TI:
    5937      4029924 :     case MODE_V2DF:
    5938      4029924 :     case MODE_V4SF:
    5939      4029924 :       return ix86_get_ssemov (operands, 16, insn_mode, mode);
    5940              : 
    5941       656631 :     case MODE_DI:
    5942              :       /* Handle broken assemblers that require movd instead of movq. */
    5943       656631 :       if (GENERAL_REG_P (operands[0]))
    5944              :         {
    5945              :           if (HAVE_AS_IX86_INTERUNIT_MOVQ)
    5946              :             return "%vmovq\t{%1, %q0|%q0, %1}";
    5947              :           else
    5948              :             return "%vmovd\t{%1, %q0|%q0, %1}";
    5949              :         }
    5950       581162 :       else if (GENERAL_REG_P (operands[1]))
    5951              :         {
    5952              :           if (HAVE_AS_IX86_INTERUNIT_MOVQ)
    5953              :             return "%vmovq\t{%q1, %0|%0, %q1}";
    5954              :           else
    5955              :             return "%vmovd\t{%q1, %0|%0, %q1}";
    5956              :         }
    5957              :       else
    5958       421828 :         return "%vmovq\t{%1, %0|%0, %1}";
    5959              : 
    5960       201129 :     case MODE_SI:
    5961       201129 :       if (GENERAL_REG_P (operands[0]))
    5962              :         return "%vmovd\t{%1, %k0|%k0, %1}";
    5963       145371 :       else if (GENERAL_REG_P (operands[1]))
    5964              :         return "%vmovd\t{%k1, %0|%0, %k1}";
    5965              :       else
    5966        60852 :         return "%vmovd\t{%1, %0|%0, %1}";
    5967              : 
    5968        54154 :     case MODE_HI:
    5969        54154 :       if (GENERAL_REG_P (operands[0]))
    5970              :         return "vmovw\t{%1, %k0|%k0, %1}";
    5971        53991 :       else if (GENERAL_REG_P (operands[1]))
    5972              :         return "vmovw\t{%k1, %0|%0, %k1}";
    5973              :       else
    5974        53757 :         return "vmovw\t{%1, %0|%0, %1}";
    5975              : 
    5976       778123 :     case MODE_DF:
    5977       778123 :       if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
    5978              :         return "vmovsd\t{%d1, %0|%0, %d1}";
    5979              :       else
    5980       777362 :         return "%vmovsd\t{%1, %0|%0, %1}";
    5981              : 
    5982       671002 :     case MODE_SF:
    5983       671002 :       if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
    5984              :         return "vmovss\t{%d1, %0|%0, %d1}";
    5985              :       else
    5986       670426 :         return "%vmovss\t{%1, %0|%0, %1}";
    5987              : 
    5988           96 :     case MODE_HF:
    5989           96 :     case MODE_BF:
    5990           96 :       if (REG_P (operands[0]) && REG_P (operands[1]))
    5991              :         return "vmovsh\t{%d1, %0|%0, %d1}";
    5992              :       else
    5993            0 :         return "vmovsh\t{%1, %0|%0, %1}";
    5994              : 
    5995           36 :     case MODE_V1DF:
    5996           36 :       gcc_assert (!TARGET_AVX);
    5997              :       return "movlpd\t{%1, %0|%0, %1}";
    5998              : 
    5999         3854 :     case MODE_V2SF:
    6000         3854 :       if (TARGET_AVX && REG_P (operands[0]))
    6001              :         return "vmovlps\t{%1, %d0|%d0, %1}";
    6002              :       else
    6003         3774 :         return "%vmovlps\t{%1, %0|%0, %1}";
    6004              : 
    6005            0 :     default:
    6006            0 :       gcc_unreachable ();
    6007              :     }
    6008              : }
    6009              : 
    6010              : /* Returns true if OP contains a symbol reference */
    6011              : 
    6012              : bool
    6013    583616846 : symbolic_reference_mentioned_p (const_rtx op)
    6014              : {
    6015    583616846 :   const char *fmt;
    6016    583616846 :   int i;
    6017              : 
    6018    583616846 :   if (SYMBOL_REF_P (op) || LABEL_REF_P (op))
    6019              :     return true;
    6020              : 
    6021    441152390 :   fmt = GET_RTX_FORMAT (GET_CODE (op));
    6022    748506230 :   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    6023              :     {
    6024    597207550 :       if (fmt[i] == 'E')
    6025              :         {
    6026      2021445 :           int j;
    6027              : 
    6028      4045183 :           for (j = XVECLEN (op, i) - 1; j >= 0; j--)
    6029      3329181 :             if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
    6030              :               return true;
    6031              :         }
    6032              : 
    6033    595186105 :       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
    6034              :         return true;
    6035              :     }
    6036              : 
    6037              :   return false;
    6038              : }
    6039              : 
    6040              : /* Return true if it is appropriate to emit `ret' instructions in the
    6041              :    body of a function.  Do this only if the epilogue is simple, needing a
    6042              :    couple of insns.  Prior to reloading, we can't tell how many registers
    6043              :    must be saved, so return false then.  Return false if there is no frame
    6044              :    marker to de-allocate.  */
    6045              : 
    6046              : bool
    6047            0 : ix86_can_use_return_insn_p (void)
    6048              : {
    6049            0 :   if (ix86_function_ms_hook_prologue (current_function_decl))
    6050              :     return false;
    6051              : 
    6052            0 :   if (ix86_function_naked (current_function_decl))
    6053              :     return false;
    6054              : 
    6055              :   /* Don't use `ret' instruction in interrupt handler.  */
    6056            0 :   if (! reload_completed
    6057            0 :       || frame_pointer_needed
    6058            0 :       || cfun->machine->func_type != TYPE_NORMAL)
    6059              :     return 0;
    6060              : 
    6061              :   /* Don't allow more than 32k pop, since that's all we can do
    6062              :      with one instruction.  */
    6063            0 :   if (crtl->args.pops_args && crtl->args.size >= 32768)
    6064              :     return 0;
    6065              : 
    6066            0 :   struct ix86_frame &frame = cfun->machine->frame;
    6067            0 :   return (frame.stack_pointer_offset == UNITS_PER_WORD
    6068            0 :           && (frame.nregs + frame.nsseregs) == 0);
    6069              : }
    6070              : 
    6071              : /* Return stack frame size.  get_frame_size () returns used stack slots
    6072              :    during compilation, which may be optimized out later.  If stack frame
    6073              :    is needed, stack_frame_required should be true.  */
    6074              : 
    6075              : static HOST_WIDE_INT
    6076      8235049 : ix86_get_frame_size (void)
    6077              : {
    6078      8235049 :   if (cfun->machine->stack_frame_required)
    6079      8165712 :     return get_frame_size ();
    6080              :   else
    6081              :     return 0;
    6082              : }
    6083              : 
    6084              : /* Value should be nonzero if functions must have frame pointers.
    6085              :    Zero means the frame pointer need not be set up (and parms may
    6086              :    be accessed via the stack pointer) in functions that seem suitable.  */
    6087              : 
    6088              : static bool
    6089      1226016 : ix86_frame_pointer_required (void)
    6090              : {
    6091              :   /* If we accessed previous frames, then the generated code expects
    6092              :      to be able to access the saved ebp value in our frame.  */
    6093      1226016 :   if (cfun->machine->accesses_prev_frame)
    6094              :     return true;
    6095              : 
    6096              :   /* Several x86 os'es need a frame pointer for other reasons,
    6097              :      usually pertaining to setjmp.  */
    6098      1225983 :   if (SUBTARGET_FRAME_POINTER_REQUIRED)
    6099              :     return true;
    6100              : 
    6101              :   /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
    6102      1225983 :   if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    6103              :     return true;
    6104              : 
    6105              :   /* Win64 SEH, very large frames need a frame-pointer as maximum stack
    6106              :      allocation is 4GB.  */
    6107      1225983 :   if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
    6108              :     return true;
    6109              : 
    6110              :   /* SSE saves require frame-pointer when stack is misaligned.  */
    6111      1225983 :   if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
    6112              :     return true;
    6113              : 
    6114              :   /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
    6115              :      turns off the frame pointer by default.  Turn it back on now if
    6116              :      we've not got a leaf function.  */
    6117      1225982 :   if (TARGET_OMIT_LEAF_FRAME_POINTER
    6118      1225982 :       && (!crtl->is_leaf
    6119            0 :           || ix86_current_function_calls_tls_descriptor))
    6120            0 :     return true;
    6121              : 
    6122              :   /* Several versions of mcount for the x86 assumes that there is a
    6123              :      frame, so we cannot allow profiling without a frame pointer.  */
    6124      1225982 :   if (crtl->profile && !flag_fentry)
    6125              :     return true;
    6126              : 
    6127              :   return false;
    6128              : }
    6129              : 
/* Record that the current function accesses previous call frames.
   ix86_frame_pointer_required checks this flag, so setting it forces
   a frame pointer for the current function.  */

void
ix86_setup_frame_addresses (void)
{
  cfun->machine->accesses_prev_frame = 1;
}
    6137              : 
/* Nonzero when thunks can be emitted as hidden, one-only (comdat)
   functions instead of local labels.  */
#if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Label count for call and return thunks.  It is used to make unique
   labels in call and return thunks.  */
static int indirectlabelno;

/* True if call thunk function is needed.  */
static bool indirect_thunk_needed = false;

/* Bit masks of integer registers, which contain the branch target,
   used by call thunk functions.  */
static HARD_REG_SET indirect_thunks_used;

/* True if return thunk function is needed.  */
static bool indirect_return_needed = false;

/* True if return thunk function via CX is needed.  */
static bool indirect_return_via_cx;

/* Prefix used for the internal labels inside a thunk body.  */
#ifndef INDIRECT_LABEL
# define INDIRECT_LABEL "LIND"
#endif

/* Indicate what prefix is needed for an indirect branch.  */
enum indirect_thunk_prefix
{
  indirect_thunk_prefix_none,
  indirect_thunk_prefix_nt
};
    6171              : 
    6172              : /* Return the prefix needed for an indirect branch INSN.  */
    6173              : 
    6174              : enum indirect_thunk_prefix
    6175           68 : indirect_thunk_need_prefix (rtx_insn *insn)
    6176              : {
    6177           68 :   enum indirect_thunk_prefix need_prefix;
    6178           68 :   if ((cfun->machine->indirect_branch_type
    6179           68 :             == indirect_branch_thunk_extern)
    6180           68 :            && ix86_notrack_prefixed_insn_p (insn))
    6181              :     {
    6182              :       /* NOTRACK prefix is only used with external thunk so that it
    6183              :          can be properly updated to support CET at run-time.  */
    6184              :       need_prefix = indirect_thunk_prefix_nt;
    6185              :     }
    6186              :   else
    6187              :     need_prefix = indirect_thunk_prefix_none;
    6188           68 :   return need_prefix;
    6189              : }
    6190              : 
    6191              : /* Fills in the label name that should be used for the indirect thunk.  */
    6192              : 
    6193              : static void
    6194           74 : indirect_thunk_name (char name[32], unsigned int regno,
    6195              :                      enum indirect_thunk_prefix need_prefix,
    6196              :                      bool ret_p)
    6197              : {
    6198           74 :   if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
    6199            0 :     gcc_unreachable ();
    6200              : 
    6201           74 :   if (USE_HIDDEN_LINKONCE)
    6202              :     {
    6203           74 :       const char *prefix;
    6204              : 
    6205           74 :       if (need_prefix == indirect_thunk_prefix_nt
    6206           74 :           && regno != INVALID_REGNUM)
    6207              :         {
    6208              :           /* NOTRACK prefix is only used with external thunk via
    6209              :              register so that NOTRACK prefix can be added to indirect
    6210              :              branch via register to support CET at run-time.  */
    6211              :           prefix = "_nt";
    6212              :         }
    6213              :       else
    6214           72 :         prefix = "";
    6215              : 
    6216           74 :       const char *ret = ret_p ? "return" : "indirect";
    6217              : 
    6218           74 :       if (regno != INVALID_REGNUM)
    6219              :         {
    6220           55 :           const char *reg_prefix;
    6221           55 :           if (LEGACY_INT_REGNO_P (regno))
    6222           53 :             reg_prefix = TARGET_64BIT ? "r" : "e";
    6223              :           else
    6224              :             reg_prefix = "";
    6225           55 :           sprintf (name, "__x86_%s_thunk%s_%s%s",
    6226              :                    ret, prefix, reg_prefix, reg_names[regno]);
    6227              :         }
    6228              :       else
    6229           19 :         sprintf (name, "__x86_%s_thunk%s", ret, prefix);
    6230              :     }
    6231              :   else
    6232              :     {
    6233              :       if (regno != INVALID_REGNUM)
    6234              :         ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
    6235              :       else
    6236              :         {
    6237              :           if (ret_p)
    6238              :             ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
    6239              :           else
    6240           74 :             ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
    6241              :         }
    6242              :     }
    6243           74 : }
    6244              : 
    6245              : /* Output a call and return thunk for indirect branch.  If REGNO != -1,
    6246              :    the function address is in REGNO and the call and return thunk looks like:
    6247              : 
    6248              :         call    L2
    6249              :    L1:
    6250              :         pause
    6251              :         lfence
    6252              :         jmp     L1
    6253              :    L2:
    6254              :         mov     %REG, (%sp)
    6255              :         ret
    6256              : 
    6257              :    Otherwise, the function address is on the top of stack and the
    6258              :    call and return thunk looks like:
    6259              : 
    6260              :         call L2
    6261              :   L1:
    6262              :         pause
    6263              :         lfence
    6264              :         jmp L1
    6265              :   L2:
    6266              :         lea WORD_SIZE(%sp), %sp
    6267              :         ret
    6268              :  */
    6269              : 
static void
output_indirect_thunk (unsigned int regno)
{
  char indirectlabel1[32];
  char indirectlabel2[32];

  /* Make the two internal labels unique across all emitted thunks.  */
  ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
			       indirectlabelno++);
  ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
			       indirectlabelno++);

  /* Call past the capture loop; this pushes a return address that the
     code at L2 below replaces or discards before the final ret.  */
  fputs ("\tcall\t", asm_out_file);
  assemble_name_raw (asm_out_file, indirectlabel2);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

  /* AMD and Intel CPUs prefer each a different instruction as loop filler.
     Usage of both pause + lfence is compromise solution.  */
  fprintf (asm_out_file, "\tpause\n\tlfence\n");

  /* Jump back to the pause/lfence filler, forming the capture loop.  */
  fputs ("\tjmp\t", asm_out_file);
  assemble_name_raw (asm_out_file, indirectlabel1);
  fputc ('\n', asm_out_file);

  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);

  /* The above call insn pushed a word to stack.  Adjust CFI info.  */
  if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
    {
      if (! dwarf2out_do_cfi_asm ())
	{
	  /* Without .cfi_* directives, advance the FDE location to the
	     L2 label by hand before recording the new CFA offset.  */
	  dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
	  xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
	  xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
	  vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
	}
      /* CFA is now one extra word away from the stack pointer.  */
      dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
      xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
      xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
      vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
      dwarf2out_emit_cfi (xcfi);
    }

  if (regno != INVALID_REGNUM)
    {
      /* MOV: overwrite the pushed return address with the branch
	 target held in REGNO, so the ret below jumps to it.  */
      rtx xops[2];
      xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
      xops[1] = gen_rtx_REG (word_mode, regno);
      output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
    }
  else
    {
      /* LEA: discard the pushed return address; the branch target was
	 already on top of the stack when the thunk was entered.  */
      rtx xops[2];
      xops[0] = stack_pointer_rtx;
      xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
    }

  fputs ("\tret\n", asm_out_file);
  /* -mharden-sls=return appends int3 to stop straight-line
     speculation past the ret.  */
  if ((ix86_harden_sls & harden_sls_return))
    fputs ("\tint3\n", asm_out_file);
}
    6337              : 
/* Output a function with a call and return thunk for indirect branch.
    6339              :    If REGNO != INVALID_REGNUM, the function address is in REGNO.
    6340              :    Otherwise, the function address is on the top of stack.  Thunk is
    6341              :    used for function return if RET_P is true.  */
    6342              : 
    6343              : static void
    6344           22 : output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
    6345              :                                 unsigned int regno, bool ret_p)
    6346              : {
    6347           22 :   char name[32];
    6348           22 :   tree decl;
    6349              : 
    6350              :   /* Create __x86_indirect_thunk.  */
    6351           22 :   indirect_thunk_name (name, regno, need_prefix, ret_p);
    6352           22 :   decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
    6353              :                      get_identifier (name),
    6354              :                      build_function_type_list (void_type_node, NULL_TREE));
    6355           22 :   DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
    6356              :                                    NULL_TREE, void_type_node);
    6357           22 :   TREE_PUBLIC (decl) = 1;
    6358           22 :   TREE_STATIC (decl) = 1;
    6359           22 :   DECL_IGNORED_P (decl) = 1;
    6360              : 
    6361              : #if TARGET_MACHO
    6362              :   if (TARGET_MACHO)
    6363              :     {
    6364              :       switch_to_section (darwin_sections[picbase_thunk_section]);
    6365              :       fputs ("\t.weak_definition\t", asm_out_file);
    6366              :       assemble_name (asm_out_file, name);
    6367              :       fputs ("\n\t.private_extern\t", asm_out_file);
    6368              :       assemble_name (asm_out_file, name);
    6369              :       putc ('\n', asm_out_file);
    6370              :       ASM_OUTPUT_LABEL (asm_out_file, name);
    6371              :       DECL_WEAK (decl) = 1;
    6372              :     }
    6373              :   else
    6374              : #endif
    6375           22 :     if (USE_HIDDEN_LINKONCE)
    6376              :       {
    6377           22 :         cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
    6378              : 
    6379           22 :         targetm.asm_out.unique_section (decl, 0);
    6380           22 :         switch_to_section (get_named_section (decl, NULL, 0));
    6381              : 
    6382           22 :         targetm.asm_out.globalize_label (asm_out_file, name);
    6383           22 :         fputs ("\t.hidden\t", asm_out_file);
    6384           22 :         assemble_name (asm_out_file, name);
    6385           22 :         putc ('\n', asm_out_file);
    6386           22 :         ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
    6387              :       }
    6388              :     else
    6389              :       {
    6390              :         switch_to_section (text_section);
    6391           22 :         ASM_OUTPUT_LABEL (asm_out_file, name);
    6392              :       }
    6393              : 
    6394           22 :   DECL_INITIAL (decl) = make_node (BLOCK);
    6395           22 :   current_function_decl = decl;
    6396           22 :   allocate_struct_function (decl, false);
    6397           22 :   init_function_start (decl);
    6398              :   /* We're about to hide the function body from callees of final_* by
    6399              :      emitting it directly; tell them we're a thunk, if they care.  */
    6400           22 :   cfun->is_thunk = true;
    6401           22 :   first_function_block_is_cold = false;
    6402              :   /* Make sure unwind info is emitted for the thunk if needed.  */
    6403           22 :   final_start_function (emit_barrier (), asm_out_file, 1);
    6404              : 
    6405           22 :   output_indirect_thunk (regno);
    6406              : 
    6407           22 :   final_end_function ();
    6408           22 :   init_insn_lengths ();
    6409           22 :   free_after_compilation (cfun);
    6410           22 :   set_cfun (NULL);
    6411           22 :   current_function_decl = NULL;
    6412           22 : }
    6413              : 
/* Bitmask of integer registers for which output_set_got has emitted a
   call to __x86.get_pc_thunk.<reg>; bit N corresponds to hard register
   N.  ix86_code_end emits the matching thunk bodies at the end of
   compilation.  */
static int pic_labels_used;
    6415              : 
    6416              : /* Fills in the label name that should be used for a pc thunk for
    6417              :    the given register.  */
    6418              : 
    6419              : static void
    6420        37434 : get_pc_thunk_name (char name[32], unsigned int regno)
    6421              : {
    6422        37434 :   gcc_assert (!TARGET_64BIT);
    6423              : 
    6424        37434 :   if (USE_HIDDEN_LINKONCE)
    6425        37434 :     sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
    6426              :   else
    6427        37434 :     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
    6428        37434 : }
    6429              : 
    6430              : 
/* Emit deferred out-of-line helpers at the end of code generation:
   the indirect-branch/return thunks requested via
   indirect_return_needed, indirect_return_via_cx,
   indirect_thunk_needed and indirect_thunks_used, and, for -fpic, a
   __x86.get_pc_thunk.<reg> helper for every register flagged in
   pic_labels_used.  Each pc thunk loads its register with the return
   address of the caller and then returns.  NOTE(review): presumably
   installed as the TARGET_ASM_CODE_END hook -- confirm at the target
   hook table.  */

static void
ix86_code_end (void)
{
  rtx xops[2];
  unsigned int regno;

  /* Whole-stack/return thunk variants first.  */
  if (indirect_return_needed)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
                                    INVALID_REGNUM, true);
  if (indirect_return_via_cx)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
                                    CX_REG, true);
  if (indirect_thunk_needed)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
                                    INVALID_REGNUM, false);

  /* Per-register thunks for the REX range (presumably r8-r15 --
     confirm against the i386 register numbering).  */
  for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
    {
      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
        output_indirect_thunk_function (indirect_thunk_prefix_none,
                                        regno, false);
    }

  /* Per-register thunks for the REX2 range (presumably the APX
     r16-r31 registers -- confirm).  */
  for (regno = FIRST_REX2_INT_REG; regno <= LAST_REX2_INT_REG; regno++)
    {
      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
        output_indirect_thunk_function (indirect_thunk_prefix_none,
                                        regno, false);
    }

  /* Legacy integer registers: indirect thunks plus the pc thunks.  */
  for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
    {
      char name[32];
      tree decl;

      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
        output_indirect_thunk_function (indirect_thunk_prefix_none,
                                        regno, false);

      /* Only emit a pc thunk if output_set_got asked for one.  */
      if (!(pic_labels_used & (1 << regno)))
        continue;

      get_pc_thunk_name (name, regno);

      /* Build a dummy void(void) FUNCTION_DECL for the thunk.  */
      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                         get_identifier (name),
                         build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
                                       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;
      DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
        {
          /* Mach-O: weak, private-extern, picbase thunk section.  */
          switch_to_section (darwin_sections[picbase_thunk_section]);
          fputs ("\t.weak_definition\t", asm_out_file);
          assemble_name (asm_out_file, name);
          fputs ("\n\t.private_extern\t", asm_out_file);
          assemble_name (asm_out_file, name);
          putc ('\n', asm_out_file);
          ASM_OUTPUT_LABEL (asm_out_file, name);
          DECL_WEAK (decl) = 1;
        }
      else
#endif
      if (USE_HIDDEN_LINKONCE)
        {
          /* Hidden COMDAT symbol: one copy survives linking.  */
          cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

          targetm.asm_out.unique_section (decl, 0);
          switch_to_section (get_named_section (decl, NULL, 0));

          targetm.asm_out.globalize_label (asm_out_file, name);
          fputs ("\t.hidden\t", asm_out_file);
          assemble_name (asm_out_file, name);
          putc ('\n', asm_out_file);
          ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
        }
      else
        {
          /* Fallback: plain label in .text.  */
          switch_to_section (text_section);
          ASM_OUTPUT_LABEL (asm_out_file, name);
        }

      /* Minimal function context so final_* emit unwind info.  */
      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      allocate_struct_function (decl, false);
      init_function_start (decl);
      /* We're about to hide the function body from callees of final_* by
         emitting it directly; tell them we're a thunk, if they care.  */
      cfun->is_thunk = true;
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
         as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
        {
          int i = 8;

          while (i--)
            fputs ("\tnop\n", asm_out_file);
        }

      /* Load the caller's return address (top of stack) into the
         PIC register and return.  */
      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs ("\tret\n", asm_out_file);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      set_cfun (NULL);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
    6555              : 
/* Emit code for the SET_GOT patterns: materialize the address of the
   global offset table (GOT_SYMBOL_NAME) in DEST.  With -fpic this
   calls the per-register __x86.get_pc_thunk helper (whose body
   ix86_code_end emits later) to fetch the pc, then adds the GOT
   symbol; without PIC it loads the address of LABEL (or a fresh
   label) directly.  LABEL may be NULL.  Returns "" so the caller can
   use it as an assembler template result.  */

const char *
output_set_got (rtx dest, rtx label)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_GOTTPIC && TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
                             gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
         Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
         an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (flag_pic)
    {
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      /* Record that ix86_code_end must emit the thunk for DEST.  */
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      /* The thunk returns with the pc (this call's return address)
         left in DEST.  */
      output_asm_insn ("%!call\t%X2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
         This is what will be referenced by the Mach-O PIC subsystem.  */
      if (machopic_should_output_picbase_label () || !label)
        ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);

      /* When we are restoring the pic base at the site of a nonlocal label,
         and we decided to emit the pic base above, we will still output a
         local label used for calculating the correction offset (even though
         the offset will be 0 in that case).  */
      if (label)
        targetm.asm_out.internal_label (asm_out_file, "L",
                                           CODE_LABEL_NUMBER (label));
#endif
    }
  else
    {
      if (TARGET_MACHO)
        /* We don't need a pic base, we're not producing pic.  */
        gcc_unreachable ();

      /* Non-PIC: load the address of the local label directly.  */
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
      targetm.asm_out.internal_label (asm_out_file, "L",
                                      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }

  if (!TARGET_MACHO)
    /* DEST += GOT symbol, completing the GOT address.  */
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
    6625              : 
    6626              : /* Generate an "push" pattern for input ARG.  */
    6627              : 
    6628              : rtx
    6629      1878151 : gen_push (rtx arg, bool ppx_p)
    6630              : {
    6631      1878151 :   struct machine_function *m = cfun->machine;
    6632              : 
    6633      1878151 :   if (m->fs.cfa_reg == stack_pointer_rtx)
    6634      1602921 :     m->fs.cfa_offset += UNITS_PER_WORD;
    6635      1878151 :   m->fs.sp_offset += UNITS_PER_WORD;
    6636              : 
    6637      1878151 :   if (REG_P (arg) && GET_MODE (arg) != word_mode)
    6638           30 :     arg = gen_rtx_REG (word_mode, REGNO (arg));
    6639              : 
    6640      1878151 :   rtx stack = gen_rtx_MEM (word_mode,
    6641      1878151 :                            gen_rtx_PRE_DEC (Pmode,
    6642              :                                             stack_pointer_rtx));
    6643      3756214 :   return ppx_p ? gen_pushp_di (stack, arg) : gen_rtx_SET (stack, arg);
    6644              : }
    6645              : 
    6646              : rtx
    6647           23 : gen_pushfl (void)
    6648              : {
    6649           23 :   struct machine_function *m = cfun->machine;
    6650           23 :   rtx flags, mem;
    6651              : 
    6652           23 :   if (m->fs.cfa_reg == stack_pointer_rtx)
    6653            0 :     m->fs.cfa_offset += UNITS_PER_WORD;
    6654           23 :   m->fs.sp_offset += UNITS_PER_WORD;
    6655              : 
    6656           23 :   flags = gen_rtx_REG (CCmode, FLAGS_REG);
    6657              : 
    6658           23 :   mem = gen_rtx_MEM (word_mode,
    6659           23 :                      gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
    6660              : 
    6661           23 :   return gen_pushfl2 (word_mode, mem, flags);
    6662              : }
    6663              : 
    6664              : /* Generate an "pop" pattern for input ARG.  */
    6665              : 
    6666              : rtx
    6667      1461178 : gen_pop (rtx arg, bool ppx_p)
    6668              : {
    6669      1461178 :   if (REG_P (arg) && GET_MODE (arg) != word_mode)
    6670           26 :     arg = gen_rtx_REG (word_mode, REGNO (arg));
    6671              : 
    6672      1461178 :   rtx stack = gen_rtx_MEM (word_mode,
    6673      1461178 :                            gen_rtx_POST_INC (Pmode,
    6674              :                                              stack_pointer_rtx));
    6675              : 
    6676      2922268 :   return ppx_p ? gen_popp_di (arg, stack) : gen_rtx_SET (arg, stack);
    6677              : }
    6678              : 
    6679              : rtx
    6680           21 : gen_popfl (void)
    6681              : {
    6682           21 :   rtx flags, mem;
    6683              : 
    6684           21 :   flags = gen_rtx_REG (CCmode, FLAGS_REG);
    6685              : 
    6686           21 :   mem = gen_rtx_MEM (word_mode,
    6687           21 :                      gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
    6688              : 
    6689           21 :   return gen_popfl1 (word_mode, flags, mem);
    6690              : }
    6691              : 
    6692              : /* Generate a "push2" pattern for input ARG.  */
    6693              : rtx
    6694           19 : gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false)
    6695              : {
    6696           19 :   struct machine_function *m = cfun->machine;
    6697           19 :   const int offset = UNITS_PER_WORD * 2;
    6698              : 
    6699           19 :   if (m->fs.cfa_reg == stack_pointer_rtx)
    6700           14 :     m->fs.cfa_offset += offset;
    6701           19 :   m->fs.sp_offset += offset;
    6702              : 
    6703           19 :   if (REG_P (reg1) && GET_MODE (reg1) != word_mode)
    6704            0 :     reg1 = gen_rtx_REG (word_mode, REGNO (reg1));
    6705              : 
    6706           19 :   if (REG_P (reg2) && GET_MODE (reg2) != word_mode)
    6707            0 :     reg2 = gen_rtx_REG (word_mode, REGNO (reg2));
    6708              : 
    6709           19 :   return ppx_p ? gen_push2p_di (mem, reg1, reg2)
    6710            4 :                : gen_push2_di (mem, reg1, reg2);
    6711              : }
    6712              : 
/* Return the number of an unused call-clobbered register that is
   available for the entire function (so it could hold the PIC base
   without being saved), or INVALID_REGNUM if there is none.  */

static unsigned int
ix86_select_alt_pic_regnum (void)
{
  /* With a pseudo PIC register there is nothing to select.  */
  if (ix86_use_pseudo_pic_reg ())
    return INVALID_REGNUM;

  /* Only leaf functions that are not profiled and do not call a TLS
     descriptor can safely repurpose a call-clobbered register.  */
  if (crtl->is_leaf
      && !crtl->profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i, drap;
      /* Can't use the same register for both PIC and DRAP.  */
      if (crtl->drap_reg)
        drap = REGNO (crtl->drap_reg);
      else
        drap = -1;
      /* Scan hard registers 2..0 (presumably the legacy
         call-clobbered integer registers -- confirm against the
         i386 register numbering) for one that is never live.  */
      for (i = 2; i >= 0; --i)
        if (i != drap && !df_regs_ever_live_p (i))
          return i;
    }

  return INVALID_REGNUM;
}
    6739              : 
    6740              : /* Return true if REGNO is used by the epilogue.  */
    6741              : 
    6742              : bool
    6743   1664085058 : ix86_epilogue_uses (int regno)
    6744              : {
    6745              :   /* If there are no caller-saved registers, we preserve all registers,
    6746              :      except for MMX and x87 registers which aren't supported when saving
    6747              :      and restoring registers.  Don't explicitly save SP register since
    6748              :      it is always preserved.  */
    6749   1664085058 :   return (epilogue_completed
    6750    263344050 :           && (cfun->machine->call_saved_registers
    6751    263344050 :               == TYPE_NO_CALLER_SAVED_REGISTERS)
    6752        27140 :           && !fixed_regs[regno]
    6753         4857 :           && !STACK_REGNO_P (regno)
    6754   1664089915 :           && !MMX_REGNO_P (regno));
    6755              : }
    6756              : 
    6757              : /* Return nonzero if register REGNO can be used as a scratch register
    6758              :    in peephole2.  */
    6759              : 
    6760              : static bool
    6761      1236134 : ix86_hard_regno_scratch_ok (unsigned int regno)
    6762              : {
    6763              :   /* If there are no caller-saved registers, we can't use any register
    6764              :      as a scratch register after epilogue and use REGNO as scratch
    6765              :      register only if it has been used before to avoid saving and
    6766              :      restoring it.  */
    6767      1236134 :   return ((cfun->machine->call_saved_registers
    6768      1236134 :            != TYPE_NO_CALLER_SAVED_REGISTERS)
    6769      1236134 :           || (!epilogue_completed
    6770            0 :               && df_regs_ever_live_p (regno)));
    6771              : }
    6772              : 
/* Return TRUE if we need to save REGNO.  MAYBE_EH_RETURN additionally
   treats the EH return data registers as needing a save.  When
   IGNORE_OUTLINED is true, registers that the ms2sysv out-of-line
   stubs already save/restore are reported as not needing an inline
   save.  */

bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
{
  rtx reg;

  switch (cfun->machine->call_saved_registers)
    {
    case TYPE_DEFAULT_CALL_SAVED_REGISTERS:
      break;

    case TYPE_NO_CALLER_SAVED_REGISTERS:
      /* If there are no caller-saved registers, we preserve all
         registers, except for MMX and x87 registers which aren't
         supported when saving and restoring registers.  Don't
         explicitly save SP register since it is always preserved.

         Don't preserve registers used for function return value.  */
      reg = crtl->return_rtx;
      if (reg)
        {
          unsigned int i = REGNO (reg);
          unsigned int nregs = REG_NREGS (reg);
          /* REGNO is exempt if it is any of the hard registers making
             up the return value, i.e. in [i, i + nregs - 1].  */
          while (nregs-- > 0)
            if ((i + nregs) == regno)
              return false;
        }

      return (df_regs_ever_live_p (regno)
              && !fixed_regs[regno]
              && !STACK_REGNO_P (regno)
              && !MMX_REGNO_P (regno)
              && (regno != HARD_FRAME_POINTER_REGNUM
                  || !frame_pointer_needed));

    case TYPE_NO_CALLEE_SAVED_REGISTERS:
    case TYPE_PRESERVE_NONE:
      /* Nothing is saved here except a frame pointer the function
         actually needs.  */
      if (regno != HARD_FRAME_POINTER_REGNUM)
        return false;
      break;
    }

  if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && pic_offset_table_rtx)
    {
      if (ix86_use_pseudo_pic_reg ())
        {
          /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
          _mcount in prologue.  */
          if (!TARGET_64BIT && flag_pic && crtl->profile)
            return true;
        }
      else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
               || crtl->profile
               || crtl->calls_eh_return
               || crtl->uses_const_pool
               || cfun->has_nonlocal_label)
        /* Save the PIC register only when no alternate call-clobbered
           register is free to hold the PIC base instead.  */
        return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
    }

  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      /* Walk the EH return data registers until the INVALID_REGNUM
         sentinel; any of them must be saved.  */
      for (i = 0; ; i++)
        {
          unsigned test = EH_RETURN_DATA_REGNO (i);
          if (test == INVALID_REGNUM)
            break;
          if (test == regno)
            return true;
        }
    }

  if (ignore_outlined && cfun->machine->call_ms2sysv)
    {
      unsigned count = cfun->machine->call_ms2sysv_extra_regs
                       + xlogue_layout::MIN_REGS;
      /* The ms2sysv stub already saves/restores this register.  */
      if (xlogue_layout::is_stub_managed_reg (regno, count))
        return false;
    }

  /* The DRAP register is saved unless explicitly told otherwise.  */
  if (crtl->drap_reg
      && regno == REGNO (crtl->drap_reg)
      && !cfun->machine->no_drap_save_restore)
    return true;

  /* Default rule: save call-saved registers that are ever live,
     except a hard frame pointer the prologue sets up itself.  */
  return (df_regs_ever_live_p (regno)
          && !call_used_or_fixed_reg_p (regno)
          && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
    6864              : 
    6865              : /* Return number of saved general prupose registers.  */
    6866              : 
    6867              : static int
    6868      8159307 : ix86_nsaved_regs (void)
    6869              : {
    6870      8159307 :   int nregs = 0;
    6871      8159307 :   int regno;
    6872              : 
    6873    758815551 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    6874    750656244 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
    6875      8172070 :       nregs ++;
    6876      8159307 :   return nregs;
    6877              : }
    6878              : 
    6879              : /* Return number of saved SSE registers.  */
    6880              : 
    6881              : static int
    6882      8194252 : ix86_nsaved_sseregs (void)
    6883              : {
    6884      8194252 :   int nregs = 0;
    6885      8194252 :   int regno;
    6886              : 
    6887      7392456 :   if (!TARGET_64BIT_MS_ABI
    6888      8194252 :       && (cfun->machine->call_saved_registers
    6889      7968846 :           != TYPE_NO_CALLER_SAVED_REGISTERS))
    6890              :     return 0;
    6891     21031299 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    6892     20805156 :     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
    6893      1896395 :       nregs ++;
    6894              :   return nregs;
    6895              : }
    6896              : 
    6897              : /* Given FROM and TO register numbers, say whether this elimination is
    6898              :    allowed.  If stack alignment is needed, we can only replace argument
    6899              :    pointer with hard frame pointer, or replace frame pointer with stack
    6900              :    pointer.  Otherwise, frame pointer elimination is automatically
    6901              :    handled and all other eliminations are valid.  */
    6902              : 
    6903              : static bool
    6904     48322220 : ix86_can_eliminate (const int from, const int to)
    6905              : {
    6906     48322220 :   if (stack_realign_fp)
    6907      1706656 :     return ((from == ARG_POINTER_REGNUM
    6908      1706656 :              && to == HARD_FRAME_POINTER_REGNUM)
    6909      1706656 :             || (from == FRAME_POINTER_REGNUM
    6910      1706656 :                 && to == STACK_POINTER_REGNUM));
    6911              :   else
    6912     86735368 :     return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
    6913              : }
    6914              : 
    6915              : /* Return the offset between two registers, one to be eliminated, and the other
    6916              :    its replacement, at the start of a routine.  */
    6917              : 
    6918              : HOST_WIDE_INT
    6919    141257046 : ix86_initial_elimination_offset (int from, int to)
    6920              : {
    6921    141257046 :   struct ix86_frame &frame = cfun->machine->frame;
    6922              : 
    6923    141257046 :   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    6924     10413478 :     return frame.hard_frame_pointer_offset;
    6925    130843568 :   else if (from == FRAME_POINTER_REGNUM
    6926    130843568 :            && to == HARD_FRAME_POINTER_REGNUM)
    6927      8124036 :     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
    6928              :   else
    6929              :     {
    6930    122719532 :       gcc_assert (to == STACK_POINTER_REGNUM);
    6931              : 
    6932    122719532 :       if (from == ARG_POINTER_REGNUM)
    6933    114595496 :         return frame.stack_pointer_offset;
    6934              : 
    6935      8124036 :       gcc_assert (from == FRAME_POINTER_REGNUM);
    6936      8124036 :       return frame.stack_pointer_offset - frame.frame_pointer_offset;
    6937              :     }
    6938              : }
    6939              : 
    6940              : /* Emits a warning for unsupported msabi to sysv pro/epilogues.  */
    6941              : void
    6942            0 : warn_once_call_ms2sysv_xlogues (const char *feature)
    6943              : {
    6944            0 :   static bool warned_once = false;
    6945            0 :   if (!warned_once)
    6946              :     {
    6947            0 :       warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
    6948              :                feature);
    6949            0 :       warned_once = true;
    6950              :     }
    6951            0 : }
    6952              : 
    6953              : /* Return the probing interval for -fstack-clash-protection.  */
    6954              : 
    6955              : static HOST_WIDE_INT
    6956          496 : get_probe_interval (void)
    6957              : {
    6958          341 :   if (flag_stack_clash_protection)
    6959          412 :     return (HOST_WIDE_INT_1U
    6960          412 :             << param_stack_clash_protection_probe_interval);
    6961              :   else
    6962              :     return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
    6963              : }
    6964              : 
    6965              : /* When using -fsplit-stack, the allocation routines set a field in
    6966              :    the TCB to the bottom of the stack plus this much space, measured
    6967              :    in bytes.  */
    6968              : 
    6969              : #define SPLIT_STACK_AVAILABLE 256
    6970              : 
    6971              : /* Return true if push2/pop2 can be generated.  */
    6972              : 
    6973              : static bool
    6974      8159961 : ix86_can_use_push2pop2 (void)
    6975              : {
    6976              :   /* Use push2/pop2 only if the incoming stack is 16-byte aligned.  */
    6977      8159961 :   unsigned int incoming_stack_boundary
    6978      8159961 :     = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
    6979      8159961 :        ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
    6980      8159961 :   return incoming_stack_boundary % 128 == 0;
    6981              : }
    6982              : 
    6983              : /* Helper function to determine whether push2/pop2 can be used in prologue or
    6984              :    epilogue for register save/restore.  */
    6985              : static bool
    6986      8159307 : ix86_pro_and_epilogue_can_use_push2pop2 (int nregs)
    6987              : {
    6988      8159307 :   if (!ix86_can_use_push2pop2 ())
    6989              :     return false;
    6990      8123356 :   int aligned = cfun->machine->fs.sp_offset % 16 == 0;
    6991      8123356 :   return TARGET_APX_PUSH2POP2
    6992         2844 :          && !cfun->machine->frame.save_regs_using_mov
    6993         2832 :          && cfun->machine->func_type == TYPE_NORMAL
    6994      8126180 :          && (nregs + aligned) >= 3;
    6995              : }
    6996              : 
    6997              : /* Check if push/pop should be used to save/restore registers.  */
    6998              : static bool
    6999      8884705 : save_regs_using_push_pop (HOST_WIDE_INT to_allocate)
    7000              : {
    7001      3199743 :   return ((!to_allocate && cfun->machine->frame.nregs <= 1)
    7002      5934481 :           || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
    7003              :           /* If static stack checking is enabled and done with probes,
    7004              :              the registers need to be saved before allocating the frame.  */
    7005      5933820 :           || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
    7006              :           /* If stack clash probing needs a loop, then it needs a
    7007              :              scratch register.  But the returned register is only guaranteed
    7008              :              to be safe to use after register saves are complete.  So if
    7009              :              stack clash protections are enabled and the allocated frame is
    7010              :              larger than the probe interval, then use pushes to save
    7011              :              callee saved registers.  */
    7012     14818445 :           || (flag_stack_clash_protection
    7013          341 :               && !ix86_target_stack_probe ()
    7014          341 :               && to_allocate > get_probe_interval ()));
    7015              : }
    7016              : 
/* Fill structure ix86_frame about frame of currently computed function.

   Lays out the stack frame top-down: starting from the incoming return
   address, OFFSET is advanced through the GPR save area, the SSE save
   area (possibly realigned), the va-arg area, the local frame and the
   outgoing argument area, recording each region boundary in
   cfun->machine->frame.  Also decides whether registers are saved with
   moves or pushes and how much of the frame may live in the red zone.
   NOTE(review): offsets appear to be measured from the CFA — see their
   consumption in ix86_initial_elimination_offset; confirm against the
   ix86_frame definition.  */

static void
ix86_compute_frame_layout (void)
{
  struct ix86_frame *frame = &cfun->machine->frame;
  struct machine_function *m = cfun->machine;
  unsigned HOST_WIDE_INT stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned HOST_WIDE_INT preferred_alignment;
  HOST_WIDE_INT size = ix86_get_frame_size ();
  HOST_WIDE_INT to_allocate;

  /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
   * ms_abi functions that call a sysv function.  We now need to prune away
   * cases where it should be disabled.  */
  if (TARGET_64BIT && m->call_ms2sysv)
    {
      gcc_assert (TARGET_64BIT_MS_ABI);
      gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
      gcc_assert (!TARGET_SEH);
      gcc_assert (TARGET_SSE);
      gcc_assert (!ix86_using_red_zone ());

      if (crtl->calls_eh_return)
        {
          gcc_assert (!reload_completed);
          m->call_ms2sysv = false;
          warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
        }

      else if (ix86_static_chain_on_stack)
        {
          gcc_assert (!reload_completed);
          m->call_ms2sysv = false;
          warn_once_call_ms2sysv_xlogues ("static call chains");
        }

      /* Finally, compute which registers the stub will manage.  */
      else
        {
          unsigned count = xlogue_layout::count_stub_managed_regs ();
          m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
          m->call_ms2sysv_pad_in = 0;
        }
    }

  /* Count the integer and SSE registers the prologue must save.  */
  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  /* 64-bit MS ABI seem to require stack alignment to be always 16,
     except for function prologues, leaf functions and when the default
     incoming stack boundary is overridden at command line or via
     force_align_arg_pointer attribute.

     Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
     at call sites, including profile function calls.

     For APX push2/pop2, the stack also requires 128b alignment.  */
  if ((ix86_pro_and_epilogue_can_use_push2pop2 (frame->nregs)
       && crtl->preferred_stack_boundary < 128)
      || (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
           && crtl->preferred_stack_boundary < 128)
          && (!crtl->is_leaf || cfun->calls_alloca != 0
              || ix86_current_function_calls_tls_descriptor
              || (TARGET_MACHO && crtl->profile)
              || ix86_incoming_stack_boundary < 128)))
    {
      crtl->preferred_stack_boundary = 128;
      if (crtl->stack_alignment_needed < 128)
        crtl->stack_alignment_needed = 128;
    }

  /* Convert the alignment requirements from bits to bytes.  */
  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* The only ABI saving SSE regs should be 64-bit ms_abi or with
     no_caller_saved_registers attribute.  */
  gcc_assert (TARGET_64BIT
              || (cfun->machine->call_saved_registers
                  == TYPE_NO_CALLER_SAVED_REGISTERS)
              || !frame->nsseregs);
  if (TARGET_64BIT && m->call_ms2sysv)
    {
      gcc_assert (stack_alignment_needed >= 16);
      gcc_assert ((cfun->machine->call_saved_registers
                   == TYPE_NO_CALLER_SAVED_REGISTERS)
                  || !frame->nsseregs);
    }

  /* For SEH we have to limit the amount of code movement into the prologue.
     At present we do this via a BLOCKAGE, at which point there's very little
     scheduling that can be done, which means that there's very little point
     in doing anything except PUSHs.  */
  if (TARGET_SEH)
    m->use_fast_prologue_epilogue = false;
  else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_node::get (current_function_decl);

      /* The fast prologue uses move instead of push to save registers.  This
         is significantly longer, but also executes faster as modern hardware
         can execute the moves in parallel, but can't do that for push/pop.

         Be careful about choosing what prologue to emit:  When function takes
         many instructions to execute we may use slow version as well as in
         case function is known to be outside hot spot (this is known with
         feedback only).  Weight the size of function by number of registers
         to save as it is cheap to use one or two push instructions but very
         slow to use many of them.

         Calling this hook multiple times with the same frame requirements
         must produce the same layout, since the RA might otherwise be
         unable to reach a fixed point or might fail its final sanity checks.
         This means that once we've assumed that a function does or doesn't
         have a particular size, we have to stick to that assumption
         regardless of how the function has changed since.  */
      if (count)
        count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
          || (flag_branch_probabilities
              && node->frequency < NODE_FREQUENCY_HOT))
        m->use_fast_prologue_epilogue = false;
      else
        {
          /* Cache the expensive_function_p result so repeated layout
             calls with the same COUNT stay consistent (see above).  */
          if (count != frame->expensive_count)
            {
              frame->expensive_count = count;
              frame->expensive_p = expensive_function_p (count);
            }
          m->use_fast_prologue_epilogue = !frame->expensive_p;
        }
    }

  frame->save_regs_using_mov
    = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;

  /* Skip return address and error code in exception handler.  */
  offset = INCOMING_FRAME_SP_OFFSET;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* Calculate the size of the va-arg area (not including padding, if any).  */
  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;

  /* Also adjust stack_realign_offset for the largest alignment of
     stack slot actually used.  */
  if (stack_realign_fp
      || (cfun->machine->max_used_stack_alignment != 0
          && (offset % cfun->machine->max_used_stack_alignment) != 0))
    {
      /* We may need a 16-byte aligned stack for the remainder of the
         register save area, but the stack frame for the local function
         may require a greater alignment if using AVX/2/512.  In order
         to avoid wasting space, we first calculate the space needed for
         the rest of the register saves, add that to the stack pointer,
         and then realign the stack to the boundary of the start of the
         frame for the local function.  */
      HOST_WIDE_INT space_needed = 0;
      HOST_WIDE_INT sse_reg_space_needed = 0;

      if (TARGET_64BIT)
        {
          if (m->call_ms2sysv)
            {
              m->call_ms2sysv_pad_in = 0;
              space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
            }

          else if (frame->nsseregs)
            /* The only ABI that has saved SSE registers (Win64) also has a
               16-byte aligned default stack.  However, many programs violate
               the ABI, and Wine64 forces stack realignment to compensate.  */
            space_needed = frame->nsseregs * 16;

          sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);

          /* 64-bit frame->va_arg_size should always be a multiple of 16, but
             rounding to be pedantic.  */
          space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
        }
      else
        space_needed = frame->va_arg_size;

      /* Record the allocation size required prior to the realignment AND.  */
      frame->stack_realign_allocate = space_needed;

      /* The re-aligned stack starts at frame->stack_realign_offset.  Values
         before this point are not directly comparable with values below
         this point.  Use sp_valid_at to determine if the stack pointer is
         valid for a given offset, fp_valid_at for the frame pointer, or
         choose_baseaddr to have a base register chosen for you.

         Note that the result of (frame->stack_realign_offset
         & (stack_alignment_needed - 1)) may not equal zero.  */
      offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
      frame->stack_realign_offset = offset - space_needed;
      frame->sse_reg_save_offset = frame->stack_realign_offset
                                                        + sse_reg_space_needed;
    }
  else
    {
      /* No dynamic realignment: the SSE save area follows the GPR save
         area directly, aligned to 16 bytes when possible.  */
      frame->stack_realign_offset = offset;

      if (TARGET_64BIT && m->call_ms2sysv)
        {
          m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
          offset += xlogue_layout::get_instance ().get_stack_space_used ();
        }

      /* Align and set SSE register save area.  */
      else if (frame->nsseregs)
        {
          /* If the incoming stack boundary is at least 16 bytes, or DRAP is
             required and the DRAP re-alignment boundary is at least 16 bytes,
             then we want the SSE register save area properly aligned.  */
          if (ix86_incoming_stack_boundary >= 128
                  || (stack_realign_drap && stack_alignment_needed >= 16))
            offset = ROUND_UP (offset, 16);
          offset += frame->nsseregs * 16;
        }
      frame->sse_reg_save_offset = offset;
      offset += frame->va_arg_size;
    }

  /* Align start of frame for local function.  When a function call
     is removed, it may become a leaf function.  But if argument may
     be passed on stack, we need to align the stack when there is no
     tail call.  */
  if (m->call_ms2sysv
      || frame->va_arg_size != 0
      || size != 0
      || !crtl->is_leaf
      || (!crtl->tail_call_emit
          && cfun->machine->outgoing_args_on_stack)
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = ROUND_UP (offset, stack_alignment_needed);

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last crtl->outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!crtl->is_leaf || cfun->calls_alloca
          || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!crtl->is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = ROUND_UP (offset, preferred_alignment);

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if (save_regs_using_push_pop (to_allocate))
    frame->save_regs_using_mov = false;

  /* A leaf function that never changes SP (and whose asm does not
     clobber the red zone) may keep part of the frame in the red zone
     below SP, capped at RED_ZONE_SIZE - RED_ZONE_RESERVE.  */
  if (ix86_using_red_zone ()
      && crtl->sp_is_unchanging
      && crtl->is_leaf
      && !cfun->machine->asm_redzone_clobber_seen
      && !ix86_pc_thunk_call_expanded
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
        frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
        frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;

  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      /* Force the frame pointer to point at or below the lowest register save
         area, see the SEH code in config/i386/winnt.cc for the rationale.  */
      frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;

      /* If we can leave the frame pointer where it is, do so; however return
         the establisher frame for __builtin_frame_address (0) or else if the
         frame overflows the SEH maximum frame size.

         Note that the value returned by __builtin_frame_address (0) is quite
         constrained, because setjmp is piggybacked on the SEH machinery with
         recent versions of MinGW:

          #    elif defined(__SEH__)
          #     if defined(__aarch64__) || defined(_ARM64_)
          #      define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
          #     elif (__MINGW_GCC_VERSION < 40702)
          #      define setjmp(BUF) _setjmp((BUF), mingw_getsp())
          #     else
          #      define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
          #     endif

         and the second argument passed to _setjmp, if not null, is forwarded
         to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
         built an ExceptionRecord on the fly describing the setjmp buffer).  */
      const HOST_WIDE_INT diff
        = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff <= 255 && !crtl->accesses_prior_frames)
        {
          /* The resulting diff will be a multiple of 16 lower than 255,
             i.e. at most 240 as required by the unwind data structure.  */
          frame->hard_frame_pointer_offset += (diff & 15);
        }
      else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
        {
          /* Ideally we'd determine what portion of the local stack frame
             (within the constraint of the lowest 240) is most heavily used.
             But without that complication, simply bias the frame pointer
             by 128 bytes so as to maximize the amount of the local stack
             frame that is addressable with 8-bit offsets.  */
          frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
        }
      else
        frame->hard_frame_pointer_offset = frame->hfp_save_offset;
    }
}
    7378              : 
    7379              : /* This is semi-inlined memory_address_length, but simplified
    7380              :    since we know that we're always dealing with reg+offset, and
    7381              :    to avoid having to create and discard all that rtl.  */
    7382              : 
    7383              : static inline int
    7384      1013011 : choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
    7385              : {
    7386      1013011 :   int len = 4;
    7387              : 
    7388            0 :   if (offset == 0)
    7389              :     {
    7390              :       /* EBP and R13 cannot be encoded without an offset.  */
    7391            0 :       len = (regno == BP_REG || regno == R13_REG);
    7392              :     }
    7393      1004739 :   else if (IN_RANGE (offset, -128, 127))
    7394       634575 :     len = 1;
    7395              : 
    7396              :   /* ESP and R12 must be encoded with a SIB byte.  */
    7397            0 :   if (regno == SP_REG || regno == R12_REG)
    7398            0 :     len++;
    7399              : 
    7400      1013011 :   return len;
    7401              : }
    7402              : 
    7403              : /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
    7404              :    the frame save area.  The register is saved at CFA - CFA_OFFSET.  */
    7405              : 
    7406              : static bool
    7407      3497151 : sp_valid_at (HOST_WIDE_INT cfa_offset)
    7408              : {
    7409      3497151 :   const struct machine_frame_state &fs = cfun->machine->fs;
    7410      3497151 :   if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
    7411              :     {
    7412              :       /* Validate that the cfa_offset isn't in a "no-man's land".  */
    7413        46484 :       gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
    7414              :       return false;
    7415              :     }
    7416      3450667 :   return fs.sp_valid;
    7417              : }
    7418              : 
    7419              : /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
    7420              :    the frame save area.  The register is saved at CFA - CFA_OFFSET.  */
    7421              : 
    7422              : static inline bool
    7423      1367013 : fp_valid_at (HOST_WIDE_INT cfa_offset)
    7424              : {
    7425      1367013 :   const struct machine_frame_state &fs = cfun->machine->fs;
    7426      1367013 :   if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
    7427              :     {
    7428              :       /* Validate that the cfa_offset isn't in a "no-man's land".  */
    7429        28328 :       gcc_assert (cfa_offset >= fs.sp_realigned_offset);
    7430              :       return false;
    7431              :     }
    7432      1338685 :   return fs.fp_valid;
    7433              : }
    7434              : 
/* Choose a base register based upon alignment requested, speed and/or
   size.

   CFA_OFFSET is the slot being accessed, measured from the CFA.  On
   return BASE_REG/BASE_OFFSET describe the chosen addressing base
   (BASE_REG is left untouched if no candidate qualifies).  When
   ALIGN_REQESTED is nonzero, candidates whose guaranteed alignment is
   below it are rejected; if ALIGN is non-null it receives the alignment
   of the register finally selected.  */

static void
choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
		HOST_WIDE_INT &base_offset,
		unsigned int align_reqested, unsigned int *align)
{
  const struct machine_function *m = cfun->machine;
  unsigned int hfp_align;
  unsigned int drap_align;
  unsigned int sp_align;
  /* A candidate must actually be able to reach CFA_OFFSET given the
     current frame state (see sp_valid_at/fp_valid_at above).  */
  bool hfp_ok  = fp_valid_at (cfa_offset);
  bool drap_ok = m->fs.drap_valid;
  bool sp_ok   = sp_valid_at (cfa_offset);

  /* By default each base is only as aligned as the incoming stack.  */
  hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;

  /* Filter out any registers that don't meet the requested alignment
     criteria.  */
  if (align_reqested)
    {
      if (m->fs.realigned)
	hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
      /* SEH unwind code does not currently support REG_CFA_EXPRESSION
	 notes (which we would need to use a realigned stack pointer),
	 so disable on SEH targets.  */
      else if (m->fs.sp_realigned)
	sp_align = crtl->stack_alignment_needed;

      hfp_ok = hfp_ok && hfp_align >= align_reqested;
      drap_ok = drap_ok && drap_align >= align_reqested;
      sp_ok = sp_ok && sp_align >= align_reqested;
    }

  if (m->use_fast_prologue_epilogue)
    {
      /* Choose the base register most likely to allow the most scheduling
         opportunities.  Generally FP is valid throughout the function,
         while DRAP must be reloaded within the epilogue.  But choose either
         over the SP due to increased encoding size.  */

      if (hfp_ok)
	{
	  base_reg = hard_frame_pointer_rtx;
	  base_offset = m->fs.fp_offset - cfa_offset;
	}
      else if (drap_ok)
	{
	  /* DRAP addresses are relative to the CFA itself.  */
	  base_reg = crtl->drap_reg;
	  base_offset = 0 - cfa_offset;
	}
      else if (sp_ok)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
	}
    }
  else
    {
      HOST_WIDE_INT toffset;
      int len = 16, tlen;

      /* Choose the base register with the smallest address encoding.
         With a tie, choose FP > DRAP > SP.  Each later candidate wins
         ties because the comparisons below use <=.  */
      if (sp_ok)
	{
	  base_reg = stack_pointer_rtx;
	  base_offset = m->fs.sp_offset - cfa_offset;
          len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
	}
      if (drap_ok)
	{
	  toffset = 0 - cfa_offset;
	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
	  if (tlen <= len)
	    {
	      base_reg = crtl->drap_reg;
	      base_offset = toffset;
	      len = tlen;
	    }
	}
      if (hfp_ok)
	{
	  toffset = m->fs.fp_offset - cfa_offset;
	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
	  /* No need to update LEN here: FP is the last candidate.  */
	  if (tlen <= len)
	    {
	      base_reg = hard_frame_pointer_rtx;
	      base_offset = toffset;
	    }
	}
    }

    /* Set the align return value.  */
    if (align)
      {
	if (base_reg == stack_pointer_rtx)
	  *align = sp_align;
	else if (base_reg == crtl->drap_reg)
	  *align = drap_align;
	else if (base_reg == hard_frame_pointer_rtx)
	  *align = hfp_align;
      }
}
    7540              : 
    7541              : /* Return an RTX that points to CFA_OFFSET within the stack frame and
    7542              :    the alignment of address.  If ALIGN is non-null, it should point to
    7543              :    an alignment value (in bits) that is preferred or zero and will
    7544              :    recieve the alignment of the base register that was selected,
    7545              :    irrespective of rather or not CFA_OFFSET is a multiple of that
    7546              :    alignment value.  If it is possible for the base register offset to be
    7547              :    non-immediate then SCRATCH_REGNO should specify a scratch register to
    7548              :    use.
    7549              : 
    7550              :    The valid base registers are taken from CFUN->MACHINE->FS.  */
    7551              : 
    7552              : static rtx
    7553      1367013 : choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
    7554              :                  unsigned int scratch_regno = INVALID_REGNUM)
    7555              : {
    7556      1367013 :   rtx base_reg = NULL;
    7557      1367013 :   HOST_WIDE_INT base_offset = 0;
    7558              : 
    7559              :   /* If a specific alignment is requested, try to get a base register
    7560              :      with that alignment first.  */
    7561      1367013 :   if (align && *align)
    7562       966231 :     choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
    7563              : 
    7564      1367013 :   if (!base_reg)
    7565       400782 :     choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
    7566              : 
    7567      1367013 :   gcc_assert (base_reg != NULL);
    7568              : 
    7569      1367013 :   rtx base_offset_rtx = GEN_INT (base_offset);
    7570              : 
    7571      1419567 :   if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
    7572              :     {
    7573            1 :       gcc_assert (scratch_regno != INVALID_REGNUM);
    7574              : 
    7575            1 :       rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
    7576            1 :       emit_move_insn (scratch_reg, base_offset_rtx);
    7577              : 
    7578            1 :       return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
    7579              :     }
    7580              : 
    7581      1419566 :   return plus_constant (Pmode, base_reg, base_offset);
    7582              : }
    7583              : 
/* Emit code to save registers in the prologue.

   Callee-saved general registers are pushed in decreasing regno order.
   When APX PUSH2POP2 is usable (and the function is a normal one),
   adjacent saves are paired into PUSH2 instructions once the stack is
   16-byte aligned; a REG_FRAME_RELATED_EXPR note is attached to each
   PUSH2 so the unwinder sees the two individual stores plus the stack
   adjustment.  */

static void
ix86_emit_save_regs (void)
{
  int regno;
  rtx_insn *insn;
  /* PPX (push/pop with balanced-stack hint) cannot be used on the
     eh_return path.  */
  bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;

  if (!TARGET_APX_PUSH2POP2
      || !ix86_can_use_push2pop2 ()
      || cfun->machine->func_type != TYPE_NORMAL)
    {
      /* Plain path: one PUSH per callee-saved general register.  */
      for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
	if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
	  {
	    insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
					use_ppx));
	    RTX_FRAME_RELATED_P (insn) = 1;
	  }
    }
  else
    {
      /* PUSH2 path: collect registers in pairs.  regno_list holds the
	 pending pair; loaded_regnum counts how many slots are filled.  */
      int regno_list[2];
      regno_list[0] = regno_list[1] = -1;
      int loaded_regnum = 0;
      /* PUSH2 requires a 16-byte aligned stack pointer.  */
      bool aligned = cfun->machine->fs.sp_offset % 16 == 0;

      for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
	if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
	  {
	    if (aligned)
	      {
		regno_list[loaded_regnum++] = regno;
		if (loaded_regnum == 2)
		  {
		    gcc_assert (regno_list[0] != -1
				&& regno_list[1] != -1
				&& regno_list[0] != regno_list[1]);
		    const int offset = UNITS_PER_WORD * 2;
		    rtx mem = gen_rtx_MEM (TImode,
					   gen_rtx_PRE_DEC (Pmode,
							    stack_pointer_rtx));
		    insn = emit_insn (gen_push2 (mem,
						 gen_rtx_REG (word_mode,
							      regno_list[0]),
						 gen_rtx_REG (word_mode,
							      regno_list[1]),
						 use_ppx));
		    RTX_FRAME_RELATED_P (insn) = 1;
		    /* Build an explicit unwind expression: the two word
		       stores (vector slots 1 and 2) plus the SP
		       adjustment (slot 0).  */
		    rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));

		    for (int i = 0; i < 2; i++)
		      {
			rtx dwarf_reg = gen_rtx_REG (word_mode,
						     regno_list[i]);
			/* First register of the pair lands at the higher
			   address (sp + UNITS_PER_WORD), the second at sp.  */
			rtx sp_offset = plus_constant (Pmode,
						       stack_pointer_rtx,
						       + UNITS_PER_WORD
							 * (1 - i));
			rtx tmp = gen_rtx_SET (gen_frame_mem (DImode,
							      sp_offset),
					       dwarf_reg);
			RTX_FRAME_RELATED_P (tmp) = 1;
			XVECEXP (dwarf, 0, i + 1) = tmp;
		      }
		    rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx,
					      plus_constant (Pmode,
							     stack_pointer_rtx,
							     -offset));
		    RTX_FRAME_RELATED_P (sp_tmp) = 1;
		    XVECEXP (dwarf, 0, 0) = sp_tmp;
		    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);

		    /* Pair flushed; start collecting the next one.  */
		    loaded_regnum = 0;
		    regno_list[0] = regno_list[1] = -1;
		  }
	      }
	    else
	      {
		/* One plain PUSH to reach 16-byte alignment, after which
		   PUSH2 pairing can begin.  */
		insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
					    use_ppx));
		RTX_FRAME_RELATED_P (insn) = 1;
		aligned = true;
	      }
	  }
      /* An odd register left over gets a plain PUSH.  */
      if (loaded_regnum == 1)
	{
	  insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
						   regno_list[0]),
				      use_ppx));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
    7679              : 
/* Emit a single register save at CFA - CFA_OFFSET.

   MODE/REGNO name the register to store.  The store address is picked
   by choose_baseaddr from the currently valid base registers; depending
   on which base was chosen and the frame state, an appropriate
   REG_CFA_* note is attached so the unwind info stays correct.  */

static void
ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
			      HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  addr = choose_baseaddr (cfa_offset, &align);
  mem = gen_frame_mem (mode, addr);

  /* The location aligment depends upon the base register.  */
  align = MIN (GET_MODE_ALIGNMENT (mode), align);
  gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
  set_mem_align (mem, align);

  insn = emit_insn (gen_rtx_SET (mem, reg));
  RTX_FRAME_RELATED_P (insn) = 1;

  /* Strip a constant displacement to recover the base register used.  */
  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
	{
	  /* A bit of a hack.  We force the DRAP register to be saved in
	     the re-aligned stack frame, which provides us with a copy
	     of the CFA that will last past the prologue.  Install it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
	}
      else
	{
	  /* The frame pointer is a stable reference within the
	     aligned frame.  Use it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
	}
    }

  /* A save via a realigned SP on the aligned side of the boundary also
     needs an explicit expression note.  */
  else if (base == stack_pointer_rtx && m->fs.sp_realigned
	   && cfa_offset >= m->fs.sp_realigned_offset)
    {
      gcc_checking_assert (stack_realign_fp);
      add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (Pmode, m->fs.cfa_reg,
			    m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
    }
}
    7754              : 
    7755              : /* Emit code to save registers using MOV insns.
    7756              :    First register is stored at CFA - CFA_OFFSET.  */
    7757              : static void
    7758        44814 : ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
    7759              : {
    7760        44814 :   unsigned int regno;
    7761              : 
    7762      4167702 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    7763      4122888 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
    7764              :       {
    7765              :         /* Skip registers, already processed by shrink wrap separate.  */
    7766       189107 :         if (!cfun->machine->reg_is_wrapped_separately[regno])
    7767        84107 :           ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
    7768       203862 :         cfa_offset -= UNITS_PER_WORD;
    7769              :       }
    7770        44814 : }
    7771              : 
    7772              : /* Emit code to save SSE registers using MOV insns.
    7773              :    First register is stored at CFA - CFA_OFFSET.  */
    7774              : static void
    7775        33353 : ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
    7776              : {
    7777        33353 :   unsigned int regno;
    7778              : 
    7779      3101829 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    7780      3068476 :     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
    7781              :       {
    7782       333557 :         ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
    7783       333557 :         cfa_offset -= GET_MODE_SIZE (V4SFmode);
    7784              :       }
    7785        33353 : }
    7786              : 
/* REG_CFA_RESTORE notes queued by ix86_add_cfa_restore_note, waiting to
   be attached to the next stack manipulation insn.  */
static GTY(()) rtx queued_cfa_restores;
    7788              : 
    7789              : /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
    7790              :    manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
    7791              :    Don't add the note if the previously saved value will be left untouched
    7792              :    within stack red-zone till return, as unwinders can find the same value
    7793              :    in the register and on the stack.  */
    7794              : 
    7795              : static void
    7796      2281132 : ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
    7797              : {
    7798      2281132 :   if (!crtl->shrink_wrapped
    7799      2262555 :       && cfa_offset <= cfun->machine->fs.red_zone_offset)
    7800              :     return;
    7801              : 
    7802       770800 :   if (insn)
    7803              :     {
    7804       360544 :       add_reg_note (insn, REG_CFA_RESTORE, reg);
    7805       360544 :       RTX_FRAME_RELATED_P (insn) = 1;
    7806              :     }
    7807              :   else
    7808       410256 :     queued_cfa_restores
    7809       410256 :       = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
    7810              : }
    7811              : 
    7812              : /* Add queued REG_CFA_RESTORE notes if any to INSN.  */
    7813              : 
    7814              : static void
    7815      2548646 : ix86_add_queued_cfa_restore_notes (rtx insn)
    7816              : {
    7817      2548646 :   rtx last;
    7818      2548646 :   if (!queued_cfa_restores)
    7819              :     return;
    7820       410256 :   for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    7821              :     ;
    7822        52910 :   XEXP (last, 1) = REG_NOTES (insn);
    7823        52910 :   REG_NOTES (insn) = queued_cfa_restores;
    7824        52910 :   queued_cfa_restores = NULL_RTX;
    7825        52910 :   RTX_FRAME_RELATED_P (insn) = 1;
    7826              : }
    7827              : 
/* Expand prologue or epilogue stack adjustment.
   The pattern exist to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame related,
   zero if %r11 register is live and cannot be freely used and positive
   otherwise.

   DEST = SRC + OFFSET is emitted; SET_CFA requests that the CFA be moved
   to DEST (with the matching REG_CFA_ADJUST_CFA note).  The tracked
   frame state in cfun->machine->fs is updated when DEST is the stack
   pointer.  Returns the emitted insn.  */

static rtx
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
			   int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx addend = offset;
  rtx insn;
  bool add_frame_related_expr = false;

  /* An offset too large for an immediate must first be loaded into a
     temporary register.  */
  if (!x86_64_immediate_operand (offset, Pmode))
    {
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  */
      if (style)
	addend = gen_rtx_REG (Pmode, R11_REG);
      else
	{
	  gcc_assert (src != hard_frame_pointer_rtx
		      && dest != hard_frame_pointer_rtx);
	  addend = hard_frame_pointer_rtx;
	}
      emit_insn (gen_rtx_SET (addend, offset));
      if (style < 0)
	add_frame_related_expr = true;
    }

  /*  Shrink wrap separate may insert prologue between TEST and JMP.  In order
      not to affect EFlags, emit add without reg clobbering.  */
  if (crtl->shrink_wrapped_separate)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_add_nocc
		      (Pmode, dest, src, addend));
  else
    insn = emit_insn (gen_pro_epilogue_adjust_stack_add
		      (Pmode, dest, src, addend));

  /* Epilogue adjustments flush any pending REG_CFA_RESTORE notes.  */
  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  if (set_cfa)
    {
      rtx r;

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      if (add_frame_related_expr)
	{
	  /* The addend was loaded via a register; describe the real
	     adjustment for the unwinder.  */
	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
	  r = gen_rtx_SET (dest, r);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
	}
    }

  /* Keep the tracked frame state in sync with the new SP value.  */
  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;
      bool realigned = m->fs.sp_realigned;

      if (src == hard_frame_pointer_rtx)
	{
	  valid = m->fs.fp_valid;
	  realigned = false;
	  ooffset = m->fs.fp_offset;
	}
      else if (src == crtl->drap_reg)
	{
	  valid = m->fs.drap_valid;
	  realigned = false;
	  ooffset = 0;
	}
      else
	{
	  /* Else there are two possibilities: SP itself, which we set
	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
	     taken care of this by hand along the eh_return path.  */
	  gcc_checking_assert (src == stack_pointer_rtx
			       || offset == const0_rtx);
	}

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
      m->fs.sp_realigned = realigned;
    }
  return insn;
}
    7929              : 
    7930              : /* Find an available register to be used as dynamic realign argument
    7931              :    pointer regsiter.  Such a register will be written in prologue and
    7932              :    used in begin of body, so it must not be
    7933              :         1. parameter passing register.
    7934              :         2. GOT pointer.
    7935              :    We reuse static-chain register if it is available.  Otherwise, we
    7936              :    use DI for i386 and R13 for x86-64.  We chose R13 since it has
    7937              :    shorter encoding.
    7938              : 
    7939              :    Return: the regno of chosen register.  */
    7940              : 
    7941              : static unsigned int
    7942         7300 : find_drap_reg (void)
    7943              : {
    7944         7300 :   tree decl = cfun->decl;
    7945              : 
    7946              :   /* Always use callee-saved register if there are no caller-saved
    7947              :      registers.  */
    7948         7300 :   if (TARGET_64BIT)
    7949              :     {
    7950              :       /* Use R13 for nested function or function need static chain.
    7951              :          Since function with tail call may use any caller-saved
    7952              :          registers in epilogue, DRAP must not use caller-saved
    7953              :          register in such case.  */
    7954         7015 :       if (DECL_STATIC_CHAIN (decl)
    7955         6973 :           || (cfun->machine->call_saved_registers
    7956         6973 :               == TYPE_NO_CALLER_SAVED_REGISTERS)
    7957        13988 :           || crtl->tail_call_emit)
    7958          190 :         return R13_REG;
    7959              : 
    7960              :       return R10_REG;
    7961              :     }
    7962              :   else
    7963              :     {
    7964              :       /* Use DI for nested function or function need static chain.
    7965              :          Since function with tail call may use any caller-saved
    7966              :          registers in epilogue, DRAP must not use caller-saved
    7967              :          register in such case.  */
    7968          285 :       if (DECL_STATIC_CHAIN (decl)
    7969          285 :           || (cfun->machine->call_saved_registers
    7970          285 :               == TYPE_NO_CALLER_SAVED_REGISTERS)
    7971          285 :           || crtl->tail_call_emit
    7972          550 :           || crtl->calls_eh_return)
    7973              :         return DI_REG;
    7974              : 
    7975              :       /* Reuse static chain register if it isn't used for parameter
    7976              :          passing.  */
    7977          265 :       if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
    7978              :         {
    7979          265 :           unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
    7980          265 :           if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
    7981              :             return CX_REG;
    7982              :         }
    7983            0 :       return DI_REG;
    7984              :     }
    7985              : }
    7986              : 
    7987              : /* Return minimum incoming stack alignment.  */
    7988              : 
    7989              : static unsigned int
    7990      1615193 : ix86_minimum_incoming_stack_boundary (bool sibcall)
    7991              : {
    7992      1615193 :   unsigned int incoming_stack_boundary;
    7993              : 
    7994              :   /* Stack of interrupt handler is aligned to 128 bits in 64bit mode.  */
    7995      1615193 :   if (cfun->machine->func_type != TYPE_NORMAL)
    7996          120 :     incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
    7997              :   /* Prefer the one specified at command line. */
    7998      1615073 :   else if (ix86_user_incoming_stack_boundary)
    7999              :     incoming_stack_boundary = ix86_user_incoming_stack_boundary;
    8000              :   /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
    8001              :      if -mstackrealign is used, it isn't used for sibcall check and
    8002              :      estimated stack alignment is 128bit.  */
    8003      1615051 :   else if (!sibcall
    8004      1480758 :            && ix86_force_align_arg_pointer
    8005         4574 :            && crtl->stack_alignment_estimated == 128)
    8006          596 :     incoming_stack_boundary = MIN_STACK_BOUNDARY;
    8007              :   else
    8008      1614455 :     incoming_stack_boundary = ix86_default_incoming_stack_boundary;
    8009              : 
    8010              :   /* Incoming stack alignment can be changed on individual functions
    8011              :      via force_align_arg_pointer attribute.  We use the smallest
    8012              :      incoming stack boundary.  */
    8013      1615193 :   if (incoming_stack_boundary > MIN_STACK_BOUNDARY
    8014      3229780 :       && lookup_attribute ("force_align_arg_pointer",
    8015      1614587 :                            TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    8016         5708 :     incoming_stack_boundary = MIN_STACK_BOUNDARY;
    8017              : 
    8018              :   /* The incoming stack frame has to be aligned at least at
    8019              :      parm_stack_boundary.  */
    8020      1615193 :   if (incoming_stack_boundary < crtl->parm_stack_boundary)
    8021              :     incoming_stack_boundary = crtl->parm_stack_boundary;
    8022              : 
    8023              :   /* Stack at entrance of main is aligned by runtime.  We use the
    8024              :      smallest incoming stack boundary. */
    8025      1615193 :   if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
    8026       140726 :       && DECL_NAME (current_function_decl)
    8027       140726 :       && MAIN_NAME_P (DECL_NAME (current_function_decl))
    8028      1617675 :       && DECL_FILE_SCOPE_P (current_function_decl))
    8029         2482 :     incoming_stack_boundary = MAIN_STACK_BOUNDARY;
    8030              : 
    8031      1615193 :   return incoming_stack_boundary;
    8032              : }
    8033              : 
    8034              : /* Update incoming stack boundary and estimated stack alignment.  */
    8035              : 
    8036              : static void
    8037      1480895 : ix86_update_stack_boundary (void)
    8038              : {
    8039      1480895 :   ix86_incoming_stack_boundary
    8040      1480895 :     = ix86_minimum_incoming_stack_boundary (false);
    8041              : 
    8042              :   /* x86_64 vararg needs 16byte stack alignment for register save area.  */
    8043      1480895 :   if (TARGET_64BIT
    8044      1354411 :       && cfun->stdarg
    8045        21367 :       && crtl->stack_alignment_estimated < 128)
    8046        10178 :     crtl->stack_alignment_estimated = 128;
    8047              : 
    8048              :   /* __tls_get_addr needs to be called with 16-byte aligned stack.  */
    8049      1480895 :   if (ix86_tls_descriptor_calls_expanded_in_cfun
    8050         1072 :       && crtl->preferred_stack_boundary < 128)
    8051          745 :     crtl->preferred_stack_boundary = 128;
    8052              : 
    8053              :   /* For 32-bit MS ABI, both the incoming and preferred stack boundaries
    8054              :      are 32 bits, but if force_align_arg_pointer is specified, it should
    8055              :      prefer 128 bits for a backward-compatibility reason, which is also
    8056              :      what the doc suggests.  */
    8057      1480895 :   if (lookup_attribute ("force_align_arg_pointer",
    8058      1480895 :                         TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))
    8059      1480895 :       && crtl->preferred_stack_boundary < 128)
    8060            4 :     crtl->preferred_stack_boundary = 128;
    8061      1480895 : }
    8062              : 
    8063              : /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
    8064              :    needed or an rtx for DRAP otherwise.  */
    8065              : 
    8066              : static rtx
    8067      1584222 : ix86_get_drap_rtx (void)
    8068              : {
    8069              :   /* We must use DRAP if there are outgoing arguments on stack or
    8070              :      the stack pointer register is clobbered by asm statement and
    8071              :      ACCUMULATE_OUTGOING_ARGS is false.  */
    8072      1584222 :   if (ix86_force_drap
    8073      1584222 :       || ((cfun->machine->outgoing_args_on_stack
    8074      1252693 :            || crtl->sp_is_clobbered_by_asm)
    8075       329583 :           && !ACCUMULATE_OUTGOING_ARGS))
    8076       309391 :     crtl->need_drap = true;
    8077              : 
    8078      1584222 :   if (stack_realign_drap)
    8079              :     {
    8080              :       /* Assign DRAP to vDRAP and returns vDRAP */
    8081         7300 :       unsigned int regno = find_drap_reg ();
    8082         7300 :       rtx drap_vreg;
    8083         7300 :       rtx arg_ptr;
    8084         7300 :       rtx_insn *seq, *insn;
    8085              : 
    8086         7585 :       arg_ptr = gen_rtx_REG (Pmode, regno);
    8087         7300 :       crtl->drap_reg = arg_ptr;
    8088              : 
    8089         7300 :       start_sequence ();
    8090         7300 :       drap_vreg = copy_to_reg (arg_ptr);
    8091         7300 :       seq = end_sequence ();
    8092              : 
    8093         7300 :       insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
    8094         7300 :       if (!optimize)
    8095              :         {
    8096         1894 :           add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
    8097         1894 :           RTX_FRAME_RELATED_P (insn) = 1;
    8098              :         }
    8099         7300 :       return drap_vreg;
    8100              :     }
    8101              :   else
    8102              :     return NULL;
    8103              : }
    8104              : 
    8105              : /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
    8106              : 
static rtx
ix86_internal_arg_pointer (void)
{
  /* The internal argument pointer is simply the virtual register that
     stands for the incoming argument area; no target-specific register
     is needed here.  */
  return virtual_incoming_args_rtx;
}
    8112              : 
/* Descriptor for a scratch register obtained on function entry.  */
struct scratch_reg {
  rtx reg;	/* The scratch register itself.  */
  bool saved;	/* True if REG had to be pushed to free it up and so
		   must be restored on release.  */
};
    8117              : 
    8118              : /* Return a short-lived scratch register for use on function entry.
    8119              :    In 32-bit mode, it is valid only after the registers are saved
    8120              :    in the prologue.  This register must be released by means of
    8121              :    release_scratch_register_on_entry once it is dead.  */
    8122              : 
static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      /* In 32-bit mode, try to find a call-clobbered register that is
	 not used for parameter passing, the static chain or DRAP; if
	 none qualifies, fall back to a call-saved register that the
	 prologue saves anyway, and as a last resort push one.  The
	 order of the tests below is significant.  */
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
	= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool thiscall_p
	= lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
      int drap_regno
	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
	  for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
	  && drap_regno != AX_REG)
	regno = AX_REG;
      /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
	  for the static chain register.  */
      else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
	regno = AX_REG;
      else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
	regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !thiscall_p
	       && !static_chain_p
	       && drap_regno != CX_REG)
	regno = CX_REG;
      /* No free call-clobbered register; reuse a call-saved one that
	 the prologue already saves.  */
      else if (ix86_save_reg (BX_REG, true, false))
	regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
	       && ix86_save_reg (SI_REG, true, false))
	regno = SI_REG;
      else if (ix86_save_reg (DI_REG, true, false))
	regno = DI_REG;
      else
	{
	  /* Nothing is free: spill eax (or edx if DRAP uses eax) and
	     remember that it must be restored.  */
	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
	  sr->saved = true;
	}
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      /* The push is part of the frame setup, so mark it for the
	 unwinder.  */
      rtx_insn *insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
    8185              : 
    8186              : /* Release a scratch register obtained from the preceding function.
    8187              : 
    8188              :    If RELEASE_VIA_POP is true, we just pop the register off the stack
    8189              :    to release it.  This is what non-Linux systems use with -fstack-check.
    8190              : 
    8191              :    Otherwise we use OFFSET to locate the saved register and the
    8192              :    allocated stack space becomes part of the local frame and is
    8193              :    deallocated by the epilogue.  */
    8194              : 
    8195              : static void
    8196           25 : release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
    8197              :                                    bool release_via_pop)
    8198              : {
    8199           25 :   if (sr->saved)
    8200              :     {
    8201            0 :       if (release_via_pop)
    8202              :         {
    8203            0 :           struct machine_function *m = cfun->machine;
    8204            0 :           rtx x, insn = emit_insn (gen_pop (sr->reg));
    8205              : 
    8206              :           /* The RX FRAME_RELATED_P mechanism doesn't know about pop.  */
    8207            0 :           RTX_FRAME_RELATED_P (insn) = 1;
    8208            0 :           x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
    8209            0 :           x = gen_rtx_SET (stack_pointer_rtx, x);
    8210            0 :           add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
    8211            0 :           m->fs.sp_offset -= UNITS_PER_WORD;
    8212              :         }
    8213              :       else
    8214              :         {
    8215            0 :           rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
    8216            0 :           x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
    8217            0 :           emit_insn (x);
    8218              :         }
    8219              :     }
    8220           25 : }
    8221              : 
    8222              : /* Emit code to adjust the stack pointer by SIZE bytes while probing it.
    8223              : 
    8224              :    If INT_REGISTERS_SAVED is true, then integer registers have already been
    8225              :    pushed on the stack.
    8226              : 
   If PROTECTION_AREA is true, then probe PROBE_INTERVAL plus a small dope
   beyond SIZE bytes.
    8229              : 
    8230              :    This assumes no knowledge of the current probing state, i.e. it is never
    8231              :    allowed to allocate more than PROBE_INTERVAL bytes of stack space without
    8232              :    a suitable probe.  */
    8233              : 
static void
ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
			     const bool int_registers_saved,
			     const bool protection_area)
{
  struct machine_function *m = cfun->machine;

  /* If this function does not statically allocate stack space, then
     no probes are needed.  */
  if (!size)
    {
      /* However, the allocation of space via pushes for register
	 saves could be viewed as allocating space, but without the
	 need to probe.  */
      if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
	dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      else
	dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
      return;
    }

  /* If we are a noreturn function, then we have to consider the
     possibility that we're called via a jump rather than a call.

     Thus we don't have the implicit probe generated by saving the
     return address into the stack at the call.  Thus, the stack
     pointer could be anywhere in the guard page.  The safe thing
     to do is emit a probe now.

     The probe can be avoided if we have already emitted any callee
     register saves into the stack or have a frame pointer (which will
     have been saved as well).  Those saves will function as implicit
     probes.

     ?!? This should be revamped to work like aarch64 and s390 where
     we track the offset from the most recent probe.  Normally that
     offset would be zero.  For a noreturn function we would reset
     it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT).   Then
     we just probe when we cross PROBE_INTERVAL.  */
  if (TREE_THIS_VOLATILE (cfun->decl)
      && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
    {
      /* We can safely use any register here since we're just going to push
	 its value and immediately pop it back.  But we do try and avoid
	 argument passing registers so as not to introduce dependencies in
	 the pipeline.  For 32 bit we use %esi and for 64 bit we use %rax.  */
      rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
      rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
      rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
      m->fs.sp_offset -= UNITS_PER_WORD;
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  /* The push/pop pair temporarily moves SP; describe both
	     adjustments to the unwinder via CFA notes.  */
	  m->fs.cfa_offset -= UNITS_PER_WORD;
	  rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
	  RTX_FRAME_RELATED_P (insn_push) = 1;
	  x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
	  x = gen_rtx_SET (stack_pointer_rtx, x);
	  add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
	  RTX_FRAME_RELATED_P (insn_pop) = 1;
	}
      emit_insn (gen_blockage ());
    }

  const HOST_WIDE_INT probe_interval = get_probe_interval ();
  /* Extra slack allocated (and released again at the end) together
     with the protection area.  */
  const int dope = 4 * UNITS_PER_WORD;

  /* If there is protection area, take it into account in the size.  */
  if (protection_area)
    size += probe_interval + dope;

  /* If we allocate less than the size of the guard statically,
     then no probing is necessary, but we do need to allocate
     the stack.  */
  else if (size < (1 << param_stack_clash_protection_guard_size))
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				 GEN_INT (-size), -1,
				 m->fs.cfa_reg == stack_pointer_rtx);
      dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      return;
    }

  /* We're allocating a large enough stack frame that we need to
     emit probes.  Either emit them inline or in a loop depending
     on the size.  */
  if (size <= 4 * probe_interval)
    {
      HOST_WIDE_INT i;
      for (i = probe_interval; i <= size; i += probe_interval)
	{
	  /* Allocate PROBE_INTERVAL bytes.  */
	  rtx insn
	    = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
					 GEN_INT (-probe_interval), -1,
					 m->fs.cfa_reg == stack_pointer_rtx);
	  add_reg_note (insn, REG_STACK_CHECK, const0_rtx);

	  /* And probe at *sp.  */
	  emit_stack_probe (stack_pointer_rtx);
	  emit_insn (gen_blockage ());
	}

      /* We need to allocate space for the residual, but we do not need
	 to probe the residual...  RESIDUAL is zero or negative here, so
	 adjusting by it allocates the remaining bytes.  */
      HOST_WIDE_INT residual = (i - probe_interval - size);
      if (residual)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (residual), -1,
				     m->fs.cfa_reg == stack_pointer_rtx);

	  /* ...except if there is a protection area to maintain.  */
	  if (protection_area)
	    emit_stack_probe (stack_pointer_rtx);
	}

      dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
    }
  else
    {
      /* We expect the GP registers to be saved when probes are used
	 as the probing sequences might need a scratch register and
	 the routine to allocate one assumes the integer registers
	 have already been saved.  */
      gcc_assert (int_registers_saved);

      struct scratch_reg sr;
      get_scratch_register_on_entry (&sr);

      /* If we needed to save a register, then account for any space
	 that was pushed (we are not going to pop the register when
	 we do the restore).  */
      if (sr.saved)
	size -= UNITS_PER_WORD;

      /* Step 1: round SIZE down to a multiple of the interval.  */
      HOST_WIDE_INT rounded_size = size & -probe_interval;

      /* Step 2: compute final value of the loop counter.  Use lea if
	 possible.  */
      rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
      rtx insn;
      if (address_no_seg_operand (addr, Pmode))
	insn = emit_insn (gen_rtx_SET (sr.reg, addr));
      else
	{
	  emit_move_insn (sr.reg, GEN_INT (-rounded_size));
	  insn = emit_insn (gen_rtx_SET (sr.reg,
					 gen_rtx_PLUS (Pmode, sr.reg,
						       stack_pointer_rtx)));
	}
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  /* During the loop SP moves, so express the CFA in terms of
	     the loop counter register instead.  */
	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, sr.reg,
				       m->fs.cfa_offset + rounded_size));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* Step 3: the loop.  */
      rtx size_rtx = GEN_INT (rounded_size);
      insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
						    size_rtx));
      if (m->fs.cfa_reg == stack_pointer_rtx)
	{
	  /* After the loop SP is settled again; move the CFA back onto
	     the stack pointer.  */
	  m->fs.cfa_offset += rounded_size;
	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      m->fs.sp_offset += rounded_size;
      emit_insn (gen_blockage ());

      /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
	 is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	{
	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				     GEN_INT (rounded_size - size), -1,
				     m->fs.cfa_reg == stack_pointer_rtx);

	  if (protection_area)
	    emit_stack_probe (stack_pointer_rtx);
	}

      dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);

      /* This does not deallocate the space reserved for the scratch
	 register.  That will be deallocated in the epilogue.  */
      release_scratch_register_on_entry (&sr, size, false);
    }

  /* Adjust back to account for the protection area.  */
  if (protection_area)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			       GEN_INT (probe_interval + dope), -1,
			       m->fs.cfa_reg == stack_pointer_rtx);

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
    8439              : 
    8440              : /* Adjust the stack pointer up to REG while probing it.  */
    8441              : 
    8442              : const char *
    8443           25 : output_adjust_stack_and_probe (rtx reg)
    8444              : {
    8445           25 :   static int labelno = 0;
    8446           25 :   char loop_lab[32];
    8447           25 :   rtx xops[2];
    8448              : 
    8449           25 :   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
    8450              : 
    8451              :   /* Loop.  */
    8452           25 :   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
    8453              : 
    8454              :   /* SP = SP + PROBE_INTERVAL.  */
    8455           25 :   xops[0] = stack_pointer_rtx;
    8456           37 :   xops[1] = GEN_INT (get_probe_interval ());
    8457           25 :   output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
    8458              : 
    8459              :   /* Probe at SP.  */
    8460           25 :   xops[1] = const0_rtx;
    8461           25 :   output_asm_insn ("or{b}\t{%1, (%0)|BYTE PTR [%0], %1}", xops);
    8462              : 
    8463              :   /* Test if SP == LAST_ADDR.  */
    8464           25 :   xops[0] = stack_pointer_rtx;
    8465           25 :   xops[1] = reg;
    8466           25 :   output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
    8467              : 
    8468              :   /* Branch.  */
    8469           25 :   fputs ("\tjne\t", asm_out_file);
    8470           25 :   assemble_name_raw (asm_out_file, loop_lab);
    8471           25 :   fputc ('\n', asm_out_file);
    8472              : 
    8473           25 :   return "";
    8474              : }
    8475              : 
    8476              : /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
    8477              :    inclusive.  These are offsets from the current stack pointer.
    8478              : 
    8479              :    INT_REGISTERS_SAVED is true if integer registers have already been
    8480              :    pushed on the stack.  */
    8481              : 
    8482              : static void
    8483            0 : ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
    8484              :                              const bool int_registers_saved)
    8485              : {
    8486            0 :   const HOST_WIDE_INT probe_interval = get_probe_interval ();
    8487              : 
    8488              :   /* See if we have a constant small number of probes to generate.  If so,
    8489              :      that's the easy case.  The run-time loop is made up of 6 insns in the
    8490              :      generic case while the compile-time loop is made up of n insns for n #
    8491              :      of intervals.  */
    8492            0 :   if (size <= 6 * probe_interval)
    8493              :     {
    8494              :       HOST_WIDE_INT i;
    8495              : 
    8496              :       /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
    8497              :          it exceeds SIZE.  If only one probe is needed, this will not
    8498              :          generate any code.  Then probe at FIRST + SIZE.  */
    8499            0 :       for (i = probe_interval; i < size; i += probe_interval)
    8500            0 :         emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
    8501            0 :                                          -(first + i)));
    8502              : 
    8503            0 :       emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
    8504            0 :                                        -(first + size)));
    8505              :     }
    8506              : 
    8507              :   /* Otherwise, do the same as above, but in a loop.  Note that we must be
    8508              :      extra careful with variables wrapping around because we might be at
    8509              :      the very top (or the very bottom) of the address space and we have
    8510              :      to be able to handle this case properly; in particular, we use an
    8511              :      equality test for the loop condition.  */
    8512              :   else
    8513              :     {
    8514              :       /* We expect the GP registers to be saved when probes are used
    8515              :          as the probing sequences might need a scratch register and
    8516              :          the routine to allocate one assumes the integer registers
    8517              :          have already been saved.  */
    8518            0 :       gcc_assert (int_registers_saved);
    8519              : 
    8520            0 :       HOST_WIDE_INT rounded_size, last;
    8521            0 :       struct scratch_reg sr;
    8522              : 
    8523            0 :       get_scratch_register_on_entry (&sr);
    8524              : 
    8525              : 
    8526              :       /* Step 1: round SIZE to the previous multiple of the interval.  */
    8527              : 
    8528            0 :       rounded_size = ROUND_DOWN (size, probe_interval);
    8529              : 
    8530              : 
    8531              :       /* Step 2: compute initial and final value of the loop counter.  */
    8532              : 
    8533              :       /* TEST_OFFSET = FIRST.  */
    8534            0 :       emit_move_insn (sr.reg, GEN_INT (-first));
    8535              : 
    8536              :       /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
    8537            0 :       last = first + rounded_size;
    8538              : 
    8539              : 
    8540              :       /* Step 3: the loop
    8541              : 
    8542              :          do
    8543              :            {
    8544              :              TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
    8545              :              probe at TEST_ADDR
    8546              :            }
    8547              :          while (TEST_ADDR != LAST_ADDR)
    8548              : 
    8549              :          probes at FIRST + N * PROBE_INTERVAL for values of N from 1
    8550              :          until it is equal to ROUNDED_SIZE.  */
    8551              : 
    8552            0 :       emit_insn
    8553            0 :         (gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));
    8554              : 
    8555              : 
    8556              :       /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
    8557              :          that SIZE is equal to ROUNDED_SIZE.  */
    8558              : 
    8559            0 :       if (size != rounded_size)
    8560            0 :         emit_stack_probe (plus_constant (Pmode,
    8561            0 :                                          gen_rtx_PLUS (Pmode,
    8562              :                                                        stack_pointer_rtx,
    8563              :                                                        sr.reg),
    8564            0 :                                          rounded_size - size));
    8565              : 
    8566            0 :       release_scratch_register_on_entry (&sr, size, true);
    8567              :     }
    8568              : 
    8569              :   /* Make sure nothing is scheduled before we are done.  */
    8570            0 :   emit_insn (gen_blockage ());
    8571            0 : }
    8572              : 
    8573              : /* Probe a range of stack addresses from REG to END, inclusive.  These are
    8574              :    offsets from the current stack pointer.  */
    8575              : 
    8576              : const char *
    8577            0 : output_probe_stack_range (rtx reg, rtx end)
    8578              : {
    8579            0 :   static int labelno = 0;
    8580            0 :   char loop_lab[32];
    8581            0 :   rtx xops[3];
    8582              : 
    8583            0 :   ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
    8584              : 
    8585              :   /* Loop.  */
    8586            0 :   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
    8587              : 
    8588              :   /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
    8589            0 :   xops[0] = reg;
    8590            0 :   xops[1] = GEN_INT (get_probe_interval ());
    8591            0 :   output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
    8592              : 
    8593              :   /* Probe at TEST_ADDR.  */
    8594            0 :   xops[0] = stack_pointer_rtx;
    8595            0 :   xops[1] = reg;
    8596            0 :   xops[2] = const0_rtx;
    8597            0 :   output_asm_insn ("or{b}\t{%2, (%0,%1)|BYTE PTR [%0+%1], %2}", xops);
    8598              : 
    8599              :   /* Test if TEST_ADDR == LAST_ADDR.  */
    8600            0 :   xops[0] = reg;
    8601            0 :   xops[1] = end;
    8602            0 :   output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
    8603              : 
    8604              :   /* Branch.  */
    8605            0 :   fputs ("\tjne\t", asm_out_file);
    8606            0 :   assemble_name_raw (asm_out_file, loop_lab);
    8607            0 :   fputc ('\n', asm_out_file);
    8608              : 
    8609            0 :   return "";
    8610              : }
    8611              : 
/* Data passed to ix86_update_stack_alignment.  */
struct stack_access_data
{
  /* The stack access register.  */
  const_rtx reg;
  /* Pointer to stack alignment, in bits; ix86_update_stack_alignment
     raises it in place when a more-aligned stack MEM is found.  */
  unsigned int *stack_alignment;
};
    8620              : 
    8621              : /* Return true if OP references an argument passed on stack.  */
    8622              : 
    8623              : static bool
    8624       135374 : ix86_argument_passed_on_stack_p (const_rtx op)
    8625              : {
    8626       135374 :   tree mem_expr = MEM_EXPR (op);
    8627       135374 :   if (mem_expr)
    8628              :     {
    8629       133507 :       tree var = get_base_address (mem_expr);
    8630       133507 :       return TREE_CODE (var) == PARM_DECL;
    8631              :     }
    8632              :   return false;
    8633              : }
    8634              : 
/* Update the maximum stack slot alignment from memory alignment in PAT.
   note_stores callback: the first (stored-to rtx) argument is unused;
   DATA points to a stack_access_data carrying the stack access register
   and the alignment accumulator.  */

static void
ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
{
  /* This insn may reference stack slot.  Update the maximum stack slot
     alignment if the memory is referenced by the stack access register. */
  stack_access_data *p = (stack_access_data *) data;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, pat, ALL)
    {
      auto op = *iter;
      if (MEM_P (op))
        {
          /* NB: Ignore arguments passed on stack since caller is
             responsible to align the outgoing stack for arguments
             passed on stack.  */
          if (reg_mentioned_p (p->reg, XEXP (op, 0))
              && !ix86_argument_passed_on_stack_p (op))
            {
              unsigned int alignment = MEM_ALIGN (op);

              /* Keep the largest MEM alignment (in bits) seen so far.  */
              if (alignment > *p->stack_alignment)
                *p->stack_alignment = alignment;
              /* One matching MEM is enough for this pattern.  */
              break;
            }
          else
            /* Don't descend into a MEM that is not addressed through
               the stack access register (or is a stack-passed arg).  */
            iter.skip_subrtxes ();
        }
    }
}
    8667              : 
/* Helper function for ix86_find_all_reg_uses.  Process one SET: if its
   destination is a Pmode hard register not yet in REGSET and its source
   uses register REGNO (outside of memory addresses), add the destination
   register to REGSET and to WORKLIST so its own uses get scanned too.  */

static void
ix86_find_all_reg_uses_1 (HARD_REG_SET &regset,
                          rtx set, unsigned int regno,
                          auto_bitmap &worklist)
{
  rtx dest = SET_DEST (set);

  if (!REG_P (dest))
    return;

  /* Reject non-Pmode modes.  */
  if (GET_MODE (dest) != Pmode)
    return;

  unsigned int dst_regno = REGNO (dest);

  /* Already recorded; no need to scan the source again.  */
  if (TEST_HARD_REG_BIT (regset, dst_regno))
    return;

  const_rtx src = SET_SRC (set);

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, src, ALL)
    {
      auto op = *iter;

      /* Don't look inside MEMs: a register used only within a memory
         address does not make DEST a stack-derived pointer.  */
      if (MEM_P (op))
        iter.skip_subrtxes ();

      if (REG_P (op) && REGNO (op) == regno)
        {
          /* Add this register to register set.  */
          add_to_hard_reg_set (&regset, Pmode, dst_regno);
          bitmap_set_bit (worklist, dst_regno);
          break;
        }
    }
}
    8708              : 
/* Find all registers defined with register REGNO.  Walk the df use
   chain of REGNO and, for every single SET (or every SET inside a
   PARALLEL) of a nonjump insn, let ix86_find_all_reg_uses_1 record the
   destination register in REGSET and WORKLIST.  */

static void
ix86_find_all_reg_uses (HARD_REG_SET &regset,
                        unsigned int regno, auto_bitmap &worklist)
{
  for (df_ref ref = DF_REG_USE_CHAIN (regno);
       ref != NULL;
       ref = DF_REF_NEXT_REG (ref))
    {
      /* Artificial refs have no insn attached; skip them.  */
      if (DF_REF_IS_ARTIFICIAL (ref))
        continue;

      rtx_insn *insn = DF_REF_INSN (ref);

      /* Only ordinary (non-jump, non-call-free) insns can copy the
         register into another one we need to track.  */
      if (!NONJUMP_INSN_P (insn))
        continue;

      unsigned int ref_regno = DF_REF_REGNO (ref);

      rtx set = single_set (insn);
      if (set)
        {
          ix86_find_all_reg_uses_1 (regset, set,
                                    ref_regno, worklist);
          continue;
        }

      rtx pat = PATTERN (insn);
      if (GET_CODE (pat) != PARALLEL)
        continue;

      /* Multi-set insn: examine each SET element of the PARALLEL.  */
      for (int i = 0; i < XVECLEN (pat, 0); i++)
        {
          rtx exp = XVECEXP (pat, 0, i);

          if (GET_CODE (exp) == SET)
            ix86_find_all_reg_uses_1 (regset, exp,
                                      ref_regno, worklist);
        }
    }
}
    8751              : 
/* Return true if the hard register REGNO used for a stack access is
   defined in a basic block that dominates the block where it is used.
   Answers are memoized per (REGNO, BB): REG_DOMINATE_BBS_KNOWN[REGNO]
   records that BB was queried before, REG_DOMINATE_BBS[REGNO] records
   that the answer was true.  */

static bool
ix86_access_stack_p (unsigned int regno, basic_block bb,
                     HARD_REG_SET &set_up_by_prologue,
                     HARD_REG_SET &prologue_used,
                     auto_bitmap reg_dominate_bbs_known[],
                     auto_bitmap reg_dominate_bbs[])
{
  /* Reuse the cached answer for this (REGNO, BB) pair if present.  */
  if (bitmap_bit_p (reg_dominate_bbs_known[regno], bb->index))
    return bitmap_bit_p (reg_dominate_bbs[regno], bb->index);

  /* Mark the pair as queried; the "true" bit is only set below.  */
  bitmap_set_bit (reg_dominate_bbs_known[regno], bb->index);

  /* Get all BBs which set REGNO and dominate the current BB from all
     DEFs of REGNO.  */
  for (df_ref def = DF_REG_DEF_CHAIN (regno);
       def;
       def = DF_REF_NEXT_REG (def))
    if (!DF_REF_IS_ARTIFICIAL (def)
        && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER)
        && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER))
      {
        basic_block set_bb = DF_REF_BB (def);
        if (dominated_by_p (CDI_DOMINATORS, bb, set_bb))
          {
            rtx_insn *insn = DF_REF_INSN (def);
            /* Return true if INSN requires stack.  */
            if (requires_stack_frame_p (insn, prologue_used,
                                        set_up_by_prologue))
              {
                /* Cache the positive answer for later queries.  */
                bitmap_set_bit (reg_dominate_bbs[regno], bb->index);
                return true;
              }
          }
      }

  /* When we get here, REGNO used in the current BB doesn't access
     stack.  */
  return false;
}
    8794              : 
    8795              : /* Return true if OP isn't a memory operand with SYMBOLIC_CONST and
    8796              :    needs alignment > ALIGNMENT.  */
    8797              : 
    8798              : static bool
    8799     27722044 : ix86_need_alignment_p_2 (const_rtx op, unsigned int alignment)
    8800              : {
    8801     27722044 :   bool need_alignment = MEM_ALIGN (op) > alignment;
    8802     27722044 :   tree mem_expr = MEM_EXPR (op);
    8803     27722044 :   if (!mem_expr)
    8804              :     return need_alignment;
    8805              : 
    8806     22665700 :   tree var = get_base_address (mem_expr);
    8807     22665700 :   if (!VAR_P (var) || !DECL_RTL_SET_P (var))
    8808              :     return need_alignment;
    8809              : 
    8810     14357091 :   rtx x = DECL_RTL (var);
    8811     14357091 :   if (!MEM_P (x))
    8812              :     return need_alignment;
    8813              : 
    8814     14357088 :   x = XEXP (x, 0);
    8815     14357088 :   return !SYMBOLIC_CONST (x) && need_alignment;
    8816              : }
    8817              : 
/* Return true if SET needs alignment > ALIGNMENT.  */

static bool
ix86_need_alignment_p_1 (rtx set, unsigned int alignment)
{
  rtx dest = SET_DEST (set);

  /* A store: check the destination memory directly.  */
  if (MEM_P (dest))
    return ix86_need_alignment_p_2 (dest, alignment);

  const_rtx src = SET_SRC (set);

  /* Otherwise decide from the first MEM found in the source.
     NOTE(review): only the first MEM encountered is examined —
     presumably a SET source here has at most one memory operand;
     confirm before relying on this for multi-MEM patterns.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, src, ALL)
    {
      auto op = *iter;

      if (MEM_P (op))
        return ix86_need_alignment_p_2 (op, alignment);
    }

  /* No memory reference at all.  */
  return false;
}
    8841              : 
    8842              : /* Return true if INSN needs alignment > ALIGNMENT.  */
    8843              : 
    8844              : static bool
    8845     44696204 : ix86_need_alignment_p (rtx_insn *insn, unsigned int alignment)
    8846              : {
    8847     44696204 :   rtx set = single_set (insn);
    8848     44696204 :   if (set)
    8849     43927503 :     return ix86_need_alignment_p_1 (set, alignment);
    8850              : 
    8851       768701 :   rtx pat = PATTERN (insn);
    8852       768701 :   if (GET_CODE (pat) != PARALLEL)
    8853              :     return false;
    8854              : 
    8855      2489281 :   for (int i = 0; i < XVECLEN (pat, 0); i++)
    8856              :     {
    8857      1844861 :       rtx exp = XVECEXP (pat, 0, i);
    8858              : 
    8859      1844861 :       if (GET_CODE (exp) == SET
    8860      1844861 :           && ix86_need_alignment_p_1 (exp, alignment))
    8861              :         return true;
    8862              :     }
    8863              : 
    8864              :   return false;
    8865              : }
    8866              : 
/* Set stack_frame_required to false if stack frame isn't required.
   Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
   slot used if stack frame is required and CHECK_STACK_SLOT is true.  */

static void
ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
                                    bool check_stack_slot)
{
  HARD_REG_SET set_up_by_prologue, prologue_used;
  basic_block bb;

  CLEAR_HARD_REG_SET (prologue_used);
  CLEAR_HARD_REG_SET (set_up_by_prologue);
  add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
  add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
  add_to_hard_reg_set (&set_up_by_prologue, Pmode,
                       HARD_FRAME_POINTER_REGNUM);

  /* A stack frame is required as soon as any non-debug insn needs one.
     NOTE(review): the break only exits the insn walk; remaining basic
     blocks are still iterated (harmlessly) after the flag is set.  */
  bool require_stack_frame = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      FOR_BB_INSNS (bb, insn)
        if (NONDEBUG_INSN_P (insn)
            && requires_stack_frame_p (insn, prologue_used,
                                       set_up_by_prologue))
          {
            require_stack_frame = true;
            break;
          }
    }

  cfun->machine->stack_frame_required = require_stack_frame;

  /* Stop if we don't need to check stack slot.  */
  if (!check_stack_slot)
    return;

  /* The preferred stack alignment is the minimum stack alignment.  */
  if (stack_alignment > crtl->preferred_stack_boundary)
    stack_alignment = crtl->preferred_stack_boundary;

  HARD_REG_SET stack_slot_access;
  CLEAR_HARD_REG_SET (stack_slot_access);

  /* Stack slot can be accessed by stack pointer, frame pointer or
     registers defined by stack pointer or frame pointer.  */
  auto_bitmap worklist;

  add_to_hard_reg_set (&stack_slot_access, Pmode, STACK_POINTER_REGNUM);
  bitmap_set_bit (worklist, STACK_POINTER_REGNUM);

  if (frame_pointer_needed)
    {
      add_to_hard_reg_set (&stack_slot_access, Pmode,
                           HARD_FRAME_POINTER_REGNUM);
      bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
    }

  /* Registers on HARD_STACK_SLOT_ACCESS always access stack.  */
  HARD_REG_SET hard_stack_slot_access = stack_slot_access;

  calculate_dominance_info (CDI_DOMINATORS);

  unsigned int regno;

  /* Transitive closure: pull in every Pmode register derived from the
     stack/frame pointer until the worklist is exhausted.  */
  do
    {
      regno = bitmap_clear_first_set_bit (worklist);
      ix86_find_all_reg_uses (stack_slot_access, regno, worklist);
    }
  while (!bitmap_empty_p (worklist));

  hard_reg_set_iterator hrsi;
  stack_access_data data;

  /* Per-register memoization bitmaps for ix86_access_stack_p.  */
  auto_bitmap reg_dominate_bbs_known[FIRST_PSEUDO_REGISTER];
  auto_bitmap reg_dominate_bbs[FIRST_PSEUDO_REGISTER];

  data.stack_alignment = &stack_alignment;

  /* For every register that may address a stack slot, scan its uses
     and raise STACK_ALIGNMENT from the MEMs it addresses.  */
  EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, regno, hrsi)
    {
      for (df_ref ref = DF_REG_USE_CHAIN (regno);
           ref != NULL;
           ref = DF_REF_NEXT_REG (ref))
        {
          if (DF_REF_IS_ARTIFICIAL (ref))
            continue;

          rtx_insn *insn = DF_REF_INSN (ref);

          if (!NONJUMP_INSN_P (insn))
            continue;

          /* Call ix86_access_stack_p only if INSN needs alignment >
             STACK_ALIGNMENT.  */
          if (ix86_need_alignment_p (insn, stack_alignment)
              && (TEST_HARD_REG_BIT (hard_stack_slot_access, regno)
                  || ix86_access_stack_p (regno, BLOCK_FOR_INSN (insn),
                                          set_up_by_prologue,
                                          prologue_used,
                                          reg_dominate_bbs_known,
                                          reg_dominate_bbs)))
            {
              /* Update stack alignment if REGNO is used for stack
                 access.  */
              data.reg = DF_REF_REG (ref);
              note_stores (insn, ix86_update_stack_alignment, &data);
            }
        }
    }

  free_dominance_info (CDI_DOMINATORS);
}
    8983              : 
    8984              : /* Finalize stack_realign_needed and frame_pointer_needed flags, which
    8985              :    will guide prologue/epilogue to be generated in correct form.  */
    8986              : 
    8987              : static void
    8988      3434721 : ix86_finalize_stack_frame_flags (void)
    8989              : {
    8990              :   /* Check if stack realign is really needed after reload, and
    8991              :      stores result in cfun */
    8992      3434721 :   unsigned int incoming_stack_boundary
    8993      3434721 :     = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
    8994      3434721 :        ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
    8995      3434721 :   unsigned int stack_alignment
    8996      1180946 :     = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
    8997      4615667 :        ? crtl->max_used_stack_slot_alignment
    8998      3434721 :        : crtl->stack_alignment_needed);
    8999      3434721 :   unsigned int stack_realign
    9000      3434721 :     = (incoming_stack_boundary < stack_alignment);
    9001      3434721 :   bool recompute_frame_layout_p = false;
    9002              : 
    9003      3434721 :   if (crtl->stack_realign_finalized)
    9004              :     {
      /* After stack_realign_needed is finalized, we can no longer
         change it.  */
    9007      1954677 :       gcc_assert (crtl->stack_realign_needed == stack_realign);
    9008      1954677 :       return;
    9009              :     }
    9010              : 
    9011              :   /* It is always safe to compute max_used_stack_alignment.  We
    9012              :      compute it only if 128-bit aligned load/store may be generated
    9013              :      on misaligned stack slot which will lead to segfault. */
    9014      2960088 :   bool check_stack_slot
    9015      1480044 :     = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
    9016      1480044 :   ix86_find_max_used_stack_alignment (stack_alignment,
    9017              :                                       check_stack_slot);
    9018              : 
    9019              :   /* If the only reason for frame_pointer_needed is that we conservatively
    9020              :      assumed stack realignment might be needed or -fno-omit-frame-pointer
    9021              :      is used, but in the end nothing that needed the stack alignment had
    9022              :      been spilled nor stack access, clear frame_pointer_needed and say we
    9023              :      don't need stack realignment.
    9024              : 
    9025              :      When vector register is used for piecewise move and store, we don't
    9026              :      increase stack_alignment_needed as there is no register spill for
    9027              :      piecewise move and store.  Since stack_realign_needed is set to true
    9028              :      by checking stack_alignment_estimated which is updated by pseudo
    9029              :      vector register usage, we also need to check stack_realign_needed to
    9030              :      eliminate frame pointer.  */
    9031      1480044 :   if ((stack_realign
    9032      1413749 :        || (!flag_omit_frame_pointer && optimize)
    9033      1403499 :        || crtl->stack_realign_needed)
    9034        77201 :       && frame_pointer_needed
    9035        77201 :       && crtl->is_leaf
    9036        52701 :       && crtl->sp_is_unchanging
    9037        52649 :       && !ix86_current_function_calls_tls_descriptor
    9038        52649 :       && !crtl->accesses_prior_frames
    9039        52649 :       && !cfun->calls_alloca
    9040        52649 :       && !crtl->calls_eh_return
    9041              :       /* See ira_setup_eliminable_regset for the rationale.  */
    9042        52649 :       && !(STACK_CHECK_MOVING_SP
    9043        52649 :            && flag_stack_check
    9044            0 :            && flag_exceptions
    9045            0 :            && cfun->can_throw_non_call_exceptions)
    9046        52649 :       && !ix86_frame_pointer_required ()
    9047        52648 :       && ix86_get_frame_size () == 0
    9048        34945 :       && ix86_nsaved_sseregs () == 0
    9049      1514989 :       && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    9050              :     {
    9051        34945 :       if (cfun->machine->stack_frame_required)
    9052              :         {
    9053              :           /* Stack frame is required.  If stack alignment needed is less
    9054              :              than incoming stack boundary, don't realign stack.  */
    9055          278 :           stack_realign = incoming_stack_boundary < stack_alignment;
    9056          278 :           if (!stack_realign)
    9057              :             {
    9058          278 :               crtl->max_used_stack_slot_alignment
    9059          278 :                 = incoming_stack_boundary;
    9060          278 :               crtl->stack_alignment_needed
    9061          278 :                 = incoming_stack_boundary;
    9062              :               /* Also update preferred_stack_boundary for leaf
    9063              :                  functions.  */
    9064          278 :               crtl->preferred_stack_boundary
    9065          278 :                 = incoming_stack_boundary;
    9066              :             }
    9067              :         }
    9068              :       else
    9069              :         {
    9070              :           /* If drap has been set, but it actually isn't live at the
    9071              :              start of the function, there is no reason to set it up.  */
    9072        34667 :           if (crtl->drap_reg)
    9073              :             {
    9074           35 :               basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
    9075           70 :               if (! REGNO_REG_SET_P (DF_LR_IN (bb),
    9076              :                                      REGNO (crtl->drap_reg)))
    9077              :                 {
    9078           35 :                   crtl->drap_reg = NULL_RTX;
    9079           35 :                   crtl->need_drap = false;
    9080              :                 }
    9081              :             }
    9082              :           else
    9083        34632 :             cfun->machine->no_drap_save_restore = true;
    9084              : 
    9085        34667 :           frame_pointer_needed = false;
    9086        34667 :           stack_realign = false;
    9087        34667 :           crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
    9088        34667 :           crtl->stack_alignment_needed = incoming_stack_boundary;
    9089        34667 :           crtl->stack_alignment_estimated = incoming_stack_boundary;
    9090        34667 :           if (crtl->preferred_stack_boundary > incoming_stack_boundary)
    9091            1 :             crtl->preferred_stack_boundary = incoming_stack_boundary;
    9092        34667 :           df_finish_pass (true);
    9093        34667 :           df_scan_alloc (NULL);
    9094        34667 :           df_scan_blocks ();
    9095        34667 :           df_compute_regs_ever_live (true);
    9096        34667 :           df_analyze ();
    9097              : 
    9098        34667 :           if (flag_var_tracking)
    9099              :             {
    9100              :               /* Since frame pointer is no longer available, replace it with
    9101              :                  stack pointer - UNITS_PER_WORD in debug insns.  */
    9102          133 :               df_ref ref, next;
    9103          133 :               for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
    9104          133 :                    ref; ref = next)
    9105              :                 {
    9106            0 :                   next = DF_REF_NEXT_REG (ref);
    9107            0 :                   if (!DF_REF_INSN_INFO (ref))
    9108            0 :                     continue;
    9109              : 
    9110              :                   /* Make sure the next ref is for a different instruction,
    9111              :                      so that we're not affected by the rescan.  */
    9112            0 :                   rtx_insn *insn = DF_REF_INSN (ref);
    9113            0 :                   while (next && DF_REF_INSN (next) == insn)
    9114            0 :                     next = DF_REF_NEXT_REG (next);
    9115              : 
    9116            0 :                   if (DEBUG_INSN_P (insn))
    9117              :                     {
    9118              :                       bool changed = false;
    9119            0 :                       for (; ref != next; ref = DF_REF_NEXT_REG (ref))
    9120              :                         {
    9121            0 :                           rtx *loc = DF_REF_LOC (ref);
    9122            0 :                           if (*loc == hard_frame_pointer_rtx)
    9123              :                             {
    9124            0 :                               *loc = plus_constant (Pmode,
    9125              :                                                     stack_pointer_rtx,
    9126            0 :                                                     -UNITS_PER_WORD);
    9127            0 :                               changed = true;
    9128              :                             }
    9129              :                         }
    9130            0 :                       if (changed)
    9131            0 :                         df_insn_rescan (insn);
    9132              :                     }
    9133              :                 }
    9134              :             }
    9135              : 
    9136              :           recompute_frame_layout_p = true;
    9137              :         }
    9138              :     }
    9139      1445099 :   else if (crtl->max_used_stack_slot_alignment >= 128
    9140       655211 :            && cfun->machine->stack_frame_required)
    9141              :     {
    9142              :       /* We don't need to realign stack.  max_used_stack_alignment is
    9143              :          used to decide how stack frame should be aligned.  This is
    9144              :          independent of any psABIs nor 32-bit vs 64-bit.  */
    9145       610007 :       cfun->machine->max_used_stack_alignment
    9146       610007 :         = stack_alignment / BITS_PER_UNIT;
    9147              :     }
    9148              : 
    9149      1480044 :   if (crtl->stack_realign_needed != stack_realign)
    9150        35178 :     recompute_frame_layout_p = true;
    9151      1480044 :   crtl->stack_realign_needed = stack_realign;
    9152      1480044 :   crtl->stack_realign_finalized = true;
    9153      1480044 :   if (recompute_frame_layout_p)
    9154        35271 :     ix86_compute_frame_layout ();
    9155              : }
    9156              : 
    9157              : /* Delete SET_GOT right after entry block if it is allocated to reg.  */
    9158              : 
    9159              : static void
    9160            0 : ix86_elim_entry_set_got (rtx reg)
    9161              : {
    9162            0 :   basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
    9163            0 :   rtx_insn *c_insn = BB_HEAD (bb);
    9164            0 :   if (!NONDEBUG_INSN_P (c_insn))
    9165            0 :     c_insn = next_nonnote_nondebug_insn (c_insn);
    9166            0 :   if (c_insn && NONJUMP_INSN_P (c_insn))
    9167              :     {
    9168            0 :       rtx pat = PATTERN (c_insn);
    9169            0 :       if (GET_CODE (pat) == PARALLEL)
    9170              :         {
    9171            0 :           rtx set = XVECEXP (pat, 0, 0);
    9172            0 :           if (GET_CODE (set) == SET
    9173            0 :               && GET_CODE (SET_SRC (set)) == UNSPEC
    9174            0 :               && XINT (SET_SRC (set), 1) == UNSPEC_SET_GOT
    9175            0 :               && REGNO (SET_DEST (set)) == REGNO (reg))
    9176            0 :             delete_insn (c_insn);
    9177              :         }
    9178              :     }
    9179            0 : }
    9180              : 
    9181              : static rtx
    9182       193166 : gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
    9183              : {
    9184       193166 :   rtx addr, mem;
    9185              : 
    9186       193166 :   if (offset)
    9187       184480 :     addr = plus_constant (Pmode, frame_reg, offset);
    9188       193166 :   mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
    9189       193166 :   return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
    9190              : }
    9191              : 
    9192              : static inline rtx
    9193       100333 : gen_frame_load (rtx reg, rtx frame_reg, int offset)
    9194              : {
    9195       100333 :   return gen_frame_set (reg, frame_reg, offset, false);
    9196              : }
    9197              : 
    9198              : static inline rtx
    9199        92833 : gen_frame_store (rtx reg, rtx frame_reg, int offset)
    9200              : {
    9201        92833 :   return gen_frame_set (reg, frame_reg, offset, true);
    9202              : }
    9203              : 
/* Emit the prologue call to the out-of-line ms2sysv register-save stub,
   saving the MS-ABI clobbered registers described by FRAME.  The call is
   emitted as a single frame-related PARALLEL: a USE of the stub symbol
   followed by one frame store per saved register, all addressed relative
   to RAX, which is set up here as the stub's base pointer.  */

static void
ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
{
  struct machine_function *m = cfun->machine;
  /* Total registers the stub saves: the fixed MS-clobbered set plus any
     extra registers this function asked the stub to handle.  */
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
			  + m->call_ms2sysv_extra_regs;
  /* One vector slot per saved register, plus one for the stub USE.  */
  rtvec v = rtvec_alloc (ncregs + 1);
  unsigned int align, i, vi = 0;
  rtx_insn *insn;
  rtx sym, addr;
  rtx rax = gen_rtx_REG (word_mode, AX_REG);
  const class xlogue_layout &xlogue = xlogue_layout::get_instance ();

  /* AL should only be live with sysv_abi.  */
  gcc_assert (!ix86_eax_live_at_start_p ());
  gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);

  /* Setup RAX as the stub's base pointer.  We use stack_realign_offset
     regardless of whether we've actually realigned the stack or not.  */
  align = GET_MODE_ALIGNMENT (V4SFmode);
  addr = choose_baseaddr (frame.stack_realign_offset
                          + xlogue.get_stub_ptr_offset (), &align, AX_REG);
  /* The SSE saves below require 16-byte (V4SF) alignment.  */
  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));

  emit_insn (gen_rtx_SET (rax, addr));

  /* Get the stub symbol.  */
  sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
                                                  : XLOGUE_STUB_SAVE);
  RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);

  /* Describe each register save the stub performs, so the unwinder sees
     them.  SSE registers are saved as V4SF; the rest in word_mode.  The
     stub stores at negative offsets from the RAX base pointer.  */
  for (i = 0; i < ncregs; ++i)
    {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
      rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
                             r.regno);
      RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
    }

  /* Every slot of the vector must have been filled exactly once.  */
  gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));

  insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
  RTX_FRAME_RELATED_P (insn) = true;
}
    9248              : 
    9249              : /* Generate and return an insn body to AND X with Y.  */
    9250              : 
    9251              : static rtx_insn *
    9252        31773 : gen_and2_insn (rtx x, rtx y)
    9253              : {
    9254        31773 :   enum insn_code icode = optab_handler (and_optab, GET_MODE (x));
    9255              : 
    9256        31773 :   gcc_assert (insn_operand_matches (icode, 0, x));
    9257        31773 :   gcc_assert (insn_operand_matches (icode, 1, x));
    9258        31773 :   gcc_assert (insn_operand_matches (icode, 2, y));
    9259              : 
    9260        31773 :   return GEN_FCN (icode) (x, x, y);
    9261              : }
    9262              : 
    9263              : /* Expand the prologue into a bunch of separate insns.  */
    9264              : 
    9265              : void
    9266      1524871 : ix86_expand_prologue (void)
    9267              : {
    9268      1524871 :   struct machine_function *m = cfun->machine;
    9269      1524871 :   rtx insn, t;
    9270      1524871 :   HOST_WIDE_INT allocate;
    9271      1524871 :   bool int_registers_saved;
    9272      1524871 :   bool sse_registers_saved;
    9273      1524871 :   bool save_stub_call_needed;
    9274      1524871 :   rtx static_chain = NULL_RTX;
    9275              : 
    9276      1524871 :   ix86_last_zero_store_uid = 0;
    9277      1524871 :   if (ix86_function_naked (current_function_decl))
    9278              :     {
    9279           74 :       if (flag_stack_usage_info)
    9280            0 :         current_function_static_stack_size = 0;
    9281           74 :       return;
    9282              :     }
    9283              : 
    9284      1524797 :   ix86_finalize_stack_frame_flags ();
    9285              : 
    9286              :   /* DRAP should not coexist with stack_realign_fp */
    9287      1524797 :   gcc_assert (!(crtl->drap_reg && stack_realign_fp));
    9288              : 
    9289      1524797 :   memset (&m->fs, 0, sizeof (m->fs));
    9290              : 
    9291              :   /* Initialize CFA state for before the prologue.  */
    9292      1524797 :   m->fs.cfa_reg = stack_pointer_rtx;
    9293      1524797 :   m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
    9294              : 
    9295              :   /* Track SP offset to the CFA.  We continue tracking this after we've
    9296              :      swapped the CFA register away from SP.  In the case of re-alignment
    9297              :      this is fudged; we're interested to offsets within the local frame.  */
    9298      1524797 :   m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
    9299      1524797 :   m->fs.sp_valid = true;
    9300      1524797 :   m->fs.sp_realigned = false;
    9301              : 
    9302      1524797 :   const struct ix86_frame &frame = cfun->machine->frame;
    9303              : 
    9304      1524797 :   if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    9305              :     {
    9306              :       /* We should have already generated an error for any use of
    9307              :          ms_hook on a nested function.  */
    9308            0 :       gcc_checking_assert (!ix86_static_chain_on_stack);
    9309              : 
    9310              :       /* Check if profiling is active and we shall use profiling before
    9311              :          prologue variant. If so sorry.  */
    9312            0 :       if (crtl->profile && flag_fentry != 0)
    9313            0 :         sorry ("%<ms_hook_prologue%> attribute is not compatible "
    9314              :                "with %<-mfentry%> for 32-bit");
    9315              : 
    9316              :       /* In ix86_asm_output_function_label we emitted:
    9317              :          8b ff     movl.s %edi,%edi
    9318              :          55        push   %ebp
    9319              :          8b ec     movl.s %esp,%ebp
    9320              : 
    9321              :          This matches the hookable function prologue in Win32 API
    9322              :          functions in Microsoft Windows XP Service Pack 2 and newer.
    9323              :          Wine uses this to enable Windows apps to hook the Win32 API
    9324              :          functions provided by Wine.
    9325              : 
    9326              :          What that means is that we've already set up the frame pointer.  */
    9327              : 
    9328            0 :       if (frame_pointer_needed
    9329            0 :           && !(crtl->drap_reg && crtl->stack_realign_needed))
    9330              :         {
    9331            0 :           rtx push, mov;
    9332              : 
    9333              :           /* We've decided to use the frame pointer already set up.
    9334              :              Describe this to the unwinder by pretending that both
    9335              :              push and mov insns happen right here.
    9336              : 
    9337              :              Putting the unwind info here at the end of the ms_hook
    9338              :              is done so that we can make absolutely certain we get
    9339              :              the required byte sequence at the start of the function,
    9340              :              rather than relying on an assembler that can produce
    9341              :              the exact encoding required.
    9342              : 
    9343              :              However it does mean (in the unpatched case) that we have
    9344              :              a 1 insn window where the asynchronous unwind info is
    9345              :              incorrect.  However, if we placed the unwind info at
    9346              :              its correct location we would have incorrect unwind info
    9347              :              in the patched case.  Which is probably all moot since
    9348              :              I don't expect Wine generates dwarf2 unwind info for the
    9349              :              system libraries that use this feature.  */
    9350              : 
    9351            0 :           insn = emit_insn (gen_blockage ());
    9352              : 
    9353            0 :           push = gen_push (hard_frame_pointer_rtx);
    9354            0 :           mov = gen_rtx_SET (hard_frame_pointer_rtx,
    9355              :                              stack_pointer_rtx);
    9356            0 :           RTX_FRAME_RELATED_P (push) = 1;
    9357            0 :           RTX_FRAME_RELATED_P (mov) = 1;
    9358              : 
    9359            0 :           RTX_FRAME_RELATED_P (insn) = 1;
    9360            0 :           add_reg_note (insn, REG_FRAME_RELATED_EXPR,
    9361              :                         gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
    9362              : 
    9363              :           /* Note that gen_push incremented m->fs.cfa_offset, even
    9364              :              though we didn't emit the push insn here.  */
    9365            0 :           m->fs.cfa_reg = hard_frame_pointer_rtx;
    9366            0 :           m->fs.fp_offset = m->fs.cfa_offset;
    9367            0 :           m->fs.fp_valid = true;
    9368            0 :         }
    9369              :       else
    9370              :         {
    9371              :           /* The frame pointer is not needed so pop %ebp again.
    9372              :              This leaves us with a pristine state.  */
    9373            0 :           emit_insn (gen_pop (hard_frame_pointer_rtx));
    9374              :         }
    9375              :     }
    9376              : 
    9377              :   /* The first insn of a function that accepts its static chain on the
    9378              :      stack is to push the register that would be filled in by a direct
    9379              :      call.  This insn will be skipped by the trampoline.  */
    9380      1524797 :   else if (ix86_static_chain_on_stack)
    9381              :     {
    9382            0 :       static_chain = ix86_static_chain (cfun->decl, false);
    9383            0 :       insn = emit_insn (gen_push (static_chain));
    9384            0 :       emit_insn (gen_blockage ());
    9385              : 
    9386              :       /* We don't want to interpret this push insn as a register save,
    9387              :          only as a stack adjustment.  The real copy of the register as
    9388              :          a save will be done later, if needed.  */
    9389            0 :       t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
    9390            0 :       t = gen_rtx_SET (stack_pointer_rtx, t);
    9391            0 :       add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
    9392            0 :       RTX_FRAME_RELATED_P (insn) = 1;
    9393              :     }
    9394              : 
    9395              :   /* Emit prologue code to adjust stack alignment and setup DRAP, in case
    9396              :      of DRAP is needed and stack realignment is really needed after reload */
    9397      1524797 :   if (stack_realign_drap)
    9398              :     {
    9399         7084 :       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
    9400              : 
    9401              :       /* Can't use DRAP in interrupt function.  */
    9402         7084 :       if (cfun->machine->func_type != TYPE_NORMAL)
    9403            0 :         sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
    9404              :                "in interrupt service routine.  This may be worked "
    9405              :                "around by avoiding functions with aggregate return.");
    9406              : 
    9407              :       /* Only need to push parameter pointer reg if it is caller saved.  */
    9408         7084 :       if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
    9409              :         {
    9410              :           /* Push arg pointer reg */
    9411          136 :           insn = emit_insn (gen_push (crtl->drap_reg));
    9412          136 :           RTX_FRAME_RELATED_P (insn) = 1;
    9413              :         }
    9414              : 
    9415              :       /* Grab the argument pointer.  */
    9416         7369 :       t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
    9417         7084 :       insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
    9418         7084 :       RTX_FRAME_RELATED_P (insn) = 1;
    9419         7084 :       m->fs.cfa_reg = crtl->drap_reg;
    9420         7084 :       m->fs.cfa_offset = 0;
    9421              : 
    9422              :       /* Align the stack.  */
    9423         7084 :       insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
    9424         7084 :                                        GEN_INT (-align_bytes)));
    9425         7084 :       RTX_FRAME_RELATED_P (insn) = 1;
    9426              : 
    9427              :       /* Replicate the return address on the stack so that return
    9428              :          address can be reached via (argp - 1) slot.  This is needed
    9429              :          to implement macro RETURN_ADDR_RTX and intrinsic function
    9430              :          expand_builtin_return_addr etc.  */
    9431         7654 :       t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
    9432         7084 :       t = gen_frame_mem (word_mode, t);
    9433         7084 :       insn = emit_insn (gen_push (t));
    9434         7084 :       RTX_FRAME_RELATED_P (insn) = 1;
    9435              : 
    9436              :       /* For the purposes of frame and register save area addressing,
    9437              :          we've started over with a new frame.  */
    9438         7084 :       m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
    9439         7084 :       m->fs.realigned = true;
    9440              : 
    9441         7084 :       if (static_chain)
    9442              :         {
    9443              :           /* Replicate static chain on the stack so that static chain
    9444              :              can be reached via (argp - 2) slot.  This is needed for
    9445              :              nested function with stack realignment.  */
    9446            0 :           insn = emit_insn (gen_push (static_chain));
    9447            0 :           RTX_FRAME_RELATED_P (insn) = 1;
    9448              :         }
    9449              :     }
    9450              : 
    9451      1524797 :   int_registers_saved = (frame.nregs == 0);
    9452      1524797 :   sse_registers_saved = (frame.nsseregs == 0);
    9453      1524797 :   save_stub_call_needed = (m->call_ms2sysv);
    9454      1524797 :   gcc_assert (sse_registers_saved || !save_stub_call_needed);
    9455              : 
    9456      1524797 :   if (frame_pointer_needed && !m->fs.fp_valid)
    9457              :     {
    9458              :       /* Note: AT&T enter does NOT have reversed args.  Enter is probably
    9459              :          slower on all targets.  Also sdb didn't like it.  */
    9460       479942 :       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
    9461       479942 :       RTX_FRAME_RELATED_P (insn) = 1;
    9462              : 
    9463       479942 :       if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
    9464              :         {
    9465       479942 :           insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
    9466       479942 :           RTX_FRAME_RELATED_P (insn) = 1;
    9467              : 
    9468       479942 :           if (m->fs.cfa_reg == stack_pointer_rtx)
    9469       472858 :             m->fs.cfa_reg = hard_frame_pointer_rtx;
    9470       479942 :           m->fs.fp_offset = m->fs.sp_offset;
    9471       479942 :           m->fs.fp_valid = true;
    9472              :         }
    9473              :     }
    9474              : 
    9475      1524797 :   if (!int_registers_saved)
    9476              :     {
    9477              :       /* If saving registers via PUSH, do so now.  */
    9478       472408 :       if (!frame.save_regs_using_mov)
    9479              :         {
    9480       427594 :           ix86_emit_save_regs ();
    9481       427594 :           m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
    9482       427594 :           int_registers_saved = true;
    9483       427594 :           gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
    9484              :         }
    9485              : 
    9486              :       /* When using red zone we may start register saving before allocating
    9487              :          the stack frame saving one cycle of the prologue.  However, avoid
    9488              :          doing this if we have to probe the stack; at least on x86_64 the
    9489              :          stack probe can turn into a call that clobbers a red zone location. */
    9490        44814 :       else if (ix86_using_red_zone ()
    9491        44814 :                 && (! TARGET_STACK_PROBE
    9492            0 :                     || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
    9493              :         {
    9494        40353 :           HOST_WIDE_INT allocate_offset;
    9495        40353 :           if (crtl->shrink_wrapped_separate)
    9496              :             {
    9497        40297 :               allocate_offset = m->fs.sp_offset - frame.stack_pointer_offset;
    9498              : 
    9499              :               /* Adjust the total offset at the beginning of the function.  */
    9500        40297 :               pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    9501              :                                          GEN_INT (allocate_offset), -1,
    9502        40297 :                                          m->fs.cfa_reg == stack_pointer_rtx);
    9503        40297 :               m->fs.sp_offset = cfun->machine->frame.stack_pointer_offset;
    9504              :             }
    9505              : 
    9506        40353 :           ix86_emit_save_regs_using_mov (frame.reg_save_offset);
    9507        40353 :           int_registers_saved = true;
    9508              :         }
    9509              :     }
    9510              : 
    9511      1524797 :   if (frame.red_zone_size != 0)
    9512       139987 :     cfun->machine->red_zone_used = true;
    9513              : 
    9514      1524797 :   if (stack_realign_fp)
    9515              :     {
    9516        24689 :       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
    9517        25041 :       gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
    9518              : 
    9519              :       /* Record last valid frame pointer offset.  */
    9520        24689 :       m->fs.sp_realigned_fp_last = frame.reg_save_offset;
    9521              : 
    9522              :       /* The computation of the size of the re-aligned stack frame means
    9523              :          that we must allocate the size of the register save area before
    9524              :          performing the actual alignment.  Otherwise we cannot guarantee
    9525              :          that there's enough storage above the realignment point.  */
    9526        24689 :       allocate = frame.reg_save_offset - m->fs.sp_offset
    9527        24689 :                  + frame.stack_realign_allocate;
    9528        24689 :       if (allocate)
    9529         2691 :         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    9530              :                                    GEN_INT (-allocate), -1, false);
    9531              : 
    9532              :       /* Align the stack.  */
    9533        24689 :       emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
    9534        24689 :       m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
    9535        24689 :       m->fs.sp_realigned_offset = m->fs.sp_offset
    9536        24689 :                                               - frame.stack_realign_allocate;
    9537              :       /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
    9538              :          Beyond this point, stack access should be done via choose_baseaddr or
    9539              :          by using sp_valid_at and fp_valid_at to determine the correct base
    9540              :          register.  Henceforth, any CFA offset should be thought of as logical
    9541              :          and not physical.  */
    9542        24689 :       gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
    9543        24689 :       gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
    9544        24689 :       m->fs.sp_realigned = true;
    9545              : 
    9546              :       /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
    9547              :          is needed to describe where a register is saved using a realigned
    9548              :          stack pointer, so we need to invalidate the stack pointer for that
    9549              :          target.  */
    9550        24689 :       if (TARGET_SEH)
    9551              :         m->fs.sp_valid = false;
    9552              : 
    9553              :       /* If SP offset is non-immediate after allocation of the stack frame,
    9554              :          then emit SSE saves or stub call prior to allocating the rest of the
    9555              :          stack frame.  This is less efficient for the out-of-line stub because
    9556              :          we can't combine allocations across the call barrier, but it's better
    9557              :          than using a scratch register.  */
    9558        24689 :       else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
    9559              :                                                    - m->fs.sp_realigned_offset),
    9560        24689 :                                           Pmode))
    9561              :         {
    9562            3 :           if (!sse_registers_saved)
    9563              :             {
    9564            1 :               ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
    9565            1 :               sse_registers_saved = true;
    9566              :             }
    9567            2 :           else if (save_stub_call_needed)
    9568              :             {
    9569            1 :               ix86_emit_outlined_ms2sysv_save (frame);
    9570            1 :               save_stub_call_needed = false;
    9571              :             }
    9572              :         }
    9573              :     }
    9574              : 
    9575      1524797 :   allocate = frame.stack_pointer_offset - m->fs.sp_offset;
    9576              : 
    9577      1524797 :   if (flag_stack_usage_info)
    9578              :     {
    9579              :       /* We start to count from ARG_POINTER.  */
    9580          355 :       HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
    9581              : 
    9582              :       /* If it was realigned, take into account the fake frame.  */
    9583          355 :       if (stack_realign_drap)
    9584              :         {
    9585            1 :           if (ix86_static_chain_on_stack)
    9586            0 :             stack_size += UNITS_PER_WORD;
    9587              : 
    9588            1 :           if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
    9589            0 :             stack_size += UNITS_PER_WORD;
    9590              : 
    9591              :           /* This over-estimates by 1 minimal-stack-alignment-unit but
    9592              :              mitigates that by counting in the new return address slot.  */
    9593            1 :           current_function_dynamic_stack_size
    9594            1 :             += crtl->stack_alignment_needed / BITS_PER_UNIT;
    9595              :         }
    9596              : 
    9597          355 :       current_function_static_stack_size = stack_size;
    9598              :     }
    9599              : 
    9600              :   /* On SEH target with very large frame size, allocate an area to save
    9601              :      SSE registers (as the very large allocation won't be described).  */
    9602      1524797 :   if (TARGET_SEH
    9603              :       && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
    9604              :       && !sse_registers_saved)
    9605              :     {
    9606              :       HOST_WIDE_INT sse_size
    9607              :         = frame.sse_reg_save_offset - frame.reg_save_offset;
    9608              : 
    9609              :       gcc_assert (int_registers_saved);
    9610              : 
    9611              :       /* No need to do stack checking as the area will be immediately
    9612              :          written.  */
    9613              :       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    9614              :                                  GEN_INT (-sse_size), -1,
    9615              :                                  m->fs.cfa_reg == stack_pointer_rtx);
    9616              :       allocate -= sse_size;
    9617              :       ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
    9618              :       sse_registers_saved = true;
    9619              :     }
    9620              : 
    9621              :   /* If stack clash protection is requested, then probe the stack, unless it
    9622              :      is already probed on the target.  */
    9623      1524797 :   if (allocate >= 0
    9624      1524793 :       && flag_stack_clash_protection
    9625      1524895 :       && !ix86_target_stack_probe ())
    9626              :     {
    9627           98 :       ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
    9628           98 :       allocate = 0;
    9629              :     }
    9630              : 
    9631              :   /* The stack has already been decremented by the instruction calling us
    9632              :      so probe if the size is non-negative to preserve the protection area.  */
    9633      1524699 :   else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    9634              :     {
    9635           46 :       const HOST_WIDE_INT probe_interval = get_probe_interval ();
    9636              : 
    9637           46 :       if (STACK_CHECK_MOVING_SP)
    9638              :         {
    9639           46 :           if (crtl->is_leaf
    9640           18 :               && !cfun->calls_alloca
    9641           18 :               && allocate <= probe_interval)
    9642              :             ;
    9643              : 
    9644              :           else
    9645              :             {
    9646           29 :               ix86_adjust_stack_and_probe (allocate, int_registers_saved, true);
    9647           29 :               allocate = 0;
    9648              :             }
    9649              :         }
    9650              : 
    9651              :       else
    9652              :         {
    9653              :           HOST_WIDE_INT size = allocate;
    9654              : 
    9655              :           if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
    9656              :             size = 0x80000000 - get_stack_check_protect () - 1;
    9657              : 
    9658              :           if (TARGET_STACK_PROBE)
    9659              :             {
    9660              :               if (crtl->is_leaf && !cfun->calls_alloca)
    9661              :                 {
    9662              :                   if (size > probe_interval)
    9663              :                     ix86_emit_probe_stack_range (0, size, int_registers_saved);
    9664              :                 }
    9665              :               else
    9666              :                 ix86_emit_probe_stack_range (0,
    9667              :                                              size + get_stack_check_protect (),
    9668              :                                              int_registers_saved);
    9669              :             }
    9670              :           else
    9671              :             {
    9672              :               if (crtl->is_leaf && !cfun->calls_alloca)
    9673              :                 {
    9674              :                   if (size > probe_interval
    9675              :                       && size > get_stack_check_protect ())
    9676              :                     ix86_emit_probe_stack_range (get_stack_check_protect (),
    9677              :                                                  (size
    9678              :                                                   - get_stack_check_protect ()),
    9679              :                                                  int_registers_saved);
    9680              :                 }
    9681              :               else
    9682              :                 ix86_emit_probe_stack_range (get_stack_check_protect (), size,
    9683              :                                              int_registers_saved);
    9684              :             }
    9685              :         }
    9686              :     }
    9687              : 
    9688      1524793 :   if (allocate == 0)
    9689              :     ;
    9690       840456 :   else if (!ix86_target_stack_probe ()
    9691       840456 :            || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    9692              :     {
    9693       840411 :       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    9694              :                                  GEN_INT (-allocate), -1,
    9695       840411 :                                  m->fs.cfa_reg == stack_pointer_rtx);
    9696              :     }
    9697              :   else
    9698              :     {
    9699           45 :       rtx eax = gen_rtx_REG (Pmode, AX_REG);
    9700           45 :       rtx r10 = NULL;
    9701           45 :       const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
    9702           45 :       bool eax_live = ix86_eax_live_at_start_p ();
    9703           45 :       bool r10_live = false;
    9704              : 
    9705           45 :       if (TARGET_64BIT)
    9706           45 :         r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
    9707              : 
    9708           45 :       if (eax_live)
    9709              :         {
    9710            0 :           insn = emit_insn (gen_push (eax));
    9711            0 :           allocate -= UNITS_PER_WORD;
    9712              :           /* Note that SEH directives need to continue tracking the stack
    9713              :              pointer even after the frame pointer has been set up.  */
    9714            0 :           if (sp_is_cfa_reg || TARGET_SEH)
    9715              :             {
    9716            0 :               if (sp_is_cfa_reg)
    9717            0 :                 m->fs.cfa_offset += UNITS_PER_WORD;
    9718            0 :               RTX_FRAME_RELATED_P (insn) = 1;
    9719            0 :               add_reg_note (insn, REG_FRAME_RELATED_EXPR,
    9720            0 :                             gen_rtx_SET (stack_pointer_rtx,
    9721              :                                          plus_constant (Pmode,
    9722              :                                                         stack_pointer_rtx,
    9723              :                                                         -UNITS_PER_WORD)));
    9724              :             }
    9725              :         }
    9726              : 
    9727           45 :       if (r10_live)
    9728              :         {
    9729            0 :           r10 = gen_rtx_REG (Pmode, R10_REG);
    9730            0 :           insn = emit_insn (gen_push (r10));
    9731            0 :           allocate -= UNITS_PER_WORD;
    9732            0 :           if (sp_is_cfa_reg || TARGET_SEH)
    9733              :             {
    9734            0 :               if (sp_is_cfa_reg)
    9735            0 :                 m->fs.cfa_offset += UNITS_PER_WORD;
    9736            0 :               RTX_FRAME_RELATED_P (insn) = 1;
    9737            0 :               add_reg_note (insn, REG_FRAME_RELATED_EXPR,
    9738            0 :                             gen_rtx_SET (stack_pointer_rtx,
    9739              :                                          plus_constant (Pmode,
    9740              :                                                         stack_pointer_rtx,
    9741              :                                                         -UNITS_PER_WORD)));
    9742              :             }
    9743              :         }
    9744              : 
    9745           45 :       emit_move_insn (eax, GEN_INT (allocate));
    9746           45 :       emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));
    9747              : 
    9748              :       /* Use the fact that AX still contains ALLOCATE.  */
    9749           45 :       insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
    9750           45 :                         (Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));
    9751              : 
    9752           45 :       if (sp_is_cfa_reg || TARGET_SEH)
    9753              :         {
    9754           37 :           if (sp_is_cfa_reg)
    9755           37 :             m->fs.cfa_offset += allocate;
    9756           37 :           RTX_FRAME_RELATED_P (insn) = 1;
    9757           37 :           add_reg_note (insn, REG_FRAME_RELATED_EXPR,
    9758           37 :                         gen_rtx_SET (stack_pointer_rtx,
    9759              :                                      plus_constant (Pmode, stack_pointer_rtx,
    9760              :                                                     -allocate)));
    9761              :         }
    9762           45 :       m->fs.sp_offset += allocate;
    9763              : 
    9764              :       /* Use stack_pointer_rtx for relative addressing so that code works for
    9765              :          realigned stack.  But this means that we need a blockage to prevent
    9766              :          stores based on the frame pointer from being scheduled before.  */
    9767           45 :       if (r10_live && eax_live)
    9768              :         {
    9769            0 :           t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
    9770            0 :           emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
    9771              :                           gen_frame_mem (word_mode, t));
    9772            0 :           t = plus_constant (Pmode, t, UNITS_PER_WORD);
    9773            0 :           emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
    9774              :                           gen_frame_mem (word_mode, t));
    9775            0 :           emit_insn (gen_memory_blockage ());
    9776              :         }
    9777           45 :       else if (eax_live || r10_live)
    9778              :         {
    9779            0 :           t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
    9780            0 :           emit_move_insn (gen_rtx_REG (word_mode,
    9781              :                                        (eax_live ? AX_REG : R10_REG)),
    9782              :                           gen_frame_mem (word_mode, t));
    9783            0 :           emit_insn (gen_memory_blockage ());
    9784              :         }
    9785              :     }
    9786      1524797 :   gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
    9787              : 
  /* If we haven't already set up the frame pointer, do so now.  */
    9789      1524797 :   if (frame_pointer_needed && !m->fs.fp_valid)
    9790              :     {
    9791            0 :       insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
    9792            0 :                             GEN_INT (frame.stack_pointer_offset
    9793              :                                      - frame.hard_frame_pointer_offset));
    9794            0 :       insn = emit_insn (insn);
    9795            0 :       RTX_FRAME_RELATED_P (insn) = 1;
    9796            0 :       add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
    9797              : 
    9798            0 :       if (m->fs.cfa_reg == stack_pointer_rtx)
    9799            0 :         m->fs.cfa_reg = hard_frame_pointer_rtx;
    9800            0 :       m->fs.fp_offset = frame.hard_frame_pointer_offset;
    9801            0 :       m->fs.fp_valid = true;
    9802              :     }
    9803              : 
    9804      1524797 :   if (!int_registers_saved)
    9805         4461 :     ix86_emit_save_regs_using_mov (frame.reg_save_offset);
    9806      1524797 :   if (!sse_registers_saved)
    9807        33352 :     ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
    9808      1491445 :   else if (save_stub_call_needed)
    9809         7044 :     ix86_emit_outlined_ms2sysv_save (frame);
    9810              : 
    9811              :   /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
    9812              :      in PROLOGUE.  */
    9813      1524797 :   if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
    9814              :     {
    9815            0 :       rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
    9816            0 :       insn = emit_insn (gen_set_got (pic));
    9817            0 :       RTX_FRAME_RELATED_P (insn) = 1;
    9818            0 :       add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
    9819            0 :       emit_insn (gen_prologue_use (pic));
      /* Delete an already emitted SET_GOT if it exists and was
	 allocated to REAL_PIC_OFFSET_TABLE_REGNUM.  */
    9822            0 :       ix86_elim_entry_set_got (pic);
    9823              :     }
    9824              : 
    9825      1524797 :   if (crtl->drap_reg && !crtl->stack_realign_needed)
    9826              :     {
    9827              :       /* vDRAP is setup but after reload it turns out stack realign
    9828              :          isn't necessary, here we will emit prologue to setup DRAP
    9829              :          without stack realign adjustment */
    9830          178 :       t = choose_baseaddr (0, NULL);
    9831          178 :       emit_insn (gen_rtx_SET (crtl->drap_reg, t));
    9832              :     }
    9833              : 
    9834              :   /* Prevent instructions from being scheduled into register save push
    9835              :      sequence when access to the redzone area is done through frame pointer.
    9836              :      The offset between the frame pointer and the stack pointer is calculated
    9837              :      relative to the value of the stack pointer at the end of the function
    9838              :      prologue, and moving instructions that access redzone area via frame
    9839              :      pointer inside push sequence violates this assumption.  */
    9840      1524797 :   if (frame_pointer_needed && frame.red_zone_size)
    9841       129166 :     emit_insn (gen_memory_blockage ());
    9842              : 
    9843              :   /* SEH requires that the prologue end within 256 bytes of the start of
    9844              :      the function.  Prevent instruction schedules that would extend that.
    9845              :      Further, prevent alloca modifications to the stack pointer from being
    9846              :      combined with prologue modifications.  */
    9847              :   if (TARGET_SEH)
    9848              :     emit_insn (gen_prologue_use (stack_pointer_rtx));
    9849              : }
    9850              : 
/* Emit code to restore REG using a POP or POPP insn.  REG is popped
   from the top of the stack and the frame-state bookkeeping in
   cfun->machine is updated accordingly.  PPX_P selects the APX
   PPX-prefixed (POPP) encoding.  */

static void
ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p)
{
  struct machine_function *m = cfun->machine;
  rtx_insn *insn = emit_insn (gen_pop (reg, ppx_p));

  /* Queue a CFA-restore note for REG at the current stack offset and
     account for the word popped off the stack.  */
  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      /* The pop moved the stack pointer up one word; record the
	 matching CFA adjustment for the unwinder.  */
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
    9908              : 
/* Emit code to restore REG1 and REG2 using a single POP2 insn, which
   pops two words at once.  PPX_P selects the APX PPX-prefixed form.
   Frame-state bookkeeping in cfun->machine is updated for both
   words.  */
static void
ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false)
{
  struct machine_function *m = cfun->machine;
  const int offset = UNITS_PER_WORD * 2;
  rtx_insn *insn;

  /* POP2 reads a 16-byte (TImode) chunk through a post-increment of
     the stack pointer.  */
  rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode,
						   stack_pointer_rtx));

  if (ppx_p)
    insn = emit_insn (gen_pop2p_di (reg1, mem, reg2));
  else
    insn = emit_insn (gen_pop2_di (reg1, mem, reg2));

  RTX_FRAME_RELATED_P (insn) = 1;

  /* Attach CFA-restore notes for both restored registers.  */
  rtx dwarf = NULL_RTX;
  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg1, dwarf);
  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg2, dwarf);
  REG_NOTES (insn) = dwarf;
  m->fs.sp_offset -= offset;

  if (m->fs.cfa_reg == crtl->drap_reg
      && (REGNO (reg1) == REGNO (crtl->drap_reg)
	  || REGNO (reg2) == REGNO (crtl->drap_reg)))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA,
		    REGNO (reg1) == REGNO (crtl->drap_reg) ? reg1 : reg2);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      /* Two words were popped; record the matching CFA adjustment.  */
      rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= offset;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg1 == hard_frame_pointer_rtx || reg2 == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= offset;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
    9981              : 
    9982              : /* Emit code to restore saved registers using POP insns.  */
    9983              : 
    9984              : static void
    9985      1355234 : ix86_emit_restore_regs_using_pop (bool ppx_p)
    9986              : {
    9987      1355234 :   unsigned int regno;
    9988              : 
    9989    126036762 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    9990    124681528 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
    9991      1223393 :       ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), ppx_p);
    9992      1355234 : }
    9993              : 
/* Emit code to restore saved registers using POP2 insns.  POP2 pops
   two words at a time but needs the stack pointer 16-byte aligned:
   when it is not, a single POP is emitted first to realign, and any
   leftover odd register at the end is also popped singly.  */

static void
ix86_emit_restore_regs_using_pop2 (void)
{
  int regno;
  int regno_list[2];
  regno_list[0] = regno_list[1] = -1;
  int loaded_regnum = 0;
  bool aligned = cfun->machine->fs.sp_offset % 16 == 0;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
      {
	if (aligned)
	  {
	    /* Collect registers in pairs; emit a POP2 once two have
	       accumulated.  */
	    regno_list[loaded_regnum++] = regno;
	    if (loaded_regnum == 2)
	      {
		gcc_assert (regno_list[0] != -1
			    && regno_list[1] != -1
			    && regno_list[0] != regno_list[1]);

		ix86_emit_restore_reg_using_pop2 (gen_rtx_REG (word_mode,
							       regno_list[0]),
						  gen_rtx_REG (word_mode,
							       regno_list[1]),
						  TARGET_APX_PPX);
		loaded_regnum = 0;
		regno_list[0] = regno_list[1] = -1;
	      }
	  }
	else
	  {
	    /* Pop one word first so the stack pointer reaches 16-byte
	       alignment; from then on POP2 can be used.  */
	    ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno),
					     TARGET_APX_PPX);
	    aligned = true;
	  }
      }

  /* An odd trailing register is restored with a plain POP.  */
  if (loaded_regnum == 1)
    ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0]),
				     TARGET_APX_PPX);
}
   10038              : 
/* Emit code and notes for the LEAVE instruction.  If insn is non-null,
   omits the emit and only attaches the notes to INSN.  */

static void
ix86_emit_leave (rtx_insn *insn)
{
  struct machine_function *m = cfun->machine;

  if (!insn)
    insn = emit_insn (gen_leave (word_mode));

  ix86_add_queued_cfa_restore_notes (insn);

  /* LEAVE restores the stack pointer from the frame pointer and pops
     the saved frame pointer; afterwards only the stack pointer is
     valid for addressing.  */
  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_realigned = false;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      /* The CFA was based on the frame pointer; switch it back to the
	 stack pointer at its new offset and tell the unwinder.  */
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (Pmode, stack_pointer_rtx,
				   m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
			     m->fs.fp_offset);
}
   10071              : 
/* Emit code to restore saved general registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  MAYBE_EH_RETURN
   is forwarded to ix86_save_reg to select which registers are
   included.  */
static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
                                  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
      {

        /* Skip registers, already processed by shrink wrap separate.  */
        if (!cfun->machine->reg_is_wrapped_separately[regno])
          {
            rtx reg = gen_rtx_REG (word_mode, regno);
            rtx mem;
            rtx_insn *insn;

            /* Load the register back from its save slot.  */
            mem = choose_baseaddr (cfa_offset, NULL);
            mem = gen_frame_mem (word_mode, mem);
            insn = emit_move_insn (reg, mem);

            if (m->fs.cfa_reg == crtl->drap_reg
                && regno == REGNO (crtl->drap_reg))
              {
                /* Previously we'd represented the CFA as an expression
                   like *(%ebp - 8).  We've just popped that value from
                   the stack, which means we need to reset the CFA to
                   the drap register.  This will remain until we restore
                   the stack pointer.  */
                add_reg_note (insn, REG_CFA_DEF_CFA, reg);
                RTX_FRAME_RELATED_P (insn) = 1;

                /* DRAP register is valid for addressing.  */
                m->fs.drap_valid = true;
              }
            else
              ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
          }
        /* The slot is consumed even when the restore itself was handled
           by shrink-wrap-separate.  */
        cfa_offset -= UNITS_PER_WORD;
      }
}
   10116              : 
/* Emit code to restore saved SSE registers using MOV insns.
   First register is restored from CFA - CFA_OFFSET.  MAYBE_EH_RETURN
   is forwarded to ix86_save_reg to select which registers are
   included.  */
static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
                                      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
      {
        rtx reg = gen_rtx_REG (V4SFmode, regno);
        rtx mem;
        unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);

        /* choose_baseaddr may lower ALIGN if only a less-aligned base
           register is available.  */
        mem = choose_baseaddr (cfa_offset, &align);
        mem = gen_rtx_MEM (V4SFmode, mem);

        /* The location alignment depends upon the base register.  */
        align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
        gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
        set_mem_align (mem, align);
        emit_insn (gen_rtx_SET (reg, mem));

        ix86_add_cfa_restore_note (NULL, reg, cfa_offset);

        cfa_offset -= GET_MODE_SIZE (V4SFmode);
      }
}
   10146              : 
                      : /* Epilogue counterpart of the out-of-line ms2sysv save stubs: restore
                      :    the MS-to-SysV clobbered registers described by FRAME via an xlogue
                      :    stub.  USE_CALL chooses between calling the stub and tail-jumping
                      :    to it; STYLE is passed through to pro_epilogue_adjust_stack for the
                      :    final stack adjustment in the call case.  */
   10147              : static void
   10148         7621 : ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
   10149              :                                   bool use_call, int style)
   10150              : {
   10151         7621 :   struct machine_function *m = cfun->machine;
   10152         7621 :   const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
   10153         7621 :                           + m->call_ms2sysv_extra_regs;
   10154         7621 :   rtvec v;
   10155         7621 :   unsigned int elems_needed, align, i, vi = 0;
   10156         7621 :   rtx_insn *insn;
   10157         7621 :   rtx sym, tmp;
   10158         7621 :   rtx rsi = gen_rtx_REG (word_mode, SI_REG);
   10159         7621 :   rtx r10 = NULL_RTX;
   10160         7621 :   const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
   10161         7621 :   HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
   10162         7621 :   HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
   10163         7621 :   rtx rsi_frame_load = NULL_RTX;
   10164         7621 :   HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
   10165         7621 :   enum xlogue_stub stub;
   10166              : 
   10167         7621 :   gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
   10168              : 
   10169              :   /* If using a realigned stack, we should never start with padding.  */
   10170         7621 :   gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
   10171              : 
   10172              :   /* Setup RSI as the stub's base pointer.  */
   10173         7621 :   align = GET_MODE_ALIGNMENT (V4SFmode);
   10174         7621 :   tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
   10175         7621 :   gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
   10176              : 
   10177         7621 :   emit_insn (gen_rtx_SET (rsi, tmp));
   10178              : 
   10179              :   /* Get a symbol for the stub.  */
   10180         7621 :   if (frame_pointer_needed)
   10181         5955 :     stub = use_call ? XLOGUE_STUB_RESTORE_HFP
   10182              :                     : XLOGUE_STUB_RESTORE_HFP_TAIL;
   10183              :   else
   10184         1666 :     stub = use_call ? XLOGUE_STUB_RESTORE
   10185              :                     : XLOGUE_STUB_RESTORE_TAIL;
   10186         7621 :   sym = xlogue.get_stub_rtx (stub);
   10187              : 
                      :   /* Size the PARALLEL: one frame load per restored register, plus
                      :      the USE of the stub symbol, and for the tail-jump forms the
                      :      return plus SP (and FP) restore elements added below.  The final
                      :      gcc_assert on VI checks every slot was filled.  */
   10188         7621 :   elems_needed = ncregs;
   10189         7621 :   if (use_call)
   10190         6498 :     elems_needed += 1;
   10191              :   else
   10192         1275 :     elems_needed += frame_pointer_needed ? 5 : 3;
   10193         7621 :   v = rtvec_alloc (elems_needed);
   10194              : 
   10195              :   /* We call the epilogue stub when we need to pop incoming args or we are
   10196              :      doing a sibling call as the tail.  Otherwise, we will emit a jmp to the
   10197              :      epilogue stub and it is the tail-call.  */
   10198         7621 :   if (use_call)
   10199         6498 :       RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
   10200              :   else
   10201              :     {
   10202         1123 :       RTVEC_ELT (v, vi++) = ret_rtx;
   10203         1123 :       RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
   10204         1123 :       if (frame_pointer_needed)
   10205              :         {
   10206          971 :           rtx rbp = gen_rtx_REG (DImode, BP_REG);
   10207          971 :           gcc_assert (m->fs.fp_valid);
   10208          971 :           gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
   10209              : 
                      :           /* Describe the stub's leave-style teardown: SP = RBP + 8,
                      :              RBP reloaded from the saved slot, plus a memory clobber.  */
   10210          971 :           tmp = plus_constant (DImode, rbp, 8);
   10211          971 :           RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
   10212          971 :           RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
   10213          971 :           tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
   10214          971 :           RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
   10215              :         }
   10216              :       else
   10217              :         {
   10218              :           /* If no hard frame pointer, we set R10 to the SP restore value.  */
   10219          152 :           gcc_assert (!m->fs.fp_valid);
   10220          152 :           gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
   10221          152 :           gcc_assert (m->fs.sp_valid);
   10222              : 
   10223          152 :           r10 = gen_rtx_REG (DImode, R10_REG);
   10224          152 :           tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
   10225          152 :           emit_insn (gen_rtx_SET (r10, tmp));
   10226              : 
   10227          152 :           RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
   10228              :         }
   10229              :     }
   10230              : 
   10231              :   /* Generate frame load insns and restore notes.  */
   10232       107954 :   for (i = 0; i < ncregs; ++i)
   10233              :     {
   10234       100333 :       const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
   10235       100333 :       machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
   10236       100333 :       rtx reg, frame_load;
   10237              : 
   10238       100333 :       reg = gen_rtx_REG (mode, r.regno);
   10239       100333 :       frame_load = gen_frame_load (reg, rsi, r.offset);
   10240              : 
   10241              :       /* Save RSI frame load insn & note to add last.  Every frame
                      :          load uses RSI as its base register, so RSI itself must be
                      :          restored only after all other loads.  */
   10242       100333 :       if (r.regno == SI_REG)
   10243              :         {
   10244         7621 :           gcc_assert (!rsi_frame_load);
   10245         7621 :           rsi_frame_load = frame_load;
   10246         7621 :           rsi_restore_offset = r.offset;
   10247              :         }
   10248              :       else
   10249              :         {
   10250        92712 :           RTVEC_ELT (v, vi++) = frame_load;
   10251        92712 :           ix86_add_cfa_restore_note (NULL, reg, r.offset);
   10252              :         }
   10253              :     }
   10254              : 
   10255              :   /* Add RSI frame load & restore note at the end.  */
   10256         7621 :   gcc_assert (rsi_frame_load);
   10257         7621 :   gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
   10258         7621 :   RTVEC_ELT (v, vi++) = rsi_frame_load;
   10259         7621 :   ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
   10260              :                              rsi_restore_offset);
   10261              : 
   10262              :   /* Finally, for tail-call w/o a hard frame pointer, set SP to R10.  */
   10263         7621 :   if (!use_call && !frame_pointer_needed)
   10264              :     {
   10265          152 :       gcc_assert (m->fs.sp_valid);
   10266          152 :       gcc_assert (!m->fs.sp_realigned);
   10267              : 
   10268              :       /* At this point, R10 should point to frame.stack_realign_offset.  */
   10269          152 :       if (m->fs.cfa_reg == stack_pointer_rtx)
   10270          152 :         m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
   10271          152 :       m->fs.sp_offset = frame.stack_realign_offset;
   10272              :     }
   10273              : 
                      :   /* Emit the whole PARALLEL as one insn: a plain insn for the call
                      :      form, a jump insn (the function's tail) otherwise.  */
   10274         7621 :   gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
   10275         7621 :   tmp = gen_rtx_PARALLEL (VOIDmode, v);
   10276         7621 :   if (use_call)
   10277         6498 :       insn = emit_insn (tmp);
   10278              :   else
   10279              :     {
   10280         1123 :       insn = emit_jump_insn (tmp);
   10281         1123 :       JUMP_LABEL (insn) = ret_rtx;
   10282              : 
   10283         1123 :       if (frame_pointer_needed)
   10284          971 :         ix86_emit_leave (insn);
   10285              :       else
   10286              :         {
   10287              :           /* Need CFA adjust note.  */
   10288          152 :           tmp = gen_rtx_SET (stack_pointer_rtx, r10);
   10289          152 :           add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
   10290              :         }
   10291              :     }
   10292              : 
   10293         7621 :   RTX_FRAME_RELATED_P (insn) = true;
   10294         7621 :   ix86_add_queued_cfa_restore_notes (insn);
   10295              : 
   10296              :   /* If we're not doing a tail-call, we need to adjust the stack.  */
   10297         7621 :   if (use_call && m->fs.sp_valid)
   10298              :     {
   10299         3706 :       HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
   10300         3706 :       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   10301              :                                 GEN_INT (dealloc), style,
   10302         3706 :                                 m->fs.cfa_reg == stack_pointer_rtx);
   10303              :     }
   10304         7621 : }
   10305              : 
   10306              : /* Restore function stack, frame, and registers.  */
   10307              : 
   10308              : void
   10309      1650066 : ix86_expand_epilogue (int style)
   10310              : {
   10311      1650066 :   struct machine_function *m = cfun->machine;
   10312      1650066 :   struct machine_frame_state frame_state_save = m->fs;
   10313      1650066 :   bool restore_regs_via_mov;
   10314      1650066 :   bool using_drap;
   10315      1650066 :   bool restore_stub_is_tail = false;
   10316              : 
   10317      1650066 :   if (ix86_function_naked (current_function_decl))
   10318              :     {
   10319              :       /* The program should not reach this point.  */
   10320           74 :       emit_insn (gen_ud2 ());
   10321       125314 :       return;
   10322              :     }
   10323              : 
   10324      1649992 :   ix86_finalize_stack_frame_flags ();
   10325      1649992 :   const struct ix86_frame &frame = cfun->machine->frame;
   10326              : 
   10327      1649992 :   m->fs.sp_realigned = stack_realign_fp;
   10328        31913 :   m->fs.sp_valid = stack_realign_fp
   10329      1625257 :                    || !frame_pointer_needed
   10330      2106523 :                    || crtl->sp_is_unchanging;
   10331      1649992 :   gcc_assert (!m->fs.sp_valid
   10332              :               || m->fs.sp_offset == frame.stack_pointer_offset);
   10333              : 
   10334              :   /* The FP must be valid if the frame pointer is present.  */
   10335      1649992 :   gcc_assert (frame_pointer_needed == m->fs.fp_valid);
   10336      1649992 :   gcc_assert (!m->fs.fp_valid
   10337              :               || m->fs.fp_offset == frame.hard_frame_pointer_offset);
   10338              : 
   10339              :   /* We must have *some* valid pointer to the stack frame.  */
   10340      1649992 :   gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
   10341              : 
   10342              :   /* The DRAP is never valid at this point.  */
   10343      1649992 :   gcc_assert (!m->fs.drap_valid);
   10344              : 
   10345              :   /* See the comment about red zone and frame
   10346              :      pointer usage in ix86_expand_prologue.  */
   10347      1649992 :   if (frame_pointer_needed && frame.red_zone_size)
   10348       129199 :     emit_insn (gen_memory_blockage ());
   10349              : 
   10350      1649992 :   using_drap = crtl->drap_reg && crtl->stack_realign_needed;
   10351         7178 :   gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
   10352              : 
   10353              :   /* Determine the CFA offset of the end of the red-zone.  */
   10354      1649992 :   m->fs.red_zone_offset = 0;
   10355      1649992 :   if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
   10356              :     {
   10357              :       /* The red-zone begins below return address and error code in
   10358              :          exception handler.  */
   10359      1472629 :       m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
   10360              : 
   10361              :       /* When the register save area is in the aligned portion of
   10362              :          the stack, determine the maximum runtime displacement that
   10363              :          matches up with the aligned frame.  */
   10364      1472629 :       if (stack_realign_drap)
   10365         8626 :         m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
   10366         4313 :                                   + UNITS_PER_WORD);
   10367              :     }
   10368              : 
   10369      1649992 :   HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
   10370              : 
   10371              :   /* Special care must be taken for the normal return case of a function
   10372              :      using eh_return: the eax and edx registers are marked as saved, but
   10373              :      not restored along this path.  Adjust the save location to match.  */
   10374      1649992 :   if (crtl->calls_eh_return && style != 2)
   10375           37 :     reg_save_offset -= 2 * UNITS_PER_WORD;
   10376              : 
   10377              :   /* EH_RETURN requires the use of moves to function properly.  */
   10378      1649992 :   if (crtl->calls_eh_return)
   10379              :     restore_regs_via_mov = true;
   10380              :   /* SEH requires the use of pops to identify the epilogue.  */
   10381      1649934 :   else if (TARGET_SEH)
   10382              :     restore_regs_via_mov = false;
   10383              :   /* If we already save reg with pushp, don't use move at epilogue.  */
   10384      1649934 :   else if (m->fs.apx_ppx_used)
   10385              :     restore_regs_via_mov = false;
   10386              :   /* If we're only restoring one register and sp cannot be used then
   10387              :      using a move instruction to restore the register since it's
   10388              :      less work than reloading sp and popping the register.  */
   10389      1649847 :   else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
   10390              :     restore_regs_via_mov = true;
   10391      1588949 :   else if (crtl->shrink_wrapped_separate
   10392      1536550 :            || (TARGET_EPILOGUE_USING_MOVE
   10393        56735 :                && cfun->machine->use_fast_prologue_epilogue
   10394        56679 :                && (frame.nregs > 1
   10395        56666 :                    || m->fs.sp_offset != reg_save_offset)))
   10396              :     restore_regs_via_mov = true;
   10397      1536315 :   else if (frame_pointer_needed
   10398       417770 :            && !frame.nregs
   10399       322640 :            && m->fs.sp_offset != reg_save_offset)
   10400              :     restore_regs_via_mov = true;
   10401      1385509 :   else if (frame_pointer_needed
   10402       266964 :            && TARGET_USE_LEAVE
   10403       266889 :            && cfun->machine->use_fast_prologue_epilogue
   10404       209986 :            && frame.nregs == 1)
   10405              :     restore_regs_via_mov = true;
   10406              :   else
   10407      1649992 :     restore_regs_via_mov = false;
   10408              : 
   10409      1649992 :   if (crtl->shrink_wrapped_separate)
   10410        52430 :     gcc_assert (restore_regs_via_mov);
   10411              : 
   10412      1597562 :   if (restore_regs_via_mov || frame.nsseregs)
   10413              :     {
   10414              :       /* Ensure that the entire register save area is addressable via
   10415              :          the stack pointer, if we will restore SSE regs via sp.  */
   10416       327772 :       if (TARGET_64BIT
   10417       315150 :           && m->fs.sp_offset > 0x7fffffff
   10418           23 :           && sp_valid_at (frame.stack_realign_offset + 1)
   10419       327794 :           && (frame.nsseregs + frame.nregs) != 0)
   10420              :         {
   10421            6 :           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   10422            6 :                                      GEN_INT (m->fs.sp_offset
   10423              :                                               - frame.sse_reg_save_offset),
   10424              :                                      style,
   10425            6 :                                      m->fs.cfa_reg == stack_pointer_rtx);
   10426              :         }
   10427              :     }
   10428              : 
   10429              :   /* If there are any SSE registers to restore, then we have to do it
   10430              :      via moves, since there's obviously no pop for SSE regs.  */
   10431      1649992 :   if (frame.nsseregs)
   10432        33929 :     ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
   10433              :                                           style == 2);
   10434              : 
   10435      1649992 :   if (m->call_ms2sysv)
   10436              :     {
   10437         7621 :       int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
   10438              : 
   10439              :       /* We cannot use a tail-call for the stub if:
   10440              :          1. We have to pop incoming args,
   10441              :          2. We have additional int regs to restore, or
   10442              :          3. A sibling call will be the tail-call, or
   10443              :          4. We are emitting an eh_return_internal epilogue.
   10444              : 
   10445              :          TODO: Item 4 has not yet been tested!
   10446              : 
   10447              :          If any of the above are true, we will call the stub rather than
   10448              :          jump to it.  */
   10449         7621 :       restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
   10450         7621 :       ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
   10451              :     }
   10452              : 
   10453              :   /* If using out-of-line stub that is a tail-call, then...*/
   10454      1649992 :   if (m->call_ms2sysv && restore_stub_is_tail)
   10455              :     {
   10456              :       /* TODO: paranoid tests. (remove eventually)  */
   10457         1123 :       gcc_assert (m->fs.sp_valid);
   10458         1123 :       gcc_assert (!m->fs.sp_realigned);
   10459         1123 :       gcc_assert (!m->fs.fp_valid);
   10460         1123 :       gcc_assert (!m->fs.realigned);
   10461         1123 :       gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
   10462         1123 :       gcc_assert (!crtl->drap_reg);
   10463         1123 :       gcc_assert (!frame.nregs);
   10464         1123 :       gcc_assert (!crtl->shrink_wrapped_separate);
   10465              :     }
   10466      1648869 :   else if (restore_regs_via_mov)
   10467              :     {
   10468       293074 :       rtx t;
   10469              : 
   10470       293074 :       if (frame.nregs)
   10471        96200 :         ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
   10472              : 
   10473              :       /* eh_return epilogues need %ecx added to the stack pointer.  */
   10474       293074 :       if (style == 2)
   10475              :         {
   10476           37 :           rtx sa = EH_RETURN_STACKADJ_RTX;
   10477           29 :           rtx_insn *insn;
   10478              : 
   10479           29 :           gcc_assert (!crtl->shrink_wrapped_separate);
   10480              : 
   10481              :           /* Stack realignment doesn't work with eh_return.  */
   10482           29 :           if (crtl->stack_realign_needed)
   10483            0 :             sorry ("Stack realignment not supported with "
   10484              :                    "%<__builtin_eh_return%>");
   10485              : 
   10486              :           /* regparm nested functions don't work with eh_return.  */
   10487           29 :           if (ix86_static_chain_on_stack)
   10488            0 :             sorry ("regparm nested function not supported with "
   10489              :                    "%<__builtin_eh_return%>");
   10490              : 
   10491           29 :           if (frame_pointer_needed)
   10492              :             {
   10493           35 :               t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
   10494           43 :               t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
   10495           27 :               emit_insn (gen_rtx_SET (sa, t));
   10496              : 
   10497              :               /* NB: eh_return epilogues must restore the frame pointer
   10498              :                  in word_mode since the upper 32 bits of RBP register
   10499              :                  can have any values.  */
   10500           27 :               t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
   10501           27 :               rtx frame_reg = gen_rtx_REG (word_mode,
   10502              :                                            HARD_FRAME_POINTER_REGNUM);
   10503           27 :               insn = emit_move_insn (frame_reg, t);
   10504              : 
   10505              :               /* Note that we use SA as a temporary CFA, as the return
   10506              :                  address is at the proper place relative to it.  We
   10507              :                  pretend this happens at the FP restore insn because
   10508              :                  prior to this insn the FP would be stored at the wrong
   10509              :                  offset relative to SA, and after this insn we have no
   10510              :                  other reasonable register to use for the CFA.  We don't
   10511              :                  bother resetting the CFA to the SP for the duration of
   10512              :                  the return insn, unless the control flow instrumentation
   10513              :                  is done.  In this case the SP is used later and we have
   10514              :                  to reset CFA to SP.  */
   10515           27 :               add_reg_note (insn, REG_CFA_DEF_CFA,
   10516           35 :                             plus_constant (Pmode, sa, UNITS_PER_WORD));
   10517           27 :               ix86_add_queued_cfa_restore_notes (insn);
   10518           27 :               add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
   10519           27 :               RTX_FRAME_RELATED_P (insn) = 1;
   10520              : 
   10521           27 :               m->fs.cfa_reg = sa;
   10522           27 :               m->fs.cfa_offset = UNITS_PER_WORD;
   10523           27 :               m->fs.fp_valid = false;
   10524              : 
   10525           27 :               pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
   10526              :                                          const0_rtx, style,
   10527           27 :                                          flag_cf_protection);
   10528              :             }
   10529              :           else
   10530              :             {
   10531            2 :               t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
   10532            2 :               t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
   10533            2 :               insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
   10534            2 :               ix86_add_queued_cfa_restore_notes (insn);
   10535              : 
   10536            2 :               gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
   10537            2 :               if (m->fs.cfa_offset != UNITS_PER_WORD)
   10538              :                 {
   10539            2 :                   m->fs.cfa_offset = UNITS_PER_WORD;
   10540            2 :                   add_reg_note (insn, REG_CFA_DEF_CFA,
   10541            2 :                                 plus_constant (Pmode, stack_pointer_rtx,
   10542            2 :                                                UNITS_PER_WORD));
   10543            2 :                   RTX_FRAME_RELATED_P (insn) = 1;
   10544              :                 }
   10545              :             }
   10546           29 :           m->fs.sp_offset = UNITS_PER_WORD;
   10547           29 :           m->fs.sp_valid = true;
   10548           29 :           m->fs.sp_realigned = false;
   10549              :         }
   10550              :     }
   10551              :   else
   10552              :     {
   10553              :       /* SEH requires that the function end with (1) a stack adjustment
   10554              :          if necessary, (2) a sequence of pops, and (3) a return or
   10555              :          jump instruction.  Prevent insns from the function body from
   10556              :          being scheduled into this sequence.  */
   10557      1355795 :       if (TARGET_SEH)
   10558              :         {
   10559              :           /* Prevent a catch region from being adjacent to the standard
   10560              :              epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda
   10561              :              nor several other flags that would be interesting to test are
   10562              :              set up yet.  */
   10563              :           if (flag_non_call_exceptions)
   10564              :             emit_insn (gen_nops (const1_rtx));
   10565              :           else
   10566              :             emit_insn (gen_blockage ());
   10567              :         }
   10568              : 
   10569              :       /* First step is to deallocate the stack frame so that we can
   10570              :          pop the registers.  If the stack pointer was realigned, it needs
   10571              :          to be restored now.  Also do it on SEH target for very large
   10572              :          frame as the emitted instructions aren't allowed by the ABI
   10573              :          in epilogues.  */
   10574      1355795 :       if (!m->fs.sp_valid || m->fs.sp_realigned
   10575              :           || (TARGET_SEH
   10576              :               && (m->fs.sp_offset - reg_save_offset
   10577              :                   >= SEH_MAX_FRAME_SIZE)))
   10578              :         {
   10579        29756 :           pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
   10580        29756 :                                      GEN_INT (m->fs.fp_offset
   10581              :                                               - reg_save_offset),
   10582              :                                      style, false);
   10583              :         }
   10584      1326039 :       else if (m->fs.sp_offset != reg_save_offset)
   10585              :         {
   10586       613669 :           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   10587              :                                      GEN_INT (m->fs.sp_offset
   10588              :                                               - reg_save_offset),
   10589              :                                      style,
   10590       613669 :                                      m->fs.cfa_reg == stack_pointer_rtx);
   10591              :         }
   10592              : 
   10593      1355795 :       if (TARGET_APX_PUSH2POP2
   10594          564 :           && ix86_can_use_push2pop2 ()
   10595      1356357 :           && m->func_type == TYPE_NORMAL)
   10596          561 :         ix86_emit_restore_regs_using_pop2 ();
   10597              :       else
   10598      1355234 :         ix86_emit_restore_regs_using_pop (TARGET_APX_PPX);
   10599              :     }
   10600              : 
   10601              :   /* If we used a stack pointer and haven't already got rid of it,
   10602              :      then do so now.  */
   10603      1649992 :   if (m->fs.fp_valid)
   10604              :     {
   10605              :       /* If the stack pointer is valid and pointing at the frame
   10606              :          pointer store address, then we only need a pop.  */
   10607       480268 :       if (sp_valid_at (frame.hfp_save_offset)
   10608       480268 :           && m->fs.sp_offset == frame.hfp_save_offset)
   10609       237399 :         ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
   10610              :       /* Leave results in shorter dependency chains on CPUs that are
   10611              :          able to grok it fast.  */
   10612       242869 :       else if (TARGET_USE_LEAVE
   10613           12 :                || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
   10614       242881 :                || !cfun->machine->use_fast_prologue_epilogue)
   10615       242857 :         ix86_emit_leave (NULL);
   10616              :       else
   10617              :         {
   10618           12 :           pro_epilogue_adjust_stack (stack_pointer_rtx,
   10619              :                                      hard_frame_pointer_rtx,
   10620           12 :                                      const0_rtx, style, !using_drap);
   10621           12 :           ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
   10622              :         }
   10623              :     }
   10624              : 
   10625      1649992 :   if (using_drap)
   10626              :     {
   10627         7178 :       int param_ptr_offset = UNITS_PER_WORD;
   10628         7178 :       rtx_insn *insn;
   10629              : 
   10630         7178 :       gcc_assert (stack_realign_drap);
   10631              : 
   10632         7178 :       if (ix86_static_chain_on_stack)
   10633            0 :         param_ptr_offset += UNITS_PER_WORD;
   10634         7178 :       if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
   10635          230 :         param_ptr_offset += UNITS_PER_WORD;
   10636              : 
   10637         7483 :       insn = emit_insn (gen_rtx_SET
   10638              :                         (stack_pointer_rtx,
   10639              :                          plus_constant (Pmode, crtl->drap_reg,
   10640              :                                         -param_ptr_offset)));
   10641         7178 :       m->fs.cfa_reg = stack_pointer_rtx;
   10642         7178 :       m->fs.cfa_offset = param_ptr_offset;
   10643         7178 :       m->fs.sp_offset = param_ptr_offset;
   10644         7178 :       m->fs.realigned = false;
   10645              : 
   10646         7483 :       add_reg_note (insn, REG_CFA_DEF_CFA,
   10647         7178 :                     plus_constant (Pmode, stack_pointer_rtx,
   10648         7178 :                                    param_ptr_offset));
   10649         7178 :       RTX_FRAME_RELATED_P (insn) = 1;
   10650              : 
   10651         7178 :       if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
   10652          230 :         ix86_emit_restore_reg_using_pop (crtl->drap_reg);
   10653              :     }
   10654              : 
   10655              :   /* At this point the stack pointer must be valid, and we must have
   10656              :      restored all of the registers.  We may not have deallocated the
   10657              :      entire stack frame.  We've delayed this until now because it may
   10658              :      be possible to merge the local stack deallocation with the
   10659              :      deallocation forced by ix86_static_chain_on_stack.   */
   10660      1649992 :   gcc_assert (m->fs.sp_valid);
   10661      1649992 :   gcc_assert (!m->fs.sp_realigned);
   10662      1649992 :   gcc_assert (!m->fs.fp_valid);
   10663      1649992 :   gcc_assert (!m->fs.realigned);
   10664      1785639 :   if (m->fs.sp_offset != UNITS_PER_WORD)
   10665              :     {
   10666        50162 :       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   10667              :                                  GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
   10668              :                                  style, true);
   10669              :     }
   10670              :   else
   10671      1599830 :     ix86_add_queued_cfa_restore_notes (get_last_insn ());
   10672              : 
   10673              :   /* Sibcall epilogues don't want a return instruction.  */
   10674      1649992 :   if (style == 0)
   10675              :     {
   10676       125166 :       m->fs = frame_state_save;
   10677       125166 :       return;
   10678              :     }
   10679              : 
   10680      1524826 :   if (cfun->machine->func_type != TYPE_NORMAL)
   10681          120 :     emit_jump_insn (gen_interrupt_return ());
   10682      1524706 :   else if (crtl->args.pops_args && crtl->args.size)
   10683              :     {
   10684        25987 :       rtx popc = GEN_INT (crtl->args.pops_args);
   10685              : 
   10686              :       /* i386 can only pop 64K bytes.  If asked to pop more, pop return
   10687              :          address, do explicit add, and jump indirectly to the caller.  */
   10688              : 
   10689        25987 :       if (crtl->args.pops_args >= 65536)
   10690              :         {
   10691            0 :           rtx ecx = gen_rtx_REG (SImode, CX_REG);
   10692            0 :           rtx_insn *insn;
   10693              : 
   10694              :           /* There is no "pascal" calling convention in any 64bit ABI.  */
   10695            0 :           gcc_assert (!TARGET_64BIT);
   10696              : 
   10697            0 :           insn = emit_insn (gen_pop (ecx));
   10698            0 :           m->fs.cfa_offset -= UNITS_PER_WORD;
   10699            0 :           m->fs.sp_offset -= UNITS_PER_WORD;
   10700              : 
   10701            0 :           rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
   10702            0 :           x = gen_rtx_SET (stack_pointer_rtx, x);
   10703            0 :           add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
   10704            0 :           add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
   10705            0 :           RTX_FRAME_RELATED_P (insn) = 1;
   10706              : 
   10707            0 :           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   10708              :                                      popc, -1, true);
   10709            0 :           emit_jump_insn (gen_simple_return_indirect_internal (ecx));
   10710              :         }
   10711              :       else
   10712        25987 :         emit_jump_insn (gen_simple_return_pop_internal (popc));
   10713              :     }
   10714      1498719 :   else if (!m->call_ms2sysv || !restore_stub_is_tail)
   10715              :     {
   10716              :       /* In case of return from EH a simple return cannot be used
   10717              :          as a return address will be compared with a shadow stack
   10718              :          return address.  Use indirect jump instead.  */
   10719      1497596 :       if (style == 2 && flag_cf_protection)
   10720              :         {
   10721              :           /* Register used in indirect jump must be in word_mode.  But
   10722              :              Pmode may not be the same as word_mode for x32.  */
   10723           17 :           rtx ecx = gen_rtx_REG (word_mode, CX_REG);
   10724           17 :           rtx_insn *insn;
   10725              : 
   10726           17 :           insn = emit_insn (gen_pop (ecx));
   10727           17 :           m->fs.cfa_offset -= UNITS_PER_WORD;
   10728           17 :           m->fs.sp_offset -= UNITS_PER_WORD;
   10729              : 
   10730           33 :           rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
   10731           17 :           x = gen_rtx_SET (stack_pointer_rtx, x);
   10732           17 :           add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
   10733           17 :           add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
   10734           17 :           RTX_FRAME_RELATED_P (insn) = 1;
   10735              : 
   10736           17 :           emit_jump_insn (gen_simple_return_indirect_internal (ecx));
   10737           17 :         }
   10738              :       else
   10739      1497579 :         emit_jump_insn (gen_simple_return_internal ());
   10740              :     }
   10741              : 
   10742              :   /* Restore the state back to the state from the prologue,
   10743              :      so that it's correct for the next epilogue.  */
   10744      1524826 :   m->fs = frame_state_save;
   10745              : }
   10746              : 
/* Reset from the function's potential modifications.  On Mach-O,
   additionally guard against a function body that ends in a label or
   is completely empty, neither of which Mach-O supports.
   NOTE(review): presumably the TARGET_ASM_FUNCTION_EPILOGUE hook —
   confirm against the hook table.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
{
  /* If the PIC register is a real hard register, restore its register
     number for the next function (code generation for this function
     may have renumbered it).  */
  if (pic_offset_table_rtx
      && !ix86_use_pseudo_pic_reg ())
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);

  if (TARGET_MACHO)
    {
      /* Walk backwards from the last insn to see what the function
         body ends with.  */
      rtx_insn *insn = get_last_insn ();
      rtx_insn *deleted_debug_label = NULL;

      /* Mach-O doesn't support labels at the end of objects, so if
         it looks like we might want one, take special action.
        First, collect any sequence of deleted debug labels.  */
      while (insn
             && NOTE_P (insn)
             && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
        {
          /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
             notes only, instead set their CODE_LABEL_NUMBER to -1,
             otherwise there would be code generation differences
             in between -g and -g0.  */
          if (NOTE_P (insn) && NOTE_KIND (insn)
              == NOTE_INSN_DELETED_DEBUG_LABEL)
            deleted_debug_label = insn;
          insn = PREV_INSN (insn);
        }

      /* If we have:
         label:
            barrier
          then this needs to be detected, so skip past the barrier.  */

      if (insn && BARRIER_P (insn))
        insn = PREV_INSN (insn);

      /* Up to now we've only seen notes or barriers.  */
      if (insn)
        {
          if (LABEL_P (insn)
              || (NOTE_P (insn)
                  && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
            /* Trailing label: pad with a nop so the label is not at
               the very end of the object.  */
            fputs ("\tnop\n", file);
          else if (cfun && ! cfun->is_thunk)
            {
              /* See if we have a completely empty function body, skipping
                 the special case of the picbase thunk emitted as asm.  */
              while (insn && ! INSN_P (insn))
                insn = PREV_INSN (insn);
              /* If we don't find any insns, we've got an empty function body;
                 I.e. completely empty - without a return or branch.  This is
                 taken as the case where a function body has been removed
                 because it contains an inline __builtin_unreachable().  GCC
                 declares that reaching __builtin_unreachable() means UB so
                 we're not obliged to do anything special; however, we want
                 non-zero-sized function bodies.  To meet this, and help the
                 user out, let's trap the case.  */
              if (insn == NULL)
                fputs ("\tud2\n", file);
            }
        }
      else if (deleted_debug_label)
        /* The entire body was notes/barriers: renumber the deleted
           debug labels so -g and -g0 generate identical code.  */
        for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
          if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
            CODE_LABEL_NUMBER (insn) = -1;
    }
}
   10818              : 
   10819              : /* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY.  */
   10820              : 
   10821              : void
   10822           59 : ix86_print_patchable_function_entry (FILE *file,
   10823              :                                      unsigned HOST_WIDE_INT patch_area_size,
   10824              :                                      bool record_p)
   10825              : {
   10826           59 :   if (cfun->machine->function_label_emitted)
   10827              :     {
   10828              :       /* NB: When ix86_print_patchable_function_entry is called after
   10829              :          function table has been emitted, we have inserted or queued
   10830              :          a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
   10831              :          place.  There is nothing to do here.  */
   10832              :       return;
   10833              :     }
   10834              : 
   10835            8 :   default_print_patchable_function_entry (file, patch_area_size,
   10836              :                                           record_p);
   10837              : }
   10838              : 
   10839              : /* Output patchable area.  NB: default_print_patchable_function_entry
   10840              :    isn't available in i386.md.  */
   10841              : 
   10842              : void
   10843           51 : ix86_output_patchable_area (unsigned int patch_area_size,
   10844              :                             bool record_p)
   10845              : {
   10846           51 :   default_print_patchable_function_entry (asm_out_file,
   10847              :                                           patch_area_size,
   10848              :                                           record_p);
   10849           51 : }
   10850              : 
   10851              : /* Return a scratch register to use in the split stack prologue.  The
   10852              :    split stack prologue is used for -fsplit-stack.  It is the first
   10853              :    instructions in the function, even before the regular prologue.
   10854              :    The scratch register can be any caller-saved register which is not
   10855              :    used for parameters or for the static chain.  */
   10856              : 
   10857              : static unsigned int
   10858        24613 : split_stack_prologue_scratch_regno (void)
   10859              : {
   10860        24613 :   if (TARGET_64BIT)
   10861              :     return R11_REG;
   10862              :   else
   10863              :     {
   10864         6950 :       bool is_fastcall, is_thiscall;
   10865         6950 :       int regparm;
   10866              : 
   10867         6950 :       is_fastcall = (lookup_attribute ("fastcall",
   10868         6950 :                                        TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
   10869              :                      != NULL);
   10870         6950 :       is_thiscall = (lookup_attribute ("thiscall",
   10871         6950 :                                        TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
   10872              :                      != NULL);
   10873         6950 :       regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
   10874              : 
   10875         6950 :       if (is_fastcall)
   10876              :         {
   10877            0 :           if (DECL_STATIC_CHAIN (cfun->decl))
   10878              :             {
   10879            0 :               sorry ("%<-fsplit-stack%> does not support fastcall with "
   10880              :                      "nested function");
   10881            0 :               return INVALID_REGNUM;
   10882              :             }
   10883              :           return AX_REG;
   10884              :         }
   10885         6950 :       else if (is_thiscall)
   10886              :         {
   10887            0 :           if (!DECL_STATIC_CHAIN (cfun->decl))
   10888              :             return DX_REG;
   10889            0 :           return AX_REG;
   10890              :         }
   10891         6950 :       else if (regparm < 3)
   10892              :         {
   10893         6950 :           if (!DECL_STATIC_CHAIN (cfun->decl))
   10894              :             return CX_REG;
   10895              :           else
   10896              :             {
   10897          459 :               if (regparm >= 2)
   10898              :                 {
   10899            0 :                   sorry ("%<-fsplit-stack%> does not support 2 register "
   10900              :                          "parameters for a nested function");
   10901            0 :                   return INVALID_REGNUM;
   10902              :                 }
   10903              :               return DX_REG;
   10904              :             }
   10905              :         }
   10906              :       else
   10907              :         {
   10908              :           /* FIXME: We could make this work by pushing a register
   10909              :              around the addition and comparison.  */
   10910            0 :           sorry ("%<-fsplit-stack%> does not support 3 register parameters");
   10911            0 :           return INVALID_REGNUM;
   10912              :         }
   10913              :     }
   10914              : }
   10915              : 
/* A SYMBOL_REF for the function which allocates new stackspace for
   -fsplit-stack ("__morestack"); created lazily on first use.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more stack function when using the large model
   ("__morestack_large_model"); also used for -fforce-indirect-call.  */

static GTY(()) rtx split_stack_fn_large;
   10924              : 
   10925              : /* Return location of the stack guard value in the TLS block.  */
   10926              : 
   10927              : rtx
   10928       259942 : ix86_split_stack_guard (void)
   10929              : {
   10930       259942 :   int offset;
   10931       259942 :   addr_space_t as = DEFAULT_TLS_SEG_REG;
   10932       259942 :   rtx r;
   10933              : 
   10934       259942 :   gcc_assert (flag_split_stack);
   10935              : 
   10936              : #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
   10937       259942 :   offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
   10938              : #else
   10939              :   gcc_unreachable ();
   10940              : #endif
   10941              : 
   10942       259942 :   r = GEN_INT (offset);
   10943       357899 :   r = gen_const_mem (Pmode, r);
   10944       259942 :   set_mem_addr_space (r, as);
   10945              : 
   10946       259942 :   return r;
   10947              : }
   10948              : 
   10949              : /* Handle -fsplit-stack.  These are the first instructions in the
   10950              :    function, even before the regular prologue.  */
   10951              : 
   10952              : void
   10953       259932 : ix86_expand_split_stack_prologue (void)
   10954              : {
   10955       259932 :   HOST_WIDE_INT allocate;
   10956       259932 :   unsigned HOST_WIDE_INT args_size;
   10957       259932 :   rtx_code_label *label;
   10958       259932 :   rtx limit, current, allocate_rtx, call_fusage;
   10959       259932 :   rtx_insn *call_insn;
   10960       259932 :   unsigned int scratch_regno = INVALID_REGNUM;
   10961       259932 :   rtx scratch_reg = NULL_RTX;
   10962       259932 :   rtx_code_label *varargs_label = NULL;
   10963       259932 :   rtx fn;
   10964              : 
   10965       259932 :   gcc_assert (flag_split_stack && reload_completed);
   10966              : 
   10967       259932 :   ix86_finalize_stack_frame_flags ();
   10968       259932 :   struct ix86_frame &frame = cfun->machine->frame;
   10969       259932 :   allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
   10970              : 
   10971              :   /* This is the label we will branch to if we have enough stack
   10972              :      space.  We expect the basic block reordering pass to reverse this
   10973              :      branch if optimizing, so that we branch in the unlikely case.  */
   10974       259932 :   label = gen_label_rtx ();
   10975              : 
   10976              :   /* We need to compare the stack pointer minus the frame size with
   10977              :      the stack boundary in the TCB.  The stack boundary always gives
   10978              :      us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
   10979              :      can compare directly.  Otherwise we need to do an addition.  */
   10980              : 
   10981       259932 :   limit = ix86_split_stack_guard ();
   10982              : 
   10983       259932 :   if (allocate >= SPLIT_STACK_AVAILABLE
   10984       235482 :       || flag_force_indirect_call)
   10985              :     {
   10986        24465 :       scratch_regno = split_stack_prologue_scratch_regno ();
   10987        24465 :       if (scratch_regno == INVALID_REGNUM)
   10988            0 :         return;
   10989              :     }
   10990              : 
   10991       259932 :   if (allocate >= SPLIT_STACK_AVAILABLE)
   10992              :     {
   10993        24450 :       rtx offset;
   10994              : 
   10995              :       /* We need a scratch register to hold the stack pointer minus
   10996              :          the required frame size.  Since this is the very start of the
   10997              :          function, the scratch register can be any caller-saved
   10998              :          register which is not used for parameters.  */
   10999        24450 :       offset = GEN_INT (- allocate);
   11000              : 
   11001        31346 :       scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
   11002        24450 :       if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
   11003              :         {
   11004              :           /* We don't use gen_add in this case because it will
   11005              :              want to split to lea, but when not optimizing the insn
   11006              :              will not be split after this point.  */
   11007        31346 :           emit_insn (gen_rtx_SET (scratch_reg,
   11008              :                                   gen_rtx_PLUS (Pmode, stack_pointer_rtx,
   11009              :                                                 offset)));
   11010              :         }
   11011              :       else
   11012              :         {
   11013            0 :           emit_move_insn (scratch_reg, offset);
   11014            0 :           emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
   11015              :         }
   11016              :       current = scratch_reg;
   11017              :     }
   11018              :   else
   11019       235482 :     current = stack_pointer_rtx;
   11020              : 
   11021       259932 :   ix86_expand_branch (GEU, current, limit, label);
   11022       259932 :   rtx_insn *jump_insn = get_last_insn ();
   11023       259932 :   JUMP_LABEL (jump_insn) = label;
   11024              : 
   11025              :   /* Mark the jump as very likely to be taken.  */
   11026       259932 :   add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());
   11027              : 
   11028       259932 :   if (split_stack_fn == NULL_RTX)
   11029              :     {
   11030         5451 :       split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
   11031         4347 :       SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
   11032              :     }
   11033       259932 :   fn = split_stack_fn;
   11034              : 
   11035              :   /* Get more stack space.  We pass in the desired stack space and the
   11036              :      size of the arguments to copy to the new stack.  In 32-bit mode
   11037              :      we push the parameters; __morestack will return on a new stack
   11038              :      anyhow.  In 64-bit mode we pass the parameters in r10 and
   11039              :      r11.  */
   11040       259932 :   allocate_rtx = GEN_INT (allocate);
   11041       259932 :   args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
   11042       259932 :   call_fusage = NULL_RTX;
   11043       259932 :   rtx pop = NULL_RTX;
   11044       259932 :   if (TARGET_64BIT)
   11045              :     {
   11046       161975 :       rtx reg10, reg11;
   11047              : 
   11048       161975 :       reg10 = gen_rtx_REG (DImode, R10_REG);
   11049       161975 :       reg11 = gen_rtx_REG (DImode, R11_REG);
   11050              : 
   11051              :       /* If this function uses a static chain, it will be in %r10.
   11052              :          Preserve it across the call to __morestack.  */
   11053       161975 :       if (DECL_STATIC_CHAIN (cfun->decl))
   11054              :         {
   11055         7505 :           rtx rax;
   11056              : 
   11057         7505 :           rax = gen_rtx_REG (word_mode, AX_REG);
   11058         7505 :           emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
   11059         7505 :           use_reg (&call_fusage, rax);
   11060              :         }
   11061              : 
   11062       161975 :       if (flag_force_indirect_call
   11063       161960 :           || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
   11064              :         {
   11065           16 :           HOST_WIDE_INT argval;
   11066              : 
   11067           16 :           if (split_stack_fn_large == NULL_RTX)
   11068              :             {
   11069            7 :               split_stack_fn_large
   11070            7 :                 = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
   11071            7 :               SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
   11072              :             }
   11073              : 
   11074           16 :           fn = split_stack_fn_large;
   11075              : 
   11076           16 :           if (ix86_cmodel == CM_LARGE_PIC)
   11077              :             {
   11078            3 :               rtx_code_label *label;
   11079            3 :               rtx x;
   11080              : 
   11081            3 :               gcc_assert (Pmode == DImode);
   11082              : 
   11083            3 :               label = gen_label_rtx ();
   11084            3 :               emit_label (label);
   11085            3 :               LABEL_PRESERVE_P (label) = 1;
   11086            3 :               emit_insn (gen_set_rip_rex64 (reg10, label));
   11087            3 :               emit_insn (gen_set_got_offset_rex64 (reg11, label));
   11088            3 :               emit_insn (gen_add2_insn (reg10, reg11));
   11089            3 :               x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fn), UNSPEC_GOT);
   11090            3 :               x = gen_rtx_CONST (Pmode, x);
   11091            3 :               emit_move_insn (reg11, x);
   11092            3 :               x = gen_rtx_PLUS (Pmode, reg10, reg11);
   11093            3 :               x = gen_const_mem (Pmode, x);
   11094            3 :               fn = copy_to_suggested_reg (x, reg11, Pmode);
   11095              :             }
   11096           13 :           else if (ix86_cmodel == CM_LARGE)
   11097            1 :             fn = copy_to_suggested_reg (fn, reg11, Pmode);
   11098              : 
   11099              :           /* When using the large model we need to load the address
   11100              :              into a register, and we've run out of registers.  So we
   11101              :              switch to a different calling convention, and we call a
   11102              :              different function: __morestack_large.  We pass the
   11103              :              argument size in the upper 32 bits of r10 and pass the
   11104              :              frame size in the lower 32 bits.  */
   11105           16 :           gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
   11106           16 :           gcc_assert ((args_size & 0xffffffff) == args_size);
   11107              : 
   11108           16 :           argval = ((args_size << 16) << 16) + allocate;
   11109           16 :           emit_move_insn (reg10, GEN_INT (argval));
   11110           16 :         }
   11111              :       else
   11112              :         {
   11113       161959 :           emit_move_insn (reg10, allocate_rtx);
   11114       161959 :           emit_move_insn (reg11, GEN_INT (args_size));
   11115       161959 :           use_reg (&call_fusage, reg11);
   11116              :         }
   11117              : 
   11118       161975 :       use_reg (&call_fusage, reg10);
   11119              :     }
   11120              :   else
   11121              :     {
   11122        97957 :       if (flag_force_indirect_call && flag_pic)
   11123              :         {
   11124            0 :           rtx x;
   11125              : 
   11126            0 :           gcc_assert (Pmode == SImode);
   11127              : 
   11128            0 :           scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
   11129              : 
   11130            0 :           emit_insn (gen_set_got (scratch_reg));
   11131            0 :           x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn),
   11132              :                               UNSPEC_GOT);
   11133            0 :           x = gen_rtx_CONST (Pmode, x);
   11134            0 :           x = gen_rtx_PLUS (Pmode, scratch_reg, x);
   11135            0 :           x = gen_const_mem (Pmode, x);
   11136            0 :           fn = copy_to_suggested_reg (x, scratch_reg, Pmode);
   11137              :         }
   11138              : 
   11139        97957 :       rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
   11140       195914 :       add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
   11141        97957 :       insn = emit_insn (gen_push (allocate_rtx));
   11142       195914 :       add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
   11143       195914 :       pop = GEN_INT (2 * UNITS_PER_WORD);
   11144              :     }
   11145              : 
   11146       259932 :   if (flag_force_indirect_call && !register_operand (fn, VOIDmode))
   11147              :     {
   11148           12 :       scratch_reg = gen_rtx_REG (word_mode, scratch_regno);
   11149              : 
   11150           12 :       if (GET_MODE (fn) != word_mode)
   11151            0 :         fn = gen_rtx_ZERO_EXTEND (word_mode, fn);
   11152              : 
   11153           12 :       fn = copy_to_suggested_reg (fn, scratch_reg, word_mode);
   11154              :     }
   11155              : 
   11156       259932 :   call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
   11157       259932 :                                 GEN_INT (UNITS_PER_WORD), constm1_rtx,
   11158              :                                 pop, false);
   11159       259932 :   add_function_usage_to (call_insn, call_fusage);
   11160       259932 :   if (!TARGET_64BIT)
   11161        97957 :     add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
   11162              :   /* Indicate that this function can't jump to non-local gotos.  */
   11163       259932 :   make_reg_eh_region_note_nothrow_nononlocal (call_insn);
   11164              : 
   11165              :   /* In order to make call/return prediction work right, we now need
   11166              :      to execute a return instruction.  See
   11167              :      libgcc/config/i386/morestack.S for the details on how this works.
   11168              : 
   11169              :      For flow purposes gcc must not see this as a return
   11170              :      instruction--we need control flow to continue at the subsequent
   11171              :      label.  Therefore, we use an unspec.  */
   11172       259932 :   gcc_assert (crtl->args.pops_args < 65536);
   11173       259932 :   rtx_insn *ret_insn
   11174       259932 :     = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
   11175              : 
   11176       259932 :   if ((flag_cf_protection & CF_BRANCH))
   11177              :     {
   11178              :       /* Insert ENDBR since __morestack will jump back here via indirect
   11179              :          call.  */
   11180           21 :       rtx cet_eb = gen_nop_endbr ();
   11181           21 :       emit_insn_after (cet_eb, ret_insn);
   11182              :     }
   11183              : 
   11184              :   /* If we are in 64-bit mode and this function uses a static chain,
   11185              :      we saved %r10 in %rax before calling _morestack.  */
   11186       259932 :   if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
   11187         7505 :     emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
   11188              :                     gen_rtx_REG (word_mode, AX_REG));
   11189              : 
   11190              :   /* If this function calls va_start, we need to store a pointer to
   11191              :      the arguments on the old stack, because they may not have been
   11192              :      all copied to the new stack.  At this point the old stack can be
   11193              :      found at the frame pointer value used by __morestack, because
   11194              :      __morestack has set that up before calling back to us.  Here we
   11195              :      store that pointer in a scratch register, and in
   11196              :      ix86_expand_prologue we store the scratch register in a stack
   11197              :      slot.  */
   11198       259932 :   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
   11199              :     {
   11200           12 :       rtx frame_reg;
   11201           12 :       int words;
   11202              : 
   11203           12 :       scratch_regno = split_stack_prologue_scratch_regno ();
   11204           16 :       scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
   11205           16 :       frame_reg = gen_rtx_REG (Pmode, BP_REG);
   11206              : 
   11207              :       /* 64-bit:
   11208              :          fp -> old fp value
   11209              :                return address within this function
   11210              :                return address of caller of this function
   11211              :                stack arguments
   11212              :          So we add three words to get to the stack arguments.
   11213              : 
   11214              :          32-bit:
   11215              :          fp -> old fp value
   11216              :                return address within this function
   11217              :                first argument to __morestack
   11218              :                second argument to __morestack
   11219              :                return address of caller of this function
   11220              :                stack arguments
   11221              :          So we add five words to get to the stack arguments.
   11222              :       */
   11223           12 :       words = TARGET_64BIT ? 3 : 5;
   11224           20 :       emit_insn (gen_rtx_SET (scratch_reg,
   11225              :                               plus_constant (Pmode, frame_reg,
   11226              :                                              words * UNITS_PER_WORD)));
   11227              : 
   11228           12 :       varargs_label = gen_label_rtx ();
   11229           12 :       emit_jump_insn (gen_jump (varargs_label));
   11230           12 :       JUMP_LABEL (get_last_insn ()) = varargs_label;
   11231              : 
   11232           12 :       emit_barrier ();
   11233              :     }
   11234              : 
   11235       259932 :   emit_label (label);
   11236       259932 :   LABEL_NUSES (label) = 1;
   11237              : 
   11238              :   /* If this function calls va_start, we now have to set the scratch
   11239              :      register for the case where we do not call __morestack.  In this
   11240              :      case we need to set it based on the stack pointer.  */
   11241       259932 :   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
   11242              :     {
   11243           20 :       emit_insn (gen_rtx_SET (scratch_reg,
   11244              :                               plus_constant (Pmode, stack_pointer_rtx,
   11245              :                                              UNITS_PER_WORD)));
   11246              : 
   11247           12 :       emit_label (varargs_label);
   11248           12 :       LABEL_NUSES (varargs_label) = 1;
   11249              :     }
   11250              : }
   11251              : 
   11252              : /* We may have to tell the dataflow pass that the split stack prologue
   11253              :    is initializing a scratch register.  */
   11254              : 
   11255              : static void
   11256     15851749 : ix86_live_on_entry (bitmap regs)
   11257              : {
   11258     15851749 :   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
   11259              :     {
   11260          124 :       gcc_assert (flag_split_stack);
   11261          124 :       bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
   11262              :     }
   11263     15851749 : }
   11264              : 
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return false if the structure of the address is
   grossly off.  On success, fill *OUT with the base, index, displacement,
   scale and segment parts of ADDR.  */

bool
ix86_decompose_address (rtx addr, struct ix86_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  rtx base_reg, index_reg;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  rtx tmp;
  addr_space_t seg = ADDR_SPACE_GENERIC;

  /* Allow zero-extended SImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == DImode)
    {
      if (GET_CODE (addr) == ZERO_EXTEND
          && GET_MODE (XEXP (addr, 0)) == SImode)
        {
          addr = XEXP (addr, 0);
          /* A zero-extended constant is not a valid address part.  */
          if (CONST_INT_P (addr))
            return false;
        }
      else if (GET_CODE (addr) == AND)
        {
          rtx mask = XEXP (addr, 1);
          rtx shift_val;

          /* (and:DI x 0xffffffff) is an alternate spelling of a
             zero-extend that combine may produce.  */
          if (const_32bit_mask (mask, DImode)
              /* For ASHIFT inside AND, combine will not generate
                 canonical zero-extend. Merge mask for AND and shift_count
                 to check if it is canonical zero-extend.  */
              || (CONST_INT_P (mask)
                  && GET_CODE (XEXP (addr, 0)) == ASHIFT
                  && CONST_INT_P (shift_val = XEXP (XEXP (addr, 0), 1))
                  && ((UINTVAL (mask)
                       | ((HOST_WIDE_INT_1U << INTVAL (shift_val)) - 1))
                      == HOST_WIDE_INT_UC (0xffffffff))))
            {
              addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
              if (addr == NULL_RTX)
                return false;

              if (CONST_INT_P (addr))
                return false;
            }
        }
    }

  /* Allow SImode subregs of DImode addresses,
     they will be emitted with addr32 prefix.  */
  if (TARGET_64BIT && GET_MODE (addr) == SImode)
    {
      if (SUBREG_P (addr)
          && GET_MODE (SUBREG_REG (addr)) == DImode)
        {
          addr = SUBREG_REG (addr);
          if (CONST_INT_P (addr))
            return false;
        }
    }

  if (REG_P (addr))
    base = addr;
  else if (SUBREG_P (addr))
    {
      if (REG_P (SUBREG_REG (addr)))
        base = addr;
      else
        return false;
    }
  else if (GET_CODE (addr) == PLUS)
    {
      rtx addends[4], op;
      int n = 0, i;

      /* Flatten the (left-leaning) PLUS chain into ADDENDS.  A valid
         address has at most base + index*scale + disp + seg, i.e. four
         addends; anything deeper is rejected.  */
      op = addr;
      do
        {
          if (n >= 4)
            return false;
          addends[n++] = XEXP (op, 1);
          op = XEXP (op, 0);
        }
      while (GET_CODE (op) == PLUS);
      if (n >= 4)
        return false;
      addends[n] = op;

      /* Classify each addend into base/index/scale/disp/segment,
         rejecting duplicates.  */
      for (i = n; i >= 0; --i)
        {
          op = addends[i];
          switch (GET_CODE (op))
            {
            case MULT:
              if (index)
                return false;
              index = XEXP (op, 0);
              scale_rtx = XEXP (op, 1);
              break;

            case ASHIFT:
              if (index)
                return false;
              index = XEXP (op, 0);
              tmp = XEXP (op, 1);
              if (!CONST_INT_P (tmp))
                return false;
              scale = INTVAL (tmp);
              /* Shift counts above 3 cannot be encoded in a SIB byte.  */
              if ((unsigned HOST_WIDE_INT) scale > 3)
                return false;
              scale = 1 << scale;
              break;

            case ZERO_EXTEND:
              op = XEXP (op, 0);
              if (GET_CODE (op) != UNSPEC)
                return false;
              /* FALLTHRU */

            case UNSPEC:
              /* Only the thread-pointer unspec is accepted, and it is
                 turned into a TLS segment override.  */
              if (XINT (op, 1) == UNSPEC_TP
                  && TARGET_TLS_DIRECT_SEG_REFS
                  && seg == ADDR_SPACE_GENERIC)
                seg = DEFAULT_TLS_SEG_REG;
              else
                return false;
              break;

            case SUBREG:
              if (!REG_P (SUBREG_REG (op)))
                return false;
              /* FALLTHRU */

            case REG:
              /* First register becomes the base, second the index.  */
              if (!base)
                base = op;
              else if (!index)
                index = op;
              else
                return false;
              break;

            case CONST:
            case CONST_INT:
            case SYMBOL_REF:
            case LABEL_REF:
              if (disp)
                return false;
              disp = op;
              break;

            default:
              return false;
            }
        }
    }
  else if (GET_CODE (addr) == MULT)
    {
      index = XEXP (addr, 0);           /* index*scale */
      scale_rtx = XEXP (addr, 1);
    }
  else if (GET_CODE (addr) == ASHIFT)
    {
      /* We're called for lea too, which implements ashift on occasion.  */
      index = XEXP (addr, 0);
      tmp = XEXP (addr, 1);
      if (!CONST_INT_P (tmp))
        return false;
      scale = INTVAL (tmp);
      if ((unsigned HOST_WIDE_INT) scale > 3)
        return false;
      scale = 1 << scale;
    }
  else
    disp = addr;                        /* displacement */

  /* The index must be a (possibly SUBREG-wrapped) register.  */
  if (index)
    {
      if (REG_P (index))
        ;
      else if (SUBREG_P (index)
               && REG_P (SUBREG_REG (index)))
        ;
      else
        return false;
    }

  /* Extract the integral value of scale.  */
  if (scale_rtx)
    {
      if (!CONST_INT_P (scale_rtx))
        return false;
      scale = INTVAL (scale_rtx);
    }

  base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
  index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;

  /* Avoid useless 0 displacement.  */
  if (disp == const0_rtx && (base || index))
    disp = NULL_RTX;

  /* Allow arg pointer and stack pointer as index if there is not scaling.  */
  if (base_reg && index_reg && scale == 1
      && (REGNO (index_reg) == ARG_POINTER_REGNUM
          || REGNO (index_reg) == FRAME_POINTER_REGNUM
          || REGNO (index_reg) == SP_REG))
    {
      /* Those registers cannot be encoded as an index, so make them the
         base instead.  */
      std::swap (base, index);
      std::swap (base_reg, index_reg);
    }

  /* Special case: %ebp cannot be encoded as a base without a displacement.
     Similarly %r13.  */
  if (!disp && base_reg
      && (REGNO (base_reg) == ARG_POINTER_REGNUM
          || REGNO (base_reg) == FRAME_POINTER_REGNUM
          || REGNO (base_reg) == BP_REG
          || REGNO (base_reg) == R13_REG))
    disp = const0_rtx;

  /* Special case: on K6, [%esi] makes the instruction vector decoded.
     Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL. */
  if (TARGET_CPU_P (K6) && cfun && optimize_function_for_speed_p (cfun)
      && base_reg && !index_reg && !disp
      && REGNO (base_reg) == SI_REG)
    disp = const0_rtx;

  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale == 2)
    base = index, base_reg = index_reg, scale = 1;

  /* Special case: scaling cannot be encoded without base or displacement.  */
  if (!base && !disp && index && scale != 1)
    disp = const0_rtx;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;
  out->seg = seg;

  return true;
}
   11514              : 
   11515              : /* Return cost of the memory address x.
   11516              :    For i386, it is better to use a complex address than let gcc copy
   11517              :    the address into a reg and make a new pseudo.  But not if the address
   11518              :    requires to two regs - that would mean more pseudos with longer
   11519              :    lifetimes.  */
   11520              : static int
   11521     10912481 : ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
   11522              : {
   11523     10912481 :   struct ix86_address parts;
   11524     10912481 :   int cost = 1;
   11525     10912481 :   int ok = ix86_decompose_address (x, &parts);
   11526              : 
   11527     10912481 :   gcc_assert (ok);
   11528              : 
   11529     10912481 :   if (parts.base && SUBREG_P (parts.base))
   11530          500 :     parts.base = SUBREG_REG (parts.base);
   11531     10912481 :   if (parts.index && SUBREG_P (parts.index))
   11532           21 :     parts.index = SUBREG_REG (parts.index);
   11533              : 
   11534              :   /* Attempt to minimize number of registers in the address by increasing
   11535              :      address cost for each used register.  We don't increase address cost
   11536              :      for "pic_offset_table_rtx".  When a memopt with "pic_offset_table_rtx"
   11537              :      is not invariant itself it most likely means that base or index is not
   11538              :      invariant.  Therefore only "pic_offset_table_rtx" could be hoisted out,
   11539              :      which is not profitable for x86.  */
   11540     10912481 :   if (parts.base
   11541      9501023 :       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
   11542     20113846 :       && (current_pass->type == GIMPLE_PASS
   11543      2795359 :           || !pic_offset_table_rtx
   11544       132804 :           || !REG_P (parts.base)
   11545       132804 :           || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
   11546              :     cost++;
   11547              : 
   11548     10912481 :   if (parts.index
   11549      5213944 :       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
   11550     16112223 :       && (current_pass->type == GIMPLE_PASS
   11551       672179 :           || !pic_offset_table_rtx
   11552        57261 :           || !REG_P (parts.index)
   11553        57261 :           || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
   11554      5198464 :     cost++;
   11555              : 
   11556              :   /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
   11557              :      since it's predecode logic can't detect the length of instructions
   11558              :      and it degenerates to vector decoded.  Increase cost of such
   11559              :      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
   11560              :      to split such addresses or even refuse such addresses at all.
   11561              : 
   11562              :      Following addressing modes are affected:
   11563              :       [base+scale*index]
   11564              :       [scale*index+disp]
   11565              :       [base+index]
   11566              : 
   11567              :      The first and last case  may be avoidable by explicitly coding the zero in
   11568              :      memory address, but I don't have AMD-K6 machine handy to check this
   11569              :      theory.  */
   11570              : 
   11571     10912481 :   if (TARGET_CPU_P (K6)
   11572            0 :       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
   11573            0 :           || (parts.disp && !parts.base && parts.index && parts.scale != 1)
   11574            0 :           || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
   11575            0 :     cost += 10;
   11576              : 
   11577     10912481 :   return cost;
   11578              : }
   11579              : 
   11580              : /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
   11581              : 
   11582              : bool
   11583      1182116 : ix86_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
   11584              :                                      unsigned int align,
   11585              :                                      enum by_pieces_operation op,
   11586              :                                      bool speed_p)
   11587              : {
   11588              :   /* Return true when we are currently expanding memcpy/memset epilogue
   11589              :      with move_by_pieces or store_by_pieces.  */
   11590      1182116 :   if (cfun->machine->by_pieces_in_use)
   11591              :     return true;
   11592              : 
   11593      1180010 :   return default_use_by_pieces_infrastructure_p (size, align, op,
   11594      1180010 :                                                  speed_p);
   11595              : }
   11596              : 
   11597              : /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   11598              :    this is used for to form addresses to local data when -fPIC is in
   11599              :    use.  */
   11600              : 
   11601              : static bool
   11602            0 : darwin_local_data_pic (rtx disp)
   11603              : {
   11604            0 :   return (GET_CODE (disp) == UNSPEC
   11605            0 :           && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
   11606              : }
   11607              : 
   11608              : /* True if the function symbol operand X should be loaded from GOT.
   11609              :    If CALL_P is true, X is a call operand.
   11610              : 
   11611              :    NB: -mno-direct-extern-access doesn't force load from GOT for
   11612              :    call.
   11613              : 
   11614              :    NB: In 32-bit mode, only non-PIC is allowed in inline assembly
   11615              :    statements, since a PIC register could not be available at the
   11616              :    call site.  */
   11617              : 
   11618              : bool
   11619   1851435129 : ix86_force_load_from_GOT_p (rtx x, bool call_p)
   11620              : {
   11621     96351031 :   return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X))
   11622              :           && !TARGET_PECOFF && !TARGET_MACHO
   11623   1848566787 :           && (!flag_pic || this_is_asm_operands)
   11624   1828197842 :           && ix86_cmodel != CM_LARGE
   11625   1828191813 :           && ix86_cmodel != CM_LARGE_PIC
   11626   1828191812 :           && SYMBOL_REF_P (x)
   11627   1828191810 :           && ((!call_p
   11628   1822756886 :                && (!ix86_direct_extern_access
   11629   1822754616 :                    || (SYMBOL_REF_DECL (x)
   11630   1641398617 :                        && lookup_attribute ("nodirect_extern_access",
   11631   1641398617 :                                             DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))
   11632   1828189086 :               || (SYMBOL_REF_FUNCTION_P (x)
   11633    689521307 :                   && (!flag_plt
   11634    689516896 :                       || (SYMBOL_REF_DECL (x)
   11635    689516896 :                           && lookup_attribute ("noplt",
   11636    689516896 :                                                DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))))
   11637   1851442663 :           && !SYMBOL_REF_LOCAL_P (x));
   11638              : }
   11639              : 
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  Implements TARGET_LEGITIMATE_CONSTANT_P for
   MODE/X; returns false for constants that must be loaded some other
   way (TLS symbols, GOT-forced symbols, unsupported wide immediates).  */

static bool
ix86_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      /* Strip the CONST wrapper, then any symbol+offset PLUS.  */
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
        {
          if (!CONST_INT_P (XEXP (x, 1)))
            return false;
          x = XEXP (x, 0);
        }

      if (TARGET_MACHO && darwin_local_data_pic (x))
        return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
        switch (XINT (x, 1))
          {
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
          case UNSPEC_PLTOFF:
            return TARGET_64BIT;
          case UNSPEC_TPOFF:
          case UNSPEC_NTPOFF:
            /* TLS offsets are constant only for local-exec symbols.  */
            x = XVECEXP (x, 0, 0);
            return (SYMBOL_REF_P (x)
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
          case UNSPEC_DTPOFF:
            x = XVECEXP (x, 0, 0);
            return (SYMBOL_REF_P (x)
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
          case UNSPEC_SECREL32:
            x = XVECEXP (x, 0, 0);
            return SYMBOL_REF_P (x);
          default:
            return false;
          }

      /* We must have drilled down to a symbol.  */
      if (LABEL_REF_P (x))
        return true;
      if (!SYMBOL_REF_P (x))
        return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
        return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
          && SYMBOL_REF_DLLIMPORT_P (x))
        return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
        return machopic_symbol_defined_p (x);
#endif

      /* External function address should be loaded
         via the GOT slot to avoid PLT.  */
      if (ix86_force_load_from_GOT_p (x))
        return false;

      break;

    CASE_CONST_SCALAR_INT:
      /* An ENDBR-shaped immediate is rejected when CET is active.  */
      if (ix86_endbr_immediate_operand (x, VOIDmode))
        return false;

      switch (mode)
        {
        case E_TImode:
          if (TARGET_64BIT)
            return true;
          /* FALLTHRU */
        case E_OImode:
        case E_XImode:
          /* Wide integer immediates are valid only if they are a
             standard SSE constant or fit the widest available vector
             move for the enabled ISA.  */
          if (!standard_sse_constant_p (x, mode)
              && GET_MODE_SIZE (TARGET_AVX512F
                                ? XImode
                                : (TARGET_AVX
                                   ? OImode
                                   : (TARGET_SSE2
                                      ? TImode : DImode))) < GET_MODE_SIZE (mode))
            return false;
        default:
          break;
        }
      break;

    case CONST_VECTOR:
      if (!standard_sse_constant_p (x, mode))
        return false;
      break;

    case CONST_DOUBLE:
      /* BFmode floating constants are always loaded from memory.  */
      if (mode == E_BFmode)
        return false;

      /* FALLTHRU */
    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
   11756              : 
   11757              : /* Determine if it's legal to put X into the constant pool.  This
   11758              :    is not possible for the address of thread-local symbols, which
   11759              :    is checked above.  */
   11760              : 
   11761              : static bool
   11762     61674797 : ix86_cannot_force_const_mem (machine_mode mode, rtx x)
   11763              : {
   11764              :   /* We can put any immediate constant in memory.  */
   11765     61674797 :   switch (GET_CODE (x))
   11766              :     {
   11767              :     CASE_CONST_ANY:
   11768              :       return false;
   11769              : 
   11770      1798665 :     default:
   11771      1798665 :       break;
   11772              :     }
   11773              : 
   11774      1798665 :   return !ix86_legitimate_constant_p (mode, x);
   11775              : }
   11776              : 
   11777              : /* Return a unique alias set for the GOT.  */
   11778              : 
   11779              : alias_set_type
   11780       188822 : ix86_GOT_alias_set (void)
   11781              : {
   11782       188822 :   static alias_set_type set = -1;
   11783       188822 :   if (set == -1)
   11784         2966 :     set = new_alias_set ();
   11785       188822 :   return set;
   11786              : }
   11787              : 
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      /* Look inside the CONST, stripping a symbol+offset PLUS, but keep
         X itself intact — the disp check below operates on the whole
         CONST expression.  */
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
          && CONST_INT_P (XEXP (inner, 1)))
        inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
        switch (XINT (inner, 1))
          {
          case UNSPEC_GOT:
          case UNSPEC_GOTOFF:
          case UNSPEC_PLTOFF:
            return TARGET_64BIT;
          case UNSPEC_TPOFF:
            /* Local-exec TLS offsets are directly usable.  */
            x = XVECEXP (inner, 0, 0);
            return (SYMBOL_REF_P (x)
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
          case UNSPEC_SECREL32:
            x = XVECEXP (inner, 0, 0);
            return SYMBOL_REF_P (x);
          case UNSPEC_MACHOPIC_OFFSET:
            return legitimate_pic_address_disp_p (x);
          default:
            return false;
          }
      /* Not an unspec: treat the CONST like a plain symbolic operand.  */
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      /* All remaining constants (integers, floats, vectors) are fine.  */
      return true;
    }
}
   11835              : 
   11836              : /* Determine if a given CONST RTX is a valid memory displacement
   11837              :    in PIC mode.  */
   11838              : 
bool
legitimate_pic_address_disp_p (rtx disp)
{
  bool saw_plus;

  /* In 64bit mode we can allow direct addresses of symbols and labels
     when they are not dynamic symbols.  */
  if (TARGET_64BIT)
    {
      rtx op0 = disp, op1;

      switch (GET_CODE (disp))
        {
        case LABEL_REF:
          return true;

        case CONST:
          if (GET_CODE (XEXP (disp, 0)) != PLUS)
            break;
          op0 = XEXP (XEXP (disp, 0), 0);
          op1 = XEXP (XEXP (disp, 0), 1);
          if (!CONST_INT_P (op1))
            break;
          /* DTPOFF/NTPOFF plus an offset is fine if the offset fits in
             a sign-extended 32-bit value.  */
          if (GET_CODE (op0) == UNSPEC
              && (XINT (op0, 1) == UNSPEC_DTPOFF
                  || XINT (op0, 1) == UNSPEC_NTPOFF)
              && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
            return true;
          /* Reject offsets of 16MB or more — NOTE(review): presumably
             to keep symbol+offset safely addressable in the small code
             model; confirm against the x86-64 psABI limits.  */
          if (INTVAL (op1) >= 16*1024*1024
              || INTVAL (op1) < -16*1024*1024)
            break;
          if (LABEL_REF_P (op0))
            return true;
          /* PC-relative unspecs, bare or wrapped in CONST, are OK.  */
          if (GET_CODE (op0) == CONST
              && GET_CODE (XEXP (op0, 0)) == UNSPEC
              && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
            return true;
          if (GET_CODE (op0) == UNSPEC
              && XINT (op0, 1) == UNSPEC_PCREL)
            return true;
          if (!SYMBOL_REF_P (op0))
            break;
          /* FALLTHRU */

        case SYMBOL_REF:
          /* TLS references should always be enclosed in UNSPEC.
             The dllimported symbol needs always to be resolved.  */
          if (SYMBOL_REF_TLS_MODEL (op0)
              || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
            return false;

          if (TARGET_PECOFF)
            {
#if TARGET_PECOFF
              if (is_imported_p (op0))
                return true;
#endif

              if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
                break;

              /* Non-external-weak function symbols need to be resolved only
                 for the large model.  Non-external symbols don't need to be
                 resolved for large and medium models.  For the small model,
                 we don't need to resolve anything here.  */
              if ((ix86_cmodel != CM_LARGE_PIC
                   && SYMBOL_REF_FUNCTION_P (op0)
                   && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
                  || !SYMBOL_REF_EXTERNAL_P (op0)
                  || ix86_cmodel == CM_SMALL_PIC)
                return true;
            }
          /* ELF: allow local symbols directly, and (when copy relocations
             in PIEs are usable) non-weak non-function externals too,
             unless the symbol opted out via "nodirect_extern_access".  */
          else if (!SYMBOL_REF_FAR_ADDR_P (op0)
                   && (SYMBOL_REF_LOCAL_P (op0)
                       || ((ix86_direct_extern_access
                            && !(SYMBOL_REF_DECL (op0)
                                 && lookup_attribute ("nodirect_extern_access",
                                                      DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0)))))
                           && HAVE_LD_PIE_COPYRELOC
                           && flag_pie
                           && !SYMBOL_REF_WEAK (op0)
                           && !SYMBOL_REF_FUNCTION_P (op0)))
                   && ix86_cmodel != CM_LARGE_PIC)
            return true;
          break;

        default:
          break;
        }
    }
  /* From here on, only a CONST wrapper can be a valid displacement.  */
  if (GET_CODE (disp) != CONST)
    return false;
  disp = XEXP (disp, 0);

  if (TARGET_64BIT)
    {
      /* We are unsafe to allow PLUS expressions.  This limit allowed distance
         of GOT tables.  We should not need these anyway.  */
      if (GET_CODE (disp) != UNSPEC
          || (XINT (disp, 1) != UNSPEC_GOTPCREL
              && XINT (disp, 1) != UNSPEC_GOTOFF
              && XINT (disp, 1) != UNSPEC_PCREL
              && XINT (disp, 1) != UNSPEC_PLTOFF))
        return false;

      if (!SYMBOL_REF_P (XVECEXP (disp, 0, 0))
          && !LABEL_REF_P (XVECEXP (disp, 0, 0)))
        return false;
      return true;
    }

  /* 32-bit mode: strip an optional constant offset, remembering that we
     saw one — some relocations below refuse an added offset.  */
  saw_plus = false;
  if (GET_CODE (disp) == PLUS)
    {
      if (!CONST_INT_P (XEXP (disp, 1)))
        return false;
      disp = XEXP (disp, 0);
      saw_plus = true;
    }

  if (TARGET_MACHO && darwin_local_data_pic (disp))
    return true;

  if (GET_CODE (disp) != UNSPEC)
    return false;

  switch (XINT (disp, 1))
    {
    case UNSPEC_GOT:
      if (saw_plus)
        return false;
      /* We need to check for both symbols and labels because VxWorks loads
         text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
         details.  */
      return (SYMBOL_REF_P (XVECEXP (disp, 0, 0))
              || LABEL_REF_P (XVECEXP (disp, 0, 0)));
    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
         While ABI specify also 32bit relocation but we don't produce it in
         small PIC model at all.  */
      if ((SYMBOL_REF_P (XVECEXP (disp, 0, 0))
           || LABEL_REF_P (XVECEXP (disp, 0, 0)))
          && !TARGET_64BIT)
        return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
      return false;
    /* Initial-exec TLS relocations: symbol only, no added offset.  */
    case UNSPEC_GOTTPOFF:
    case UNSPEC_GOTNTPOFF:
    case UNSPEC_INDNTPOFF:
      if (saw_plus)
        return false;
      disp = XVECEXP (disp, 0, 0);
      return (SYMBOL_REF_P (disp)
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
    case UNSPEC_NTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (SYMBOL_REF_P (disp)
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
    case UNSPEC_DTPOFF:
      disp = XVECEXP (disp, 0, 0);
      return (SYMBOL_REF_P (disp)
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
    case UNSPEC_SECREL32:
      disp = XVECEXP (disp, 0, 0);
      return SYMBOL_REF_P (disp);
    }

  return false;
}
   12007              : 
   12008              : /* Determine if op is suitable RTX for an address register.
   12009              :    Return naked register if a register or a register subreg is
   12010              :    found, otherwise return NULL_RTX.  */
   12011              : 
   12012              : static rtx
   12013   1372167860 : ix86_validate_address_register (rtx op)
   12014              : {
   12015   1372167860 :   machine_mode mode = GET_MODE (op);
   12016              : 
   12017              :   /* Only SImode or DImode registers can form the address.  */
   12018   1372167860 :   if (mode != SImode && mode != DImode)
   12019              :     return NULL_RTX;
   12020              : 
   12021   1372160990 :   if (REG_P (op))
   12022              :     return op;
   12023       702145 :   else if (SUBREG_P (op))
   12024              :     {
   12025       702145 :       rtx reg = SUBREG_REG (op);
   12026              : 
   12027       702145 :       if (!REG_P (reg))
   12028              :         return NULL_RTX;
   12029              : 
   12030       702145 :       mode = GET_MODE (reg);
   12031              : 
   12032              :       /* Don't allow SUBREGs that span more than a word.  It can
   12033              :          lead to spill failures when the register is one word out
   12034              :          of a two word structure.  */
   12035      1450521 :       if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   12036              :         return NULL_RTX;
   12037              : 
   12038              :       /* Allow only SUBREGs of non-eliminable hard registers.  */
   12039       243091 :       if (register_no_elim_operand (reg, mode))
   12040              :         return reg;
   12041              :     }
   12042              : 
   12043              :   /* Op is not a register.  */
   12044              :   return NULL_RTX;
   12045              : }
   12046              : 
   12047              : /* Determine which memory address register set insn can use.  */
   12048              : 
static enum attr_addr
ix86_memory_address_reg_class (rtx_insn* insn)
{
  /* LRA can do some initialization with NULL insn,
     return maximum register class in this case.  */
  enum attr_addr addr_rclass = ADDR_GPR32;

  if (!insn)
    return addr_rclass;

  /* Inline asm gets GPR32 only when the user asked for it via
     -mapx-inline-asm-use-gpr32; otherwise restrict it to GPR16.  */
  if (asm_noperands (PATTERN (insn)) >= 0
      || GET_CODE (PATTERN (insn)) == ASM_INPUT)
    return ix86_apx_inline_asm_use_gpr32 ? ADDR_GPR32 : ADDR_GPR16;

  /* Return maximum register class for unrecognized instructions.  */
  if (INSN_CODE (insn) < 0)
    return addr_rclass;

  /* Try to recognize the insn before calling get_attr_addr.
     Save current recog_data and current alternative.  */
  struct recog_data_d saved_recog_data = recog_data;
  int saved_alternative = which_alternative;

  /* Update recog_data for processing of alternatives.  */
  extract_insn_cached (insn);

  /* If current alternative is not set, loop through the enabled
     alternatives and get the most limited register class.  */
  if (saved_alternative == -1)
    {
      alternative_mask enabled = get_enabled_alternatives (insn);

      for (int i = 0; i < recog_data.n_alternatives; i++)
        {
          if (!TEST_BIT (enabled, i))
            continue;

          which_alternative = i;
          addr_rclass = MIN (addr_rclass, get_attr_addr (insn));
        }
    }
  else
    {
      which_alternative = saved_alternative;
      addr_rclass = get_attr_addr (insn);
    }

  /* Restore the global recognition state clobbered above.  */
  recog_data = saved_recog_data;
  which_alternative = saved_alternative;

  return addr_rclass;
}
   12101              : 
   12102              : /* Return memory address register class insn can use.  */
   12103              : 
   12104              : enum reg_class
   12105    214899889 : ix86_insn_base_reg_class (rtx_insn* insn)
   12106              : {
   12107    214899889 :   switch (ix86_memory_address_reg_class (insn))
   12108              :     {
   12109              :     case ADDR_GPR8:
   12110              :       return LEGACY_GENERAL_REGS;
   12111              :     case ADDR_GPR16:
   12112              :       return GENERAL_GPR16;
   12113              :     case ADDR_GPR32:
   12114              :       break;
   12115            0 :     default:
   12116            0 :       gcc_unreachable ();
   12117              :     }
   12118              : 
   12119              :   return BASE_REG_CLASS;
   12120              : }
   12121              : 
   12122              : bool
   12123      1285106 : ix86_regno_ok_for_insn_base_p (int regno, rtx_insn* insn)
   12124              : {
   12125      1285106 :   switch (ix86_memory_address_reg_class (insn))
   12126              :     {
   12127            0 :     case ADDR_GPR8:
   12128            0 :       return LEGACY_INT_REGNO_P (regno);
   12129            0 :     case ADDR_GPR16:
   12130            0 :       return GENERAL_GPR16_REGNO_P (regno);
   12131      1285106 :     case ADDR_GPR32:
   12132      1285106 :       break;
   12133            0 :     default:
   12134            0 :       gcc_unreachable ();
   12135              :     }
   12136              : 
   12137      1285106 :   return GENERAL_REGNO_P (regno);
   12138              : }
   12139              : 
   12140              : enum reg_class
   12141     39452526 : ix86_insn_index_reg_class (rtx_insn* insn)
   12142              : {
   12143     39452526 :   switch (ix86_memory_address_reg_class (insn))
   12144              :     {
   12145              :     case ADDR_GPR8:
   12146              :       return LEGACY_INDEX_REGS;
   12147              :     case ADDR_GPR16:
   12148              :       return INDEX_GPR16;
   12149              :     case ADDR_GPR32:
   12150              :       break;
   12151            0 :     default:
   12152            0 :       gcc_unreachable ();
   12153              :     }
   12154              : 
   12155              :   return INDEX_REG_CLASS;
   12156              : }
   12157              : 
   12158              : /* Recognizes RTL expressions that are valid memory addresses for an
   12159              :    instruction.  The MODE argument is the machine mode for the MEM
   12160              :    expression that wants to use this address.
   12161              : 
   12162              :    It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   12163              :    convert common non-canonical forms to canonical form so that they will
   12164              :    be recognized.  */
   12165              : 
static bool
ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
                           code_helper = ERROR_MARK)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  addr_space_t seg;

  if (ix86_decompose_address (addr, &parts) == 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;
  seg = parts.seg;

  /* Validate base register.  */
  if (base)
    {
      rtx reg = ix86_validate_address_register (base);

      if (reg == NULL_RTX)
        return false;

      unsigned int regno = REGNO (reg);
      if ((strict && !REGNO_OK_FOR_BASE_P (regno))
          || (!strict && !REGNO_OK_FOR_BASE_NONSTRICT_P (regno)))
        /* Base is not valid.  */
        return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg = ix86_validate_address_register (index);

      if (reg == NULL_RTX)
        return false;

      unsigned int regno = REGNO (reg);
      if ((strict && !REGNO_OK_FOR_INDEX_P (regno))
          || (!strict && !REGNO_OK_FOR_INDEX_NONSTRICT_P (regno)))
        /* Index is not valid.  */
        return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Address override works only on the (%reg) part of %fs:(%reg).  */
  if (seg != ADDR_SPACE_GENERIC
      && ((base && GET_MODE (base) != word_mode)
          || (index && GET_MODE (index) != word_mode)))
    return false;

  /* Validate scale factor.  */
  if (scale != 1)
    {
      if (!index)
        /* Scale without index.  */
        return false;

      if (scale != 2 && scale != 4 && scale != 8)
        /* Scale is not a valid multiplier.  */
        return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      /* Displacements matching the ENDBR immediate are refused (see
         ix86_endbr_immediate_operand).  */
      if (ix86_endbr_immediate_operand (disp, VOIDmode))
        return false;

      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == UNSPEC
          && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
        switch (XINT (XEXP (disp, 0), 1))
          {
          /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
             when used.  While ABI specify also 32bit relocations, we
             don't produce them at all and use IP relative instead.
             Allow GOT in 32bit mode for both PIC and non-PIC if symbol
             should be loaded via GOT.  */
          case UNSPEC_GOT:
            if (!TARGET_64BIT
                && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
              goto is_legitimate_pic;
            /* FALLTHRU */
          case UNSPEC_GOTOFF:
            gcc_assert (flag_pic);
            if (!TARGET_64BIT)
              goto is_legitimate_pic;

            /* 64bit address unspec.  */
            return false;

          case UNSPEC_GOTPCREL:
            if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
              goto is_legitimate_pic;
            /* FALLTHRU */
          case UNSPEC_PCREL:
            gcc_assert (flag_pic);
            goto is_legitimate_pic;

          /* The TLS-related unspecs fall through to the generic
             displacement checks below.  */
          case UNSPEC_GOTTPOFF:
          case UNSPEC_GOTNTPOFF:
          case UNSPEC_INDNTPOFF:
          case UNSPEC_NTPOFF:
          case UNSPEC_DTPOFF:
          case UNSPEC_SECREL32:
            break;

          default:
            /* Invalid address unspec.  */
            return false;
          }

      else if (SYMBOLIC_CONST (disp)
               && (flag_pic
#if TARGET_MACHO
                   || (MACHOPIC_INDIRECT
                       && !machopic_operand_p (disp))
#endif
                  ))
        {

        is_legitimate_pic:
          if (TARGET_64BIT && (index || base))
            {
              /* foo@dtpoff(%rX) is ok.  */
              if (GET_CODE (disp) != CONST
                  || GET_CODE (XEXP (disp, 0)) != PLUS
                  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
                  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
                  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
                      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF
                      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_SECREL32))
                /* Non-constant pic memory reference.  */
                return false;
            }
          else if ((!TARGET_MACHO || flag_pic)
                    && ! legitimate_pic_address_disp_p (disp))
            /* Displacement is an invalid pic construct.  */
            return false;
#if TARGET_MACHO
          else if (MACHO_DYNAMIC_NO_PIC_P
                   && !ix86_legitimate_constant_p (Pmode, disp))
            /* displacment must be referenced via non_lazy_pointer */
            return false;
#endif

          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is good idea, unfortunately these constructs may
             be created by "adds using lea" optimization for incorrect
             code like:

             int a;
             int foo(int i)
               {
                 return *(&a+i);
               }

             This code is nonsensical, but results in addressing
             GOT table with pic_offset_table_rtx base.  We can't
             just refuse it easily, since it gets matched by
             "addsi3" pattern, that later gets split to lea in the
             case output register differs from input.  While this
             can be handled by separate addsi pattern for this case
             that never results in lea, this seems to be easier and
             correct fix for crash to disable this test.  */
        }
      else if (!LABEL_REF_P (disp)
               && !CONST_INT_P (disp)
               && (GET_CODE (disp) != CONST
                   || !ix86_legitimate_constant_p (Pmode, disp))
               && (!SYMBOL_REF_P (disp)
                   || !ix86_legitimate_constant_p (Pmode, disp)))
        /* Displacement is not constant.  */
        return false;
      else if (TARGET_64BIT
               && !x86_64_immediate_operand (disp, VOIDmode))
        /* Displacement is out of range.  */
        return false;
      /* In x32 mode, constant addresses are sign extended to 64bit, so
         we have to prevent addresses from 0x80000000 to 0xffffffff.  */
      else if (TARGET_X32 && !(index || base)
               && CONST_INT_P (disp)
               && val_signbit_known_set_p (SImode, INTVAL (disp)))
        return false;
    }

  /* Everything looks valid.  */
  return true;
}
   12367              : 
   12368              : /* Determine if a given RTX is a valid constant address.  */
   12369              : 
   12370              : bool
   12371   2794367619 : constant_address_p (rtx x)
   12372              : {
   12373   2874748447 :   return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
   12374              : }
   12375              : 
   12376              : 
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

#if TARGET_MACHO
  if (TARGET_MACHO && !TARGET_64BIT)
    {
      if (reg == 0)
        reg = gen_reg_rtx (Pmode);
      /* Use the generic Mach-O PIC machinery.  */
      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
    }
#endif

  /* 64-bit PE-COFF dllimport symbols have their own indirection and
     bypass the GOT machinery entirely.  */
  if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
#if TARGET_PECOFF
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
        return tmp;
#endif
    }

  /* On 64-bit targets a legitimate PIC displacement (e.g. one that can
     be used %rip-relative) needs no further transformation.  */
  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
    new_rtx = addr;
  else if ((!TARGET_64BIT
            || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
           && !TARGET_PECOFF
           && gotoff_operand (addr, Pmode))
    {
      /* This symbol may be referenced via a displacement
         from the PIC base address (@GOTOFF).  */

      /* Strip a CONST wrapper so we can look at the PLUS below.  */
      if (GET_CODE (addr) == CONST)
        addr = XEXP (addr, 0);

      /* Wrap only the symbol part in the UNSPEC; keep a constant
         offset outside the unspec as a separate PLUS operand.  */
      if (GET_CODE (addr) == PLUS)
          {
            new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
                                      UNSPEC_GOTOFF);
            new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
          }
        else
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);

      new_rtx = gen_rtx_CONST (Pmode, new_rtx);

      if (TARGET_64BIT)
        new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);

      /* Add the PIC base; into REG if the caller provided one,
         otherwise as a bare PLUS expression.  */
      if (reg != 0)
        {
          gcc_assert (REG_P (reg));
          new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
                                         new_rtx, reg, 1, OPTAB_DIRECT);
        }
      else
        new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
    }
  else if ((SYMBOL_REF_P (addr) && SYMBOL_REF_TLS_MODEL (addr) == 0)
           /* We can't always use @GOTOFF for text labels
              on VxWorks, see gotoff_operand.  */
           || (TARGET_VXWORKS_VAROFF && LABEL_REF_P (addr)))
    {
#if TARGET_PECOFF
      rtx tmp = legitimize_pe_coff_symbol (addr, true);
      if (tmp)
        return tmp;
#endif

      /* For x64 PE-COFF there is no GOT table,
         so we use address directly.  */
      if (TARGET_64BIT && TARGET_PECOFF)
        {
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);
        }
      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
        {
          /* 64-bit small/medium PIC: load the address from the GOT
             via a @GOTPCREL-relative memory reference.  */
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
                                    UNSPEC_GOTPCREL);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);
          new_rtx = gen_const_mem (Pmode, new_rtx);
          set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
        }
      else
        {
          /* This symbol must be referenced via a load
             from the Global Offset Table (@GOT).  */
          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
          new_rtx = gen_rtx_CONST (Pmode, new_rtx);

          if (TARGET_64BIT)
            new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);

          if (reg != 0)
            {
              gcc_assert (REG_P (reg));
              new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
                                             new_rtx, reg, 1, OPTAB_DIRECT);
            }
          else
            new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);

          new_rtx = gen_const_mem (Pmode, new_rtx);
          set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
        }

      new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
    }
  else
    {
      /* Remaining cases: plain integers that do not fit in an
         immediate, and CONST expressions (SYM + offset etc.).  */
      if (CONST_INT_P (addr)
          && !x86_64_immediate_operand (addr, VOIDmode))
        new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
      else if (GET_CODE (addr) == CONST)
        {
          addr = XEXP (addr, 0);

          /* We must match stuff we generate before.  Assume the only
             unspecs that can get here are ours.  Not that we could do
             anything with them anyway....  */
          if (GET_CODE (addr) == UNSPEC
              || (GET_CODE (addr) == PLUS
                  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
            return orig;
          gcc_assert (GET_CODE (addr) == PLUS);
        }

      if (GET_CODE (addr) == PLUS)
        {
          rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);

          /* Check first to see if this is a constant
             offset from a @GOTOFF symbol reference.  */
          if (!TARGET_PECOFF
              && gotoff_operand (op0, Pmode)
              && CONST_INT_P (op1))
            {
              if (!TARGET_64BIT)
                {
                  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
                                            UNSPEC_GOTOFF);
                  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
                  new_rtx = gen_rtx_CONST (Pmode, new_rtx);

                  if (reg != 0)
                    {
                      gcc_assert (REG_P (reg));
                      new_rtx = expand_simple_binop (Pmode, PLUS,
                                                     pic_offset_table_rtx,
                                                     new_rtx, reg, 1,
                                                     OPTAB_DIRECT);
                    }
                  else
                    new_rtx
                      = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
                }
              else
                {
                  /* 64-bit: offsets within +-16MB are assumed to stay
                     in range of the relocation; larger ones must be
                     materialized in registers.  */
                  if (INTVAL (op1) < -16*1024*1024
                      || INTVAL (op1) >= 16*1024*1024)
                    {
                      if (!x86_64_immediate_operand (op1, Pmode))
                        op1 = force_reg (Pmode, op1);

                      new_rtx
                        = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
                    }
                }
            }
          else
            {
              /* Legitimize both operands recursively and recombine.
                 Avoid reusing REG for the second operand if the first
                 already landed in it.  */
              rtx base = legitimize_pic_address (op0, reg);
              machine_mode mode = GET_MODE (base);
              new_rtx
                = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);

              if (CONST_INT_P (new_rtx))
                {
                  if (INTVAL (new_rtx) < -16*1024*1024
                      || INTVAL (new_rtx) >= 16*1024*1024)
                    {
                      if (!x86_64_immediate_operand (new_rtx, mode))
                        new_rtx = force_reg (mode, new_rtx);

                      new_rtx
                        = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
                    }
                  else
                    new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
                }
              else
                {
                  /* For %rip addressing, we have to use
                     just disp32, not base nor index.  */
                  if (TARGET_64BIT
                      && (SYMBOL_REF_P (base)
                          || LABEL_REF_P (base)))
                    base = force_reg (mode, base);
                  /* Re-associate so the constant part stays outermost:
                     (base + (X + const)) -> ((base + X) + const).  */
                  if (GET_CODE (new_rtx) == PLUS
                      && CONSTANT_P (XEXP (new_rtx, 1)))
                    {
                      base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
                      new_rtx = XEXP (new_rtx, 1);
                    }
                  new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
                }
            }
        }
    }
  return new_rtx;
}
   12609              : 
   12610              : /* Load the thread pointer.  If TO_REG is true, force it into a register.  */
   12611              : 
   12612              : static rtx
   12613        24398 : get_thread_pointer (machine_mode tp_mode, bool to_reg)
   12614              : {
   12615        24398 :   rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
   12616              : 
   12617        24398 :   if (GET_MODE (tp) != tp_mode)
   12618              :     {
   12619           11 :       gcc_assert (GET_MODE (tp) == SImode);
   12620           11 :       gcc_assert (tp_mode == DImode);
   12621              : 
   12622           11 :       tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
   12623              :     }
   12624              : 
   12625        24398 :   if (to_reg)
   12626         8110 :     tp = copy_to_mode_reg (tp_mode, tp);
   12627              : 
   12628        24398 :   return tp;
   12629              : }
   12630              : 
   12631              : /* Construct the SYMBOL_REF for the _tls_index symbol.  */
   12632              : 
   12633              : static GTY(()) rtx ix86_tls_index_symbol;
   12634              : 
   12635              : static rtx
   12636            0 : ix86_tls_index (void)
   12637              : {
   12638            0 :   if (!ix86_tls_index_symbol)
   12639            0 :     ix86_tls_index_symbol = gen_rtx_SYMBOL_REF (SImode, "_tls_index");
   12640              : 
   12641            0 :   if (flag_pic)
   12642            0 :     return gen_rtx_CONST (Pmode,
   12643              :                           gen_rtx_UNSPEC (Pmode,
   12644              :                                           gen_rtvec (1, ix86_tls_index_symbol),
   12645              :                                           UNSPEC_PCREL));
   12646              :   else
   12647            0 :     return ix86_tls_index_symbol;
   12648              : }
   12649              : 
   12650              : /* Construct the SYMBOL_REF for the tls_get_addr function.  */
   12651              : 
   12652              : static GTY(()) rtx ix86_tls_symbol;
   12653              : 
   12654              : rtx
   12655         6715 : ix86_tls_get_addr (void)
   12656              : {
   12657         6715 :   if (cfun->machine->call_saved_registers
   12658         6715 :       == TYPE_NO_CALLER_SAVED_REGISTERS)
   12659              :     {
   12660              :       /* __tls_get_addr doesn't preserve vector registers.  When a
   12661              :          function with no_caller_saved_registers attribute calls
   12662              :          __tls_get_addr, YMM and ZMM registers will be clobbered.
   12663              :          Issue an error and suggest -mtls-dialect=gnu2 in this case.  */
   12664            3 :       if (cfun->machine->func_type == TYPE_NORMAL)
   12665            1 :         error (G_("%<-mtls-dialect=gnu2%> must be used with a function"
   12666              :                   " with the %<no_caller_saved_registers%> attribute"));
   12667              :       else
   12668            3 :         error (cfun->machine->func_type == TYPE_EXCEPTION
   12669              :                ? G_("%<-mtls-dialect=gnu2%> must be used with an"
   12670              :                     " exception service routine")
   12671              :                : G_("%<-mtls-dialect=gnu2%> must be used with an"
   12672              :                     " interrupt service routine"));
   12673              :       /* Don't issue the same error twice.  */
   12674            3 :       cfun->machine->func_type = TYPE_NORMAL;
   12675            3 :       cfun->machine->call_saved_registers
   12676            3 :         = TYPE_DEFAULT_CALL_SAVED_REGISTERS;
   12677              :     }
   12678              : 
   12679         6715 :   if (!ix86_tls_symbol)
   12680              :     {
   12681          204 :       const char *sym
   12682          241 :         = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
   12683          241 :            ? "___tls_get_addr" : "__tls_get_addr");
   12684              : 
   12685          278 :       ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
   12686              :     }
   12687              : 
   12688         6715 :   if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
   12689              :     {
   12690            2 :       rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
   12691              :                                    UNSPEC_PLTOFF);
   12692            2 :       return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
   12693              :                            gen_rtx_CONST (Pmode, unspec));
   12694              :     }
   12695              : 
   12696         6713 :   return ix86_tls_symbol;
   12697              : }
   12698              : 
   12699              : /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
   12700              : 
   12701              : static GTY(()) rtx ix86_tls_module_base_symbol;
   12702              : 
   12703              : rtx
   12704           87 : ix86_tls_module_base (void)
   12705              : {
   12706           87 :   if (!ix86_tls_module_base_symbol)
   12707              :     {
   12708           10 :       ix86_tls_module_base_symbol
   12709           10 :         = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
   12710              : 
   12711           10 :       SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
   12712           10 :         |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
   12713              :     }
   12714              : 
   12715           87 :   return ix86_tls_module_base_symbol;
   12716              : }
   12717              : 
/* A subroutine of ix86_legitimize_address and ix86_expand_move.
   X is the TLS symbol being accessed and MODEL the TLS access model
   to expand it under.  FOR_MOV is false if we expect this to be used
   for a memory address and true if we expect to load the address into
   a register.  Returns an rtx for the address of X's thread-local
   storage slot; may emit insns as a side effect.  */

rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  machine_mode tp_mode = Pmode;
  int type;

  /* Windows implements a single form of TLS.  */
  if (TARGET_WIN32_TLS)
    {
      /* Load the 32-bit index.  */
      rtx ind = gen_const_mem (SImode, ix86_tls_index ());
      set_mem_alias_set (ind, GOT_ALIAS_SET);
      if (TARGET_64BIT)
        ind = convert_to_mode (Pmode, ind, 1);
      ind = force_reg (Pmode, ind);

      /* Add it to the thread pointer and load the base.  */
      tp = get_thread_pointer (Pmode, true);
      rtx addr = gen_rtx_PLUS (Pmode, tp,
                               gen_rtx_MULT (Pmode, ind,
                                             GEN_INT (UNITS_PER_WORD)));
      base = gen_const_mem (Pmode, addr);
      set_mem_alias_set (base, GOT_ALIAS_SET);

      /* Add the 32-bit section-relative offset to the base.  */
      base = force_reg (Pmode, base);
      off = gen_rtx_CONST (Pmode,
                           gen_rtx_UNSPEC (SImode,
                                           gen_rtvec (1, x),
                                           UNSPEC_SECREL32));
      return gen_rtx_PLUS (Pmode, base, off);
    }

  /* Fall back to global dynamic model if tool chain cannot support local
     dynamic.  */
  if (TARGET_SUN_TLS && !TARGET_64BIT
      && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
      && model == TLS_MODEL_LOCAL_DYNAMIC)
    model = TLS_MODEL_GLOBAL_DYNAMIC;

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      /* GD: the address comes from __tls_get_addr (or the GNU2 TLS
         descriptor scheme).  32-bit needs the PIC register set up.  */
      if (!TARGET_64BIT)
        {
          if (flag_pic && !TARGET_PECOFF)
            pic = pic_offset_table_rtx;
          else
            {
              pic = gen_reg_rtx (Pmode);
              emit_insn (gen_set_got (pic));
            }
        }

      if (TARGET_GNU2_TLS)
        {
          /* GNU2 dialect: descriptor call, then add the thread
             pointer; record X as REG_EQUAL for the optimizers.  */
          dest = gen_reg_rtx (ptr_mode);
          if (TARGET_64BIT)
            emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
          else
            emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

          tp = get_thread_pointer (ptr_mode, true);
          dest = gen_rtx_PLUS (ptr_mode, tp, dest);
          if (GET_MODE (dest) != Pmode)
             dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
          dest = force_reg (Pmode, dest);

          if (GET_MODE (x) != Pmode)
            x = gen_rtx_ZERO_EXTEND (Pmode, x);

          set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
        }
      else
        {
          rtx caddr = ix86_tls_get_addr ();

          dest = gen_reg_rtx (Pmode);
          if (TARGET_64BIT)
            {
              /* 64-bit: wrap the call in a sequence and emit it as a
                 const libcall block equivalent to X, so CSE can share
                 the result.  */
              rtx rax = gen_rtx_REG (Pmode, AX_REG);
              rtx rdi = gen_rtx_REG (Pmode, DI_REG);
              rtx_insn *insns;

              start_sequence ();
              emit_call_insn
                (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr, rdi));
              insns = end_sequence ();

              if (GET_MODE (x) != Pmode)
                x = gen_rtx_ZERO_EXTEND (Pmode, x);

              RTL_CONST_CALL_P (insns) = 1;
              emit_libcall_block (insns, dest, rax, x);
            }
          else
            emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
        }
      break;

    case TLS_MODEL_LOCAL_DYNAMIC:
      /* LD: compute the module base once, then add the symbol's
         @DTPOFF displacement.  */
      if (!TARGET_64BIT)
        {
          if (flag_pic)
            pic = pic_offset_table_rtx;
          else
            {
              pic = gen_reg_rtx (Pmode);
              emit_insn (gen_set_got (pic));
            }
        }

      if (TARGET_GNU2_TLS)
        {
          /* GNU2 dialect: descriptor call on _TLS_MODULE_BASE_.  */
          rtx tmp = ix86_tls_module_base ();

          base = gen_reg_rtx (ptr_mode);
          if (TARGET_64BIT)
            emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
          else
            emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

          tp = get_thread_pointer (ptr_mode, true);
          if (GET_MODE (base) != Pmode)
            base = gen_rtx_ZERO_EXTEND (Pmode, base);
          base = force_reg (Pmode, base);
        }
      else
        {
          rtx caddr = ix86_tls_get_addr ();

          base = gen_reg_rtx (Pmode);
          if (TARGET_64BIT)
            {
              rtx rax = gen_rtx_REG (Pmode, AX_REG);
              rtx rdi = gen_rtx_REG (Pmode, DI_REG);
              rtx_insn *insns;
              rtx eqv;

              start_sequence ();
              emit_call_insn
                (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi));
              insns = end_sequence ();

              /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
                 share the LD_BASE result with other LD model accesses.  */
              eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                    UNSPEC_TLS_LD_BASE);

              RTL_CONST_CALL_P (insns) = 1;
              emit_libcall_block (insns, base, rax, eqv);
            }
          else
            emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
        }

      /* Add the per-symbol @DTPOFF displacement to the module base.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
        {
          /* GNU2 result is thread-pointer relative; add TP, narrowing
             and re-widening when TP is in a narrower mode.  */
          if (GET_MODE (tp) != Pmode)
            {
              dest = lowpart_subreg (ptr_mode, dest, Pmode);
              dest = gen_rtx_PLUS (ptr_mode, tp, dest);
              dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
            }
          else
            dest = gen_rtx_PLUS (Pmode, tp, dest);
          dest = force_reg (Pmode, dest);

          if (GET_MODE (x) != Pmode)
            x = gen_rtx_ZERO_EXTEND (Pmode, x);

          set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
        }
      break;

    case TLS_MODEL_INITIAL_EXEC:
      /* IE: load the TP offset from the GOT, then add the thread
         pointer.  Pick the unspec flavor per target/PIC mode.  */
      if (TARGET_64BIT)
        {
          /* Generate DImode references to avoid %fs:(%reg32)
             problems and linker IE->LE relaxation bug.  */
          tp_mode = DImode;
          pic = NULL;
          type = UNSPEC_GOTNTPOFF;
        }
      else if (flag_pic)
        {
          pic = pic_offset_table_rtx;
          type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
        }
      else if (!TARGET_ANY_GNU_TLS)
        {
          pic = gen_reg_rtx (Pmode);
          emit_insn (gen_set_got (pic));
          type = UNSPEC_GOTTPOFF;
        }
      else
        {
          pic = NULL;
          type = UNSPEC_INDNTPOFF;
        }

      /* Build the GOT slot load for the offset.  */
      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
      if (pic)
        off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, GOT_ALIAS_SET);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
        {
          /* Offset is negative relative to TP: simply add them.  */
          base = get_thread_pointer (tp_mode,
                                     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          off = force_reg (tp_mode, off);
          dest = gen_rtx_PLUS (tp_mode, base, off);
          if (tp_mode != Pmode)
            dest = convert_to_mode (Pmode, dest, 1);
        }
      else
        {
          /* Sun TLS style: offset is positive; subtract it from TP.  */
          base = get_thread_pointer (Pmode, true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (gen_sub3_insn (dest, base, off));
        }
      break;

    case TLS_MODEL_LOCAL_EXEC:
      /* LE: the offset is a link-time constant (@NTPOFF/@TPOFF);
         no GOT access is needed.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
                            (TARGET_64BIT || TARGET_ANY_GNU_TLS)
                            ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
        {
          base = get_thread_pointer (Pmode,
                                     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          return gen_rtx_PLUS (Pmode, base, off);
        }
      else
        {
          base = get_thread_pointer (Pmode, true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (gen_sub3_insn (dest, base, off));
        }
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
   12980              : 
   12981              : /* Return true if the TLS address requires insn using integer registers.
   12982              :    It's used to prevent KMOV/VMOV in TLS code sequences which require integer
   12983              :    MOV instructions, refer to PR103275.  */
   12984              : bool
   12985     15221359 : ix86_gpr_tls_address_pattern_p (rtx mem)
   12986              : {
   12987     15221359 :   gcc_assert (MEM_P (mem));
   12988              : 
   12989     15221359 :   rtx addr = XEXP (mem, 0);
   12990     15221359 :   subrtx_var_iterator::array_type array;
   12991     53026351 :   FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL)
   12992              :     {
   12993     37812527 :       rtx op = *iter;
   12994     37812527 :       if (GET_CODE (op) == UNSPEC)
   12995       201658 :         switch (XINT (op, 1))
   12996              :           {
   12997              :           case UNSPEC_GOTNTPOFF:
   12998         7535 :             return true;
   12999            0 :           case UNSPEC_TPOFF:
   13000            0 :             if (!TARGET_64BIT)
   13001              :               return true;
   13002              :             break;
   13003              :           default:
   13004              :             break;
   13005              :           }
   13006              :     }
   13007              : 
   13008     15213824 :   return false;
   13009     15221359 : }
   13010              : 
   13011              : /* Return true if OP refers to a TLS address.  */
   13012              : bool
   13013    232881913 : ix86_tls_address_pattern_p (rtx op)
   13014              : {
   13015    232881913 :   subrtx_var_iterator::array_type array;
   13016   1384618318 :   FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
   13017              :     {
   13018   1151754386 :       rtx op = *iter;
   13019   1151754386 :       if (MEM_P (op))
   13020              :         {
   13021    105327725 :           rtx *x = &XEXP (op, 0);
   13022    166716321 :           while (GET_CODE (*x) == PLUS)
   13023              :             {
   13024              :               int i;
   13025    184183792 :               for (i = 0; i < 2; i++)
   13026              :                 {
   13027    122795196 :                   rtx u = XEXP (*x, i);
   13028    122795196 :                   if (GET_CODE (u) == ZERO_EXTEND)
   13029       138440 :                     u = XEXP (u, 0);
   13030    122795196 :                   if (GET_CODE (u) == UNSPEC
   13031        18013 :                       && XINT (u, 1) == UNSPEC_TP)
   13032        17981 :                     return true;
   13033              :                 }
   13034     61388596 :               x = &XEXP (*x, 0);
   13035              :             }
   13036              : 
   13037    105309744 :           iter.skip_subrtxes ();
   13038              :         }
   13039              :     }
   13040              : 
   13041    232863932 :   return false;
   13042    232881913 : }
   13043              : 
/* Rewrite *LOC so that it refers to a default TLS address space.

   Helper for ix86_rewrite_tls_address.  Finds a MEM whose address
   contains a (unspec UNSPEC_TP) term (the thread pointer), removes that
   term from the address, and instead tags the MEM with the default TLS
   segment address space so the segment override is applied directly.  */
static void
ix86_rewrite_tls_address_1 (rtx *loc)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
    {
      rtx *loc = *iter;
      if (MEM_P (*loc))
	{
	  rtx addr = XEXP (*loc, 0);
	  rtx *x = &addr;
	  /* Walk down the left spine of PLUS terms, checking both
	     operands at each level for the thread-pointer UNSPEC
	     (possibly under a ZERO_EXTEND).  */
	  while (GET_CODE (*x) == PLUS)
	    {
	      int i;
	      for (i = 0; i < 2; i++)
		{
		  rtx u = XEXP (*x, i);
		  if (GET_CODE (u) == ZERO_EXTEND)
		    u = XEXP (u, 0);
		  if (GET_CODE (u) == UNSPEC
		      && XINT (u, 1) == UNSPEC_TP)
		    {
		      /* NB: Since address override only applies to the
			 (reg32) part in fs:(reg32), return if address
			 override is used.  */
		      if (Pmode != word_mode
			  && REG_P (XEXP (*x, 1 - i)))
			return;

		      addr_space_t as = DEFAULT_TLS_SEG_REG;

		      /* Drop the UNSPEC_TP term, keeping the other
			 operand of this PLUS.  */
		      *x = XEXP (*x, 1 - i);

		      /* Replace the MEM's address and mark it as living
			 in the TLS segment address space.  */
		      *loc = replace_equiv_address_nv (*loc, addr, true);
		      set_mem_addr_space (*loc, as);
		      return;
		    }
		}
	      x = &XEXP (*x, 0);
	    }

	  iter.skip_subrtxes ();
	}
    }
}
   13090              : 
   13091              : /* Rewrite instruction pattern involvning TLS address
   13092              :    so that it refers to a default TLS address space.  */
   13093              : rtx
   13094        17981 : ix86_rewrite_tls_address (rtx pattern)
   13095              : {
   13096        17981 :   pattern = copy_insn (pattern);
   13097        17981 :   ix86_rewrite_tls_address_1 (&pattern);
   13098        17981 :   return pattern;
   13099              : }
   13100              : 
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
   This macro is used in only one place: `memory_address' in explow.cc.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   It is always safe for this macro to do nothing.  It exists to recognize
   opportunities to optimize the output.

   For the 80386, we handle X+REG by loading X into a register R and
   using R+REG.  R will go in a general reg and indexing will be used.
   However, if REG is a broken-out memory address or multiplication,
   nothing needs to be done because REG can certainly go in a general reg.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address in i386.cc for details.  */

static rtx
ix86_legitimize_address (rtx x, rtx, machine_mode mode)
{
  bool changed = false;
  unsigned log;

  /* TLS symbols (and CONSTs of the form symbol+offset with a TLS
     symbol) get their own legitimization path.  */
  log = SYMBOL_REF_P (x) ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
				      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
#if TARGET_PECOFF
      rtx tmp = legitimize_pe_coff_symbol (x, true);
      if (tmp)
        return tmp;
#endif
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = true;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
			GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
	{
	  changed = true;
	  log = INTVAL (XEXP (XEXP (x, 0), 1));
	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
				      GEN_INT (1 << log));
	}

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
	{
	  changed = true;
	  log = INTVAL (XEXP (XEXP (x, 1), 1));
	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
				      GEN_INT (1 << log));
	}

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  std::swap (XEXP (x, 0), XEXP (x, 1));
	  changed = true;
	}

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
	 created by virtual register instantiation, register elimination, and
	 similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  changed = true;
	  x = gen_rtx_PLUS (Pmode,
			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
					  XEXP (XEXP (x, 1), 0)),
			    XEXP (XEXP (x, 1), 1));
	}

      /* Canonicalize
	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
	       && CONSTANT_P (XEXP (x, 1)))
	{
	  rtx constant;
	  rtx other = NULL_RTX;

	  /* The CONST_INT may be either the outer addend or the inner
	     one; fold whichever it is into the remaining term.  */
	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      constant = XEXP (x, 1);
	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    }
	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
	    {
	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
	      other = XEXP (x, 1);
	    }
	  else
	    constant = 0;

	  if (constant)
	    {
	      changed = true;
	      x = gen_rtx_PLUS (Pmode,
				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
				plus_constant (Pmode, other,
					       INTVAL (constant)));
	    }
	}

      /* If any of the canonicalizations above already produced a
	 legitimate address, we are done.  */
      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      /* Otherwise force MULT subexpressions into registers.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
	{
	  changed = true;
	  XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
	}

      if (GET_CODE (XEXP (x, 1)) == MULT)
	{
	  changed = true;
	  XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
	}

      if (changed
	  && REG_P (XEXP (x, 1))
	  && REG_P (XEXP (x, 0)))
	return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
	{
	  changed = true;
	  x = legitimize_pic_address (x, 0);
	}

      if (changed && ix86_legitimate_address_p (mode, x, false))
	return x;

      /* Last resort: force the non-register operand of the PLUS into a
	 fresh register so the address becomes reg+reg.  */
      if (REG_P (XEXP (x, 0)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 1) = temp;
	  return x;
	}

      else if (REG_P (XEXP (x, 1)))
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    {
	      val = convert_to_mode (Pmode, val, 1);
	      emit_move_insn (temp, val);
	    }

	  XEXP (x, 0) = temp;
	  return x;
	}
    }

  return x;
}
   13307              : 
/* Print an integer constant expression in assembler syntax.  Addition
   and subtraction are the only arithmetic that may appear in these
   expressions.  FILE is the stdio stream to write to, X is the rtx, and
   CODE is the operand print code from the output string.  */

static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
	output_addr_const (file, x);
      else
	{
	  const char *name = XSTR (x, 0);

	  /* Mark the decl as referenced so that cgraph will
	     output the function.  */
	  if (SYMBOL_REF_DECL (x))
	    mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
	  if (MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      /* 'P' print code: a call target that may need the @PLT suffix.  */
      if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
	  && code == 'P' && ix86_call_use_plt_p (x))
	fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    CASE_CONST_SCALAR_INT:
      output_addr_const (file, x);
      break;

    case CONST:
      /* This used to output parentheses around the expression,
	 but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      /* We can't handle floating point constants;
	 TARGET_PRINT_OPERAND must handle them.  */
      output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
	{
	  output_pic_addr_const (file, XEXP (x, 0), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 1), code);
	}
      else
	{
	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
	  output_pic_addr_const (file, XEXP (x, 1), code);
	  putc ('+', file);
	  output_pic_addr_const (file, XEXP (x, 0), code);
	}
      break;

    case MINUS:
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      /* Print the wrapped operand followed by the relocation suffix
	 that corresponds to the UNSPEC code.  */
      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
	{
	case UNSPEC_GOT:
	  fputs ("@GOT", file);
	  break;
	case UNSPEC_GOTOFF:
	  fputs ("@GOTOFF", file);
	  break;
	case UNSPEC_PLTOFF:
	  fputs ("@PLTOFF", file);
	  break;
	case UNSPEC_PCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "(%rip)" : "[rip]", file);
	  break;
	case UNSPEC_GOTPCREL:
	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
	  break;
	case UNSPEC_GOTTPOFF:
	  /* FIXME: This might be @TPOFF in Sun ld too.  */
	  fputs ("@gottpoff", file);
	  break;
	case UNSPEC_TPOFF:
	  fputs ("@tpoff", file);
	  break;
	case UNSPEC_NTPOFF:
	  if (TARGET_64BIT)
	    fputs ("@tpoff", file);
	  else
	    fputs ("@ntpoff", file);
	  break;
	case UNSPEC_DTPOFF:
	  fputs ("@dtpoff", file);
	  break;
	case UNSPEC_GOTNTPOFF:
	  if (TARGET_64BIT)
	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
	  else
	    fputs ("@gotntpoff", file);
	  break;
	case UNSPEC_INDNTPOFF:
	  fputs ("@indntpoff", file);
	  break;
	case UNSPEC_SECREL32:
	  fputs ("@secrel32", file);
	  break;
#if TARGET_MACHO
	case UNSPEC_MACHOPIC_OFFSET:
	  putc ('-', file);
	  machopic_output_function_base_name (file);
	  break;
#endif
	default:
	  output_operand_lossage ("invalid UNSPEC as operand");
	  break;
	}
       break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
   13467              : 
   13468              : /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   13469              :    We need to emit DTP-relative relocations.  */
   13470              : 
   13471              : static void ATTRIBUTE_UNUSED
   13472          668 : i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
   13473              : {
   13474          668 :   fputs (ASM_LONG, file);
   13475          668 :   output_addr_const (file, x);
   13476              : #if TARGET_WIN32_TLS
   13477              :   fputs ("@secrel32", file);
   13478              : #else
   13479          668 :   fputs ("@dtpoff", file);
   13480              : #endif
   13481          668 :   switch (size)
   13482              :     {
   13483              :     case 4:
   13484              :       break;
   13485          549 :     case 8:
   13486          549 :       fputs (", 0", file);
   13487          549 :       break;
   13488            0 :     default:
   13489            0 :       gcc_unreachable ();
   13490              :    }
   13491          668 : }
   13492              : 
   13493              : /* Return true if X is a representation of the PIC register.  This copes
   13494              :    with calls from ix86_find_base_term, where the register might have
   13495              :    been replaced by a cselib value.  */
   13496              : 
   13497              : static bool
   13498     26896306 : ix86_pic_register_p (rtx x)
   13499              : {
   13500     26896306 :   if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
   13501       748622 :     return (pic_offset_table_rtx
   13502       748622 :             && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
   13503     26147684 :   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
   13504              :     return true;
   13505     26144692 :   else if (!REG_P (x))
   13506              :     return false;
   13507     25533062 :   else if (pic_offset_table_rtx)
   13508              :     {
   13509     25513177 :       if (REGNO (x) == REGNO (pic_offset_table_rtx))
   13510              :         return true;
   13511       407858 :       if (HARD_REGISTER_P (x)
   13512       386388 :           && !HARD_REGISTER_P (pic_offset_table_rtx)
   13513       794246 :           && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
   13514              :         return true;
   13515              :       return false;
   13516              :     }
   13517              :   else
   13518        19885 :     return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
   13519              : }
   13520              : 
/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses, i.e. turn a
   segment-relative %fs:/%gs: address containing UNSPEC_NTPOFF back
   into a plain reference to the TLS symbol (plus any base/index).  */

static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  /* Without direct segment references there is nothing to undo.  */
  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  /* Only addresses in the default TLS segment whose displacement is a
     CONST are candidates.  */
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != DEFAULT_TLS_SEG_REG
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  /* The displacement may be (const (plus (unspec ...) (const_int))).  */
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (SYMBOL_REF_P (x));
  /* Re-attach the constant offset that was folded into the CONST.  */
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  /* Rebuild base + index*scale around the symbol reference.  */
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}
   13563              : 
   13564              : /* In the name of slightly smaller debug output, and to cater to
   13565              :    general assembler lossage, recognize PIC+GOTOFF and turn it back
   13566              :    into a direct symbol reference.
   13567              : 
   13568              :    On Darwin, this is necessary to avoid a crash, because Darwin
   13569              :    has a different PIC label for each routine but the DWARF debugging
   13570              :    information is not associated with any particular routine, so it's
   13571              :    necessary to remove references to the PIC label from RTL stored by
   13572              :    the DWARF output code.
   13573              : 
   13574              :    This helper is used in the normal ix86_delegitimize_address
   13575              :    entrypoint (e.g. used in the target delegitimization hook) and
   13576              :    in ix86_find_base_term.  As compile time memory optimization, we
   13577              :    avoid allocating rtxes that will not change anything on the outcome
   13578              :    of the callers (find_base_value and find_base_term).  */
   13579              : 
static inline rtx
ix86_delegitimize_address_1 (rtx x, bool base_term_p)
{
  rtx orig_x = delegitimize_mem_from_attrs (x);
  /* addend is NULL or some rtx if x is something+GOTOFF where
     something doesn't include the PIC register.  */
  rtx addend = NULL_RTX;
  /* reg_addend is NULL or a multiple of some register.  */
  rtx reg_addend = NULL_RTX;
  /* const_addend is NULL or a const_int.  */
  rtx const_addend = NULL_RTX;
  /* This is the result, or NULL.  */
  rtx result = NULL_RTX;

  x = orig_x;

  /* Analyze the address when ORIG_X is a memory reference.  */
  if (MEM_P (x))
    x = XEXP (x, 0);

  if (TARGET_64BIT)
    {
      /* (const (plus (unspec [sym] PCREL) off)) -> sym + off.  */
      if (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == PLUS
          && GET_MODE (XEXP (x, 0)) == Pmode
          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
          && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
          && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
        {
          /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
             base.  A CONST can't be arg_pointer_rtx based.  */
          if (base_term_p && MEM_P (orig_x))
            return orig_x;
          rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
          x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
          if (MEM_P (orig_x))
            x = replace_equiv_address_nv (orig_x, x);
          return x;
        }

      /* (const (unspec [sym] GOTPCREL/PCREL)) -> sym, where a GOTPCREL
         reference must come from a MEM (the GOT load).  */
      if (GET_CODE (x) == CONST
          && GET_CODE (XEXP (x, 0)) == UNSPEC
          && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
              || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
          && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
        {
          x = XVECEXP (XEXP (x, 0), 0, 0);
          /* Narrow the symbol to ORIG_X's mode if they disagree.  */
          if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
            {
              x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
              if (x == NULL_RTX)
                return orig_x;
            }
          return x;
        }

      if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
        return ix86_delegitimize_tls_address (orig_x);

      /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
         and -mcmodel=medium -fpic.  */
    }

  /* From here on we handle PIC-register-relative addresses of the form
     something + (const ...).  Anything else may still be a TLS access.  */
  if (GET_CODE (x) != PLUS
      || GET_CODE (XEXP (x, 1)) != CONST)
    return ix86_delegitimize_tls_address (orig_x);

  if (ix86_pic_register_p (XEXP (x, 0)))
    /* %ebx + GOT/GOTOFF */
    ;
  else if (GET_CODE (XEXP (x, 0)) == PLUS)
    {
      /* %ebx + %reg * scale + GOT/GOTOFF */
      reg_addend = XEXP (x, 0);
      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
        reg_addend = XEXP (reg_addend, 1);
      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
        reg_addend = XEXP (reg_addend, 0);
      else
        {
          /* Neither operand is the PIC register: treat the whole inner
             PLUS as a non-PIC addend to be compensated for below.  */
          reg_addend = NULL_RTX;
          addend = XEXP (x, 0);
        }
    }
  else
    addend = XEXP (x, 0);

  /* Strip the CONST wrapper and any trailing constant offset.  */
  x = XEXP (XEXP (x, 1), 0);
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1)))
    {
      const_addend = XEXP (x, 1);
      x = XEXP (x, 0);
    }

  /* Recognize the GOT/GOTOFF/PLTOFF unspecs that wrap the symbol;
     each one is only valid in a particular MEM/addend context.  */
  if (GET_CODE (x) == UNSPEC
      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
          || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
          || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
              && !MEM_P (orig_x) && !addend)))
    result = XVECEXP (x, 0, 0);

  /* Darwin's local-data PIC references are delegitimized the same way.  */
  if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
      && !MEM_P (orig_x))
    result = XVECEXP (x, 0, 0);

  if (! result)
    return ix86_delegitimize_tls_address (orig_x);

  /* For (PLUS something CONST_INT) both find_base_{value,term} just
     recurse on the first operand.  */
  if (const_addend && !base_term_p)
    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
  if (reg_addend)
    result = gen_rtx_PLUS (Pmode, reg_addend, result);
  if (addend)
    {
      /* If the rest of original X doesn't involve the PIC register, add
         addend and subtract pic_offset_table_rtx.  This can happen e.g.
         for code like:
         leal (%ebx, %ecx, 4), %ecx
         ...
         movl foo@GOTOFF(%ecx), %edx
         in which case we return (%ecx - %ebx) + foo
         or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
         and reload has completed.  Don't do the latter for debug,
         as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly.  */
      if (pic_offset_table_rtx
          && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
        result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
                                                     pic_offset_table_rtx),
                               result);
      else if (base_term_p
               && pic_offset_table_rtx
               && !TARGET_MACHO
               && !TARGET_VXWORKS_VAROFF)
        {
          rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
          tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
          result = gen_rtx_PLUS (Pmode, tmp, result);
        }
      else
        return orig_x;
    }
  /* Narrow the result to ORIG_X's mode when they disagree.  */
  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
    {
      result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
      if (result == NULL_RTX)
        return orig_x;
    }
  return result;
}
   13731              : 
   13732              : /* The normal instantiation of the above template.  */
   13733              : 
   13734              : static rtx
   13735    324373940 : ix86_delegitimize_address (rtx x)
   13736              : {
   13737    324373940 :   return ix86_delegitimize_address_1 (x, false);
   13738              : }
   13739              : 
   13740              : /* If X is a machine specific address (i.e. a symbol or label being
   13741              :    referenced as a displacement from the GOT implemented using an
   13742              :    UNSPEC), then return the base term.  Otherwise return X.  */
   13743              : 
   13744              : rtx
   13745   6675936890 : ix86_find_base_term (rtx x)
   13746              : {
   13747   6675936890 :   rtx term;
   13748              : 
   13749   6675936890 :   if (TARGET_64BIT)
   13750              :     {
   13751   3473730152 :       if (GET_CODE (x) != CONST)
   13752              :         return x;
   13753     45069016 :       term = XEXP (x, 0);
   13754     45069016 :       if (GET_CODE (term) == PLUS
   13755     45054143 :           && CONST_INT_P (XEXP (term, 1)))
   13756     45054143 :         term = XEXP (term, 0);
   13757     45069016 :       if (GET_CODE (term) != UNSPEC
   13758        40579 :           || (XINT (term, 1) != UNSPEC_GOTPCREL
   13759        40579 :               && XINT (term, 1) != UNSPEC_PCREL))
   13760              :         return x;
   13761              : 
   13762            0 :       return XVECEXP (term, 0, 0);
   13763              :     }
   13764              : 
   13765   3202206738 :   return ix86_delegitimize_address_1 (x, true);
   13766              : }
   13767              : 
   13768              : /* Return true if X shouldn't be emitted into the debug info.
   13769              :    Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
   13770              :    symbol easily into the .debug_info section, so we need not to
   13771              :    delegitimize, but instead assemble as @gotoff.
   13772              :    Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
   13773              :    assembles that as _GLOBAL_OFFSET_TABLE_-. expression.  */
   13774              : 
   13775              : static bool
   13776      1891873 : ix86_const_not_ok_for_debug_p (rtx x)
   13777              : {
   13778      1891873 :   if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
   13779              :     return true;
   13780              : 
   13781      1891853 :   if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
   13782            0 :     return true;
   13783              : 
   13784              :   return false;
   13785              : }
   13786              : 
/* Print to FILE the x86 condition-code suffix (e.g. "e", "ne", "ge")
   for comparison CODE evaluated in flags mode MODE.  If REVERSE, print
   the suffix for the reversed condition.  FP selects the spellings that
   work around assembler lossage on fcmov vs cmov (see the GTU case) and
   the "u"/"nu" forms for unordered tests.  CCFPmode comparisons are
   first mapped onto the equivalent integer condition.  */

static void
put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
                    bool fp, FILE *file)
{
  const char *suffix;

  /* FP compares set the flags like an unsigned integer compare after
     ix86_fp_compare_code_to_integer translation.  */
  if (mode == CCFPmode)
    {
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      gcc_assert (mode != CCGZmode);
      /* In the single-flag CC modes, EQ tests that one flag directly.  */
      switch (mode)
        {
        case E_CCAmode:
          suffix = "a";
          break;
        case E_CCCmode:
          suffix = "c";
          break;
        case E_CCOmode:
          suffix = "o";
          break;
        case E_CCPmode:
          suffix = "p";
          break;
        case E_CCSmode:
          suffix = "s";
          break;
        default:
          suffix = "e";
          break;
        }
      break;
    case NE:
      gcc_assert (mode != CCGZmode);
      /* Negated forms of the single-flag tests above.  */
      switch (mode)
        {
        case E_CCAmode:
          suffix = "na";
          break;
        case E_CCCmode:
          suffix = "nc";
          break;
        case E_CCOmode:
          suffix = "no";
          break;
        case E_CCPmode:
          suffix = "np";
          break;
        case E_CCSmode:
          suffix = "ns";
          break;
        default:
          suffix = "ne";
          break;
        }
      break;
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
         Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
        suffix = fp ? "nbe" : "a";
      else
        gcc_unreachable ();
      break;
    case LT:
      switch (mode)
        {
        /* When only the sign flag is meaningful, LT is a sign test.  */
        case E_CCNOmode:
        case E_CCGOCmode:
          suffix = "s";
          break;

        case E_CCmode:
        case E_CCGCmode:
        case E_CCGZmode:
          suffix = "l";
          break;

        default:
          gcc_unreachable ();
        }
      break;
    case LTU:
      if (mode == CCmode || mode == CCGZmode)
        suffix = "b";
      else if (mode == CCCmode)
        suffix = fp ? "b" : "c";
      else
        gcc_unreachable ();
      break;
    case GE:
      switch (mode)
        {
        /* Negated sign test, mirroring the LT cases above.  */
        case E_CCNOmode:
        case E_CCGOCmode:
          suffix = "ns";
          break;

        case E_CCmode:
        case E_CCGCmode:
        case E_CCGZmode:
          suffix = "ge";
          break;

        default:
          gcc_unreachable ();
        }
      break;
    case GEU:
      if (mode == CCmode || mode == CCGZmode)
        suffix = "nb";
      else if (mode == CCCmode)
        suffix = fp ? "nb" : "nc";
      else
        gcc_unreachable ();
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;
    case LEU:
      if (mode == CCmode)
        suffix = "be";
      else
        gcc_unreachable ();
      break;
    case UNORDERED:
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}
   13936              : 
   13937              : /* Print the name of register X to FILE based on its machine mode and number.
   13938              :    If CODE is 'w', pretend the mode is HImode.
   13939              :    If CODE is 'b', pretend the mode is QImode.
   13940              :    If CODE is 'k', pretend the mode is SImode.
   13941              :    If CODE is 'q', pretend the mode is DImode.
   13942              :    If CODE is 'x', pretend the mode is V4SFmode.
   13943              :    If CODE is 't', pretend the mode is V8SFmode.
   13944              :    If CODE is 'g', pretend the mode is V16SFmode.
   13945              :    If CODE is 'h', pretend the reg is the 'high' byte register.
   13946              :    If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   13947              :    If CODE is 'd', duplicate the operand for AVX instruction.
   13948              :    If CODE is 'V', print naked full integer register name without %.
   13949              :  */
   13950              : 
void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg;
  int msize;
  unsigned int regno;
  bool duplicated;

  /* AT&T syntax prefixes register names with '%'; the 'V' modifier
     explicitly asks for the naked name.  */
  if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
    putc ('%', file);

  /* The program counter is only addressable as %rip in 64-bit mode.  */
  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);
      return;
    }

  /* 'y': spell the x87 stack top as "st(0)" rather than "st".  */
  if (code == 'y' && STACK_TOP_P (x))
    {
      fputs ("st(0)", file);
      return;
    }

  /* Derive the size (in bytes) whose register name we must print,
     either from the modifier letter or from X's machine mode.
     msize == 0 is the special 'h' (high byte: ah/bh/ch/dh) case.  */
  if (code == 'w')
    msize = 2;
  else if (code == 'b')
    msize = 1;
  else if (code == 'k')
    msize = 4;
  else if (code == 'q')
    msize = 8;
  else if (code == 'h')
    msize = 0;
  else if (code == 'x')
    msize = 16;
  else if (code == 't')
    msize = 32;
  else if (code == 'g')
    msize = 64;
  else
    msize = GET_MODE_SIZE (GET_MODE (x));

  regno = REGNO (x);

  /* Internal-only registers must never reach the assembler output.  */
  if (regno == ARG_POINTER_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == FPSR_REG)
    {
      output_operand_lossage
        ("invalid use of register '%s'", reg_names[regno]);
      return;
    }
  else if (regno == FLAGS_REG)
    {
      output_operand_lossage ("invalid use of asm flag output");
      return;
    }

  /* 'V' prints the full-width integer register name regardless of mode.  */
  if (code == 'V')
    {
      if (GENERAL_REGNO_P (regno))
        msize = GET_MODE_SIZE (word_mode);
      else
        error ("%<V%> modifier on non-integer register");
    }

  duplicated = code == 'd' && TARGET_AVX;

  switch (msize)
    {
    case 16:
    case 12:
    case 8:
      if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
        warning (0, "unsupported size for integer register");
      /* FALLTHRU */
    case 4:
      /* Legacy integer registers get an 'e' (32-bit) or 'r' (64-bit)
         prefix: eax vs rax.  */
      if (LEGACY_INT_REGNO_P (regno))
        putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 2:
    normal:
      reg = hi_reg_name[regno];
      break;
    case 1:
      /* Byte registers: fall back to the normal name table for regnos
         past the QImode name table (e.g. non-legacy registers).  */
      if (regno >= ARRAY_SIZE (qi_reg_name))
        goto normal;
      if (!ANY_QI_REGNO_P (regno))
        error ("unsupported size for integer register");
      reg = qi_reg_name[regno];
      break;
    case 0:
      /* 'h' high-byte registers (ah/bh/ch/dh).  */
      if (regno >= ARRAY_SIZE (qi_high_reg_name))
        goto normal;
      reg = qi_high_reg_name[regno];
      break;
    case 32:
    case 64:
      /* 256-/512-bit SSE registers: print 'y'/'z' then the name with
         its leading 'x' stripped, i.e. ymm0/zmm0 from "xmm0".  */
      if (SSE_REGNO_P (regno))
        {
          gcc_assert (!duplicated);
          putc (msize == 32 ? 'y' : 'z', file);
          reg = hi_reg_name[regno] + 1;
          break;
        }
      goto normal;
    default:
      gcc_unreachable ();
    }

  fputs (reg, file);

  /* Irritatingly, AMD extended registers use
     different naming convention: "r%d[bwd]"  */
  if (REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
    {
      gcc_assert (TARGET_64BIT);
      switch (msize)
        {
          case 0:
            error ("extended registers have no high halves");
            break;
          case 1:
            putc ('b', file);
            break;
          case 2:
            putc ('w', file);
            break;
          case 4:
            putc ('d', file);
            break;
          case 8:
            /* no suffix */
            break;
          default:
            error ("unsupported operand size for extended register");
            break;
        }
      return;
    }

  /* 'd' on AVX: repeat the register as an extra source operand.  */
  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
        fprintf (file, ", %%%s", reg);
      else
        fprintf (file, ", %s", reg);
    }
}
   14101              : 
   14102              : /* Meaning of CODE:
   14103              :    L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   14104              :    C -- print opcode suffix for set/cmov insn.
   14105              :    c -- like C, but print reversed condition
   14106              :    F,f -- likewise, but for floating-point.
   14107              :    O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
   14108              :         otherwise nothing
   14109              :    R -- print embedded rounding and sae.
   14110              :    r -- print only sae.
   14111              :    z -- print the opcode suffix for the size of the current operand.
   14112              :    Z -- likewise, with special suffixes for x87 instructions.
   14113              :    * -- print a star (in certain assembler syntax)
   14114              :    A -- print an absolute memory reference.
   14115              :    E -- print address with DImode register names if TARGET_64BIT.
   14116              :    w -- print the operand as if it's a "word" (HImode) even if it isn't.
   14117              :    s -- print a shift double count, followed by the assemblers argument
   14118              :         delimiter.
   14119              :    b -- print the QImode name of the register for the indicated operand.
   14120              :         %b0 would print %al if operands[0] is reg 0.
   14121              :    w --  likewise, print the HImode name of the register.
   14122              :    k --  likewise, print the SImode name of the register.
   14123              :    q --  likewise, print the DImode name of the register.
   14124              :    x --  likewise, print the V4SFmode name of the register.
   14125              :    t --  likewise, print the V8SFmode name of the register.
   14126              :    g --  likewise, print the V16SFmode name of the register.
   14127              :    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   14128              :    y -- print "st(0)" instead of "st" as a register.
   14129              :    d -- print duplicated register operand for AVX instruction.
   14130              :    D -- print condition for SSE cmp instruction.
   14131              :    P -- if PIC, print an @PLT suffix.  For -fno-plt, load function
   14132              :         address from GOT.
   14133              :    p -- print raw symbol name.
   14134              :    X -- don't print any sort of PIC '@' suffix for a symbol.
   14135              :    & -- print some in-use local-dynamic symbol name.
   14136              :    H -- print a memory address offset by 8; used for sse high-parts
   14137              :    Y -- print condition for XOP pcom* instruction.
   14138              :    V -- print naked full integer register name without %.
   14139              :    v -- print segment override prefix
   14140              :    + -- print a branch hint as 'cs' or 'ds' prefix
   14141              :    ; -- print a semicolon (after prefixes due to bug in older gas).
   14142              :    ~ -- print "i" if TARGET_AVX2, "f" otherwise.
   14143              :    ^ -- print addr32 prefix if Pmode != word_mode
   14144              :    M -- print addr32 prefix for TARGET_X32 with VSIB address.
   14145              :    ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
   14146              :    N -- print maskz if it's constant 0 operand.
   14147              :    G -- print embedded flag for ccmp/ctest.
   14148              :  */
   14149              : 
   14150              : void
   14151    176823180 : ix86_print_operand (FILE *file, rtx x, int code)
   14152              : {
   14153    177013623 :   if (code)
   14154              :     {
   14155     62206711 :       switch (code)
   14156              :         {
   14157       190439 :         case 'A':
   14158       190439 :           switch (ASSEMBLER_DIALECT)
   14159              :             {
   14160       190439 :             case ASM_ATT:
   14161       190439 :               putc ('*', file);
   14162       190439 :               break;
   14163              : 
   14164            0 :             case ASM_INTEL:
   14165              :               /* Intel syntax. For absolute addresses, registers should not
   14166              :                  be surrounded by braces.  */
   14167            0 :               if (!REG_P (x))
   14168              :                 {
   14169            0 :                   putc ('[', file);
   14170            0 :                   ix86_print_operand (file, x, 0);
   14171            0 :                   putc (']', file);
   14172            0 :                   return;
   14173              :                 }
   14174              :               break;
   14175              : 
   14176            0 :             default:
   14177            0 :               gcc_unreachable ();
   14178              :             }
   14179              : 
   14180       190439 :           ix86_print_operand (file, x, 0);
   14181       190439 :           return;
   14182              : 
   14183      3562162 :         case 'E':
   14184              :           /* Wrap address in an UNSPEC to declare special handling.  */
   14185      3562162 :           if (TARGET_64BIT)
   14186      3071765 :             x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
   14187              : 
   14188      3562162 :           output_address (VOIDmode, x);
   14189      3562162 :           return;
   14190              : 
   14191            0 :         case 'L':
   14192            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14193            0 :             putc ('l', file);
   14194            0 :           return;
   14195              : 
   14196            0 :         case 'W':
   14197            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14198            0 :             putc ('w', file);
   14199            0 :           return;
   14200              : 
   14201            0 :         case 'B':
   14202            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14203            0 :             putc ('b', file);
   14204            0 :           return;
   14205              : 
   14206            0 :         case 'Q':
   14207            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14208            0 :             putc ('l', file);
   14209            0 :           return;
   14210              : 
   14211            0 :         case 'S':
   14212            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14213            0 :             putc ('s', file);
   14214            0 :           return;
   14215              : 
   14216            0 :         case 'T':
   14217            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14218            0 :             putc ('t', file);
   14219            0 :           return;
   14220              : 
   14221              :         case 'O':
   14222              : #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
   14223              :           if (ASSEMBLER_DIALECT != ASM_ATT)
   14224              :             return;
   14225              : 
   14226              :           switch (GET_MODE_SIZE (GET_MODE (x)))
   14227              :             {
   14228              :             case 2:
   14229              :               putc ('w', file);
   14230              :               break;
   14231              : 
   14232              :             case 4:
   14233              :               putc ('l', file);
   14234              :               break;
   14235              : 
   14236              :             case 8:
   14237              :               putc ('q', file);
   14238              :               break;
   14239              : 
   14240              :             default:
   14241              :               output_operand_lossage ("invalid operand size for operand "
   14242              :                                       "code 'O'");
   14243              :               return;
   14244              :             }
   14245              : 
   14246              :           putc ('.', file);
   14247              : #endif
   14248              :           return;
   14249              : 
   14250        38012 :         case 'z':
   14251        38012 :           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
   14252              :             {
   14253              :               /* Opcodes don't get size suffixes if using Intel opcodes.  */
   14254        38010 :               if (ASSEMBLER_DIALECT == ASM_INTEL)
   14255              :                 return;
   14256              : 
   14257        76020 :               switch (GET_MODE_SIZE (GET_MODE (x)))
   14258              :                 {
   14259            6 :                 case 1:
   14260            6 :                   putc ('b', file);
   14261            6 :                   return;
   14262              : 
   14263            6 :                 case 2:
   14264            6 :                   putc ('w', file);
   14265            6 :                   return;
   14266              : 
   14267        37516 :                 case 4:
   14268        37516 :                   putc ('l', file);
   14269        37516 :                   return;
   14270              : 
   14271          482 :                 case 8:
   14272          482 :                   putc ('q', file);
   14273          482 :                   return;
   14274              : 
   14275            0 :                 default:
   14276            0 :                   output_operand_lossage ("invalid operand size for operand "
   14277              :                                           "code 'z'");
   14278            0 :                   return;
   14279              :                 }
   14280              :             }
   14281              : 
   14282            2 :           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
   14283              :             {
   14284            1 :               if (this_is_asm_operands)
   14285            1 :                 warning_for_asm (this_is_asm_operands,
   14286              :                                  "non-integer operand used with operand code %<z%>");
   14287              :               else
   14288            0 :                 warning (0, "non-integer operand used with operand code %<z%>");
   14289              :             }
   14290              :           /* FALLTHRU */
   14291              : 
   14292       381955 :         case 'Z':
   14293              :           /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
   14294       381955 :           if (ASSEMBLER_DIALECT == ASM_INTEL)
   14295              :             return;
   14296              : 
   14297       381955 :           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
   14298              :             {
   14299        29236 :               switch (GET_MODE_SIZE (GET_MODE (x)))
   14300              :                 {
   14301         3501 :                 case 2:
   14302              : #ifdef HAVE_AS_IX86_FILDS
   14303         3501 :                   putc ('s', file);
   14304              : #endif
   14305         3501 :                   return;
   14306              : 
   14307         3941 :                 case 4:
   14308         3941 :                   putc ('l', file);
   14309         3941 :                   return;
   14310              : 
   14311         7176 :                 case 8:
   14312              : #ifdef HAVE_AS_IX86_FILDQ
   14313         7176 :                   putc ('q', file);
   14314              : #else
   14315              :                   fputs ("ll", file);
   14316              : #endif
   14317         7176 :                   return;
   14318              : 
   14319              :                 default:
   14320              :                   break;
   14321              :                 }
   14322              :             }
   14323       367337 :           else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
   14324              :             {
   14325              :               /* 387 opcodes don't get size suffixes
   14326              :                  if the operands are registers.  */
   14327       367335 :               if (STACK_REG_P (x))
   14328              :                 return;
   14329              : 
   14330       689904 :               switch (GET_MODE_SIZE (GET_MODE (x)))
   14331              :                 {
   14332        23110 :                 case 4:
   14333        23110 :                   putc ('s', file);
   14334        23110 :                   return;
   14335              : 
   14336        32751 :                 case 8:
   14337        32751 :                   putc ('l', file);
   14338        32751 :                   return;
   14339              : 
   14340       289089 :                 case 12:
   14341       289089 :                 case 16:
   14342       289089 :                   putc ('t', file);
   14343       289089 :                   return;
   14344              : 
   14345              :                 default:
   14346              :                   break;
   14347              :                 }
   14348              :             }
   14349              :           else
   14350              :             {
   14351            2 :               output_operand_lossage ("invalid operand type used with "
   14352              :                                       "operand code '%c'", code);
   14353            2 :               return;
   14354              :             }
   14355              : 
   14356            2 :           output_operand_lossage ("invalid operand size for operand code '%c'",
   14357              :                                   code);
   14358            2 :           return;
   14359              : 
   14360              :         case 'd':
   14361              :         case 'b':
   14362              :         case 'w':
   14363              :         case 'k':
   14364              :         case 'q':
   14365              :         case 'h':
   14366              :         case 't':
   14367              :         case 'g':
   14368              :         case 'y':
   14369              :         case 'x':
   14370              :         case 'X':
   14371              :         case 'P':
   14372              :         case 'p':
   14373              :         case 'V':
   14374              :           break;
   14375              : 
   14376            0 :         case 's':
   14377            0 :           if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
   14378              :             {
   14379            0 :               ix86_print_operand (file, x, 0);
   14380            0 :               fputs (", ", file);
   14381              :             }
   14382            0 :           return;
   14383              : 
   14384          494 :         case 'Y':
   14385          494 :           switch (GET_CODE (x))
   14386              :             {
   14387          182 :             case NE:
   14388          182 :               fputs ("neq", file);
   14389          182 :               break;
   14390           32 :             case EQ:
   14391           32 :               fputs ("eq", file);
   14392           32 :               break;
   14393           64 :             case GE:
   14394           64 :             case GEU:
   14395           64 :               fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
   14396           64 :               break;
   14397           40 :             case GT:
   14398           40 :             case GTU:
   14399           40 :               fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
   14400           40 :               break;
   14401           64 :             case LE:
   14402           64 :             case LEU:
   14403           64 :               fputs ("le", file);
   14404           64 :               break;
   14405          112 :             case LT:
   14406          112 :             case LTU:
   14407          112 :               fputs ("lt", file);
   14408          112 :               break;
   14409            0 :             case UNORDERED:
   14410            0 :               fputs ("unord", file);
   14411            0 :               break;
   14412            0 :             case ORDERED:
   14413            0 :               fputs ("ord", file);
   14414            0 :               break;
   14415            0 :             case UNEQ:
   14416            0 :               fputs ("ueq", file);
   14417            0 :               break;
   14418            0 :             case UNGE:
   14419            0 :               fputs ("nlt", file);
   14420            0 :               break;
   14421            0 :             case UNGT:
   14422            0 :               fputs ("nle", file);
   14423            0 :               break;
   14424            0 :             case UNLE:
   14425            0 :               fputs ("ule", file);
   14426            0 :               break;
   14427            0 :             case UNLT:
   14428            0 :               fputs ("ult", file);
   14429            0 :               break;
   14430            0 :             case LTGT:
   14431            0 :               fputs ("une", file);
   14432            0 :               break;
   14433            0 :             default:
   14434            0 :               output_operand_lossage ("operand is not a condition code, "
   14435              :                                       "invalid operand code 'Y'");
   14436            0 :               return;
   14437              :             }
   14438          494 :           return;
   14439              : 
   14440         9312 :         case 'D':
   14441              :           /* Little bit of braindamage here.  The SSE compare instructions
   14442              :              does use completely different names for the comparisons that the
   14443              :              fp conditional moves.  */
   14444         9312 :           switch (GET_CODE (x))
   14445              :             {
   14446            3 :             case UNEQ:
   14447            3 :               if (TARGET_AVX)
   14448              :                 {
   14449            3 :                   fputs ("eq_us", file);
   14450            3 :                   break;
   14451              :                 }
   14452              :              /* FALLTHRU */
   14453         4626 :             case EQ:
   14454         4626 :               fputs ("eq", file);
   14455         4626 :               break;
   14456            0 :             case UNLT:
   14457            0 :               if (TARGET_AVX)
   14458              :                 {
   14459            0 :                   fputs ("nge", file);
   14460            0 :                   break;
   14461              :                 }
   14462              :              /* FALLTHRU */
   14463         1626 :             case LT:
   14464         1626 :               fputs ("lt", file);
   14465         1626 :               break;
   14466            0 :             case UNLE:
   14467            0 :               if (TARGET_AVX)
   14468              :                 {
   14469            0 :                   fputs ("ngt", file);
   14470            0 :                   break;
   14471              :                 }
   14472              :              /* FALLTHRU */
   14473          795 :             case LE:
   14474          795 :               fputs ("le", file);
   14475          795 :               break;
   14476           95 :             case UNORDERED:
   14477           95 :               fputs ("unord", file);
   14478           95 :               break;
   14479           24 :             case LTGT:
   14480           24 :               if (TARGET_AVX)
   14481              :                 {
   14482           24 :                   fputs ("neq_oq", file);
   14483           24 :                   break;
   14484              :                 }
   14485              :              /* FALLTHRU */
   14486          887 :             case NE:
   14487          887 :               fputs ("neq", file);
   14488          887 :               break;
   14489            0 :             case GE:
   14490            0 :               if (TARGET_AVX)
   14491              :                 {
   14492            0 :                   fputs ("ge", file);
   14493            0 :                   break;
   14494              :                 }
   14495              :              /* FALLTHRU */
   14496          403 :             case UNGE:
   14497          403 :               fputs ("nlt", file);
   14498          403 :               break;
   14499            0 :             case GT:
   14500            0 :               if (TARGET_AVX)
   14501              :                 {
   14502            0 :                   fputs ("gt", file);
   14503            0 :                   break;
   14504              :                 }
   14505              :              /* FALLTHRU */
   14506          770 :             case UNGT:
   14507          770 :               fputs ("nle", file);
   14508          770 :               break;
   14509           83 :             case ORDERED:
   14510           83 :               fputs ("ord", file);
   14511           83 :               break;
   14512            0 :             default:
   14513            0 :               output_operand_lossage ("operand is not a condition code, "
   14514              :                                       "invalid operand code 'D'");
   14515            0 :               return;
   14516              :             }
   14517         9312 :           return;
   14518              : 
   14519      7145002 :         case 'F':
   14520      7145002 :         case 'f':
   14521              : #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
   14522              :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14523              :             putc ('.', file);
   14524              :           gcc_fallthrough ();
   14525              : #endif
   14526              : 
   14527      7145002 :         case 'C':
   14528      7145002 :         case 'c':
   14529      7145002 :           if (!COMPARISON_P (x))
   14530              :             {
   14531            0 :               output_operand_lossage ("operand is not a condition code, "
   14532              :                                       "invalid operand code '%c'", code);
   14533            0 :               return;
   14534              :             }
   14535      7145002 :           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
   14536      7145002 :                               code == 'c' || code == 'f',
   14537      7145002 :                               code == 'F' || code == 'f',
   14538              :                               file);
   14539      7145002 :           return;
   14540              : 
   14541           21 :         case 'G':
   14542           21 :           {
   14543           21 :             int dfv = INTVAL (x);
   14544           21 :             const char *dfv_suffix = ix86_ccmp_dfv_mapping[dfv];
   14545           21 :             fputs (dfv_suffix, file);
   14546              :           }
   14547           21 :           return;
   14548              : 
   14549         1301 :         case 'H':
   14550         1301 :           if (!offsettable_memref_p (x))
   14551              :             {
   14552            1 :               output_operand_lossage ("operand is not an offsettable memory "
   14553              :                                       "reference, invalid operand code 'H'");
   14554            1 :               return;
   14555              :             }
   14556              :           /* It doesn't actually matter what mode we use here, as we're
   14557              :              only going to use this for printing.  */
   14558         1300 :           x = adjust_address_nv (x, DImode, 8);
   14559              :           /* Output 'qword ptr' for intel assembler dialect.  */
   14560         1300 :           if (ASSEMBLER_DIALECT == ASM_INTEL)
   14561            0 :             code = 'q';
   14562              :           break;
   14563              : 
   14564        75627 :         case 'K':
   14565        75627 :           if (!CONST_INT_P (x))
   14566              :             {
   14567            1 :               output_operand_lossage ("operand is not an integer, invalid "
   14568              :                                       "operand code 'K'");
   14569            1 :               return;
   14570              :             }
   14571              : 
   14572        75626 :           if (INTVAL (x) & IX86_HLE_ACQUIRE)
   14573              : #ifdef HAVE_AS_IX86_HLE
   14574           22 :             fputs ("xacquire ", file);
   14575              : #else
   14576              :             fputs ("\n" ASM_BYTE "0xf2\n\t", file);
   14577              : #endif
   14578        75604 :           else if (INTVAL (x) & IX86_HLE_RELEASE)
   14579              : #ifdef HAVE_AS_IX86_HLE
   14580           24 :             fputs ("xrelease ", file);
   14581              : #else
   14582              :             fputs ("\n" ASM_BYTE "0xf3\n\t", file);
   14583              : #endif
   14584              :           /* We do not want to print value of the operand.  */
   14585        75626 :           return;
   14586              : 
   14587        43035 :         case 'N':
   14588        43035 :           if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
   14589        15481 :             fputs ("{z}", file);
   14590        43035 :           return;
   14591              : 
   14592         4013 :         case 'r':
   14593         4013 :           if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
   14594              :             {
   14595            2 :               output_operand_lossage ("operand is not a specific integer, "
   14596              :                                       "invalid operand code 'r'");
   14597            2 :               return;
   14598              :             }
   14599              : 
   14600         4011 :           if (ASSEMBLER_DIALECT == ASM_INTEL)
   14601            1 :             fputs (", ", file);
   14602              : 
   14603         4011 :           fputs ("{sae}", file);
   14604              : 
   14605         4011 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14606         4010 :             fputs (", ", file);
   14607              : 
   14608         4011 :           return;
   14609              : 
   14610         5993 :         case 'R':
   14611         5993 :           if (!CONST_INT_P (x))
   14612              :             {
   14613            1 :               output_operand_lossage ("operand is not an integer, invalid "
   14614              :                                       "operand code 'R'");
   14615            1 :               return;
   14616              :             }
   14617              : 
   14618         5992 :           if (ASSEMBLER_DIALECT == ASM_INTEL)
   14619            6 :             fputs (", ", file);
   14620              : 
   14621         5992 :           switch (INTVAL (x))
   14622              :             {
   14623         5177 :             case ROUND_NEAREST_INT | ROUND_SAE:
   14624         5177 :               fputs ("{rn-sae}", file);
   14625         5177 :               break;
   14626          637 :             case ROUND_NEG_INF | ROUND_SAE:
   14627          637 :               fputs ("{rd-sae}", file);
   14628          637 :               break;
   14629           56 :             case ROUND_POS_INF | ROUND_SAE:
   14630           56 :               fputs ("{ru-sae}", file);
   14631           56 :               break;
   14632          121 :             case ROUND_ZERO | ROUND_SAE:
   14633          121 :               fputs ("{rz-sae}", file);
   14634          121 :               break;
   14635            1 :             default:
   14636            1 :               output_operand_lossage ("operand is not a specific integer, "
   14637              :                                       "invalid operand code 'R'");
   14638              :             }
   14639              : 
   14640         5992 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14641         5986 :             fputs (", ", file);
   14642              : 
   14643         5992 :           return;
   14644              : 
   14645        10450 :         case 'v':
   14646        10450 :           if (MEM_P (x))
   14647              :             {
   14648        10567 :               switch (MEM_ADDR_SPACE (x))
   14649              :                 {
   14650              :                 case ADDR_SPACE_GENERIC:
   14651              :                   break;
   14652            0 :                 case ADDR_SPACE_SEG_FS:
   14653            0 :                   fputs ("fs ", file);
   14654            0 :                   break;
   14655            0 :                 case ADDR_SPACE_SEG_GS:
   14656            0 :                   fputs ("gs ", file);
   14657            0 :                   break;
   14658            0 :                 default:
   14659            0 :                   gcc_unreachable ();
   14660              :                 }
   14661              :             }
   14662              :           else
   14663            0 :             output_operand_lossage ("operand is not a memory reference, "
   14664              :                                     "invalid operand code 'v'");
   14665        10450 :           return;
   14666              : 
   14667            0 :         case '*':
   14668            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14669            0 :             putc ('*', file);
   14670            0 :           return;
   14671              : 
   14672          202 :         case '&':
   14673          202 :           {
   14674          202 :             const char *name = get_some_local_dynamic_name ();
   14675          202 :             if (name == NULL)
   14676            1 :               output_operand_lossage ("'%%&' used without any "
   14677              :                                       "local dynamic TLS references");
   14678              :             else
   14679          201 :               assemble_name (file, name);
   14680          202 :             return;
   14681              :           }
   14682              : 
   14683      6511196 :         case '+':
   14684      6511196 :           {
   14685      6511196 :             rtx x;
   14686              : 
   14687      6511196 :             if (!optimize
   14688      5098077 :                 || optimize_function_for_size_p (cfun)
   14689     11418405 :                 || (!TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN
   14690      4907209 :                     && !TARGET_BRANCH_PREDICTION_HINTS_TAKEN))
   14691      6511196 :               return;
   14692              : 
   14693            0 :             x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
   14694            0 :             if (x)
   14695              :               {
   14696            0 :                 int pred_val = profile_probability::from_reg_br_prob_note
   14697            0 :                                  (XINT (x, 0)).to_reg_br_prob_base ();
   14698              : 
   14699            0 :                 bool taken = pred_val > REG_BR_PROB_BASE / 2;
   14700              :                 /* We use 3e (DS) prefix for taken branches and
   14701              :                    2e (CS) prefix for not taken branches.  */
   14702            0 :                 if (taken && TARGET_BRANCH_PREDICTION_HINTS_TAKEN)
   14703            0 :                   fputs ("ds ; ", file);
   14704            0 :                 else if (!taken && TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN)
   14705            0 :                   fputs ("cs ; ", file);
   14706              :               }
   14707            0 :             return;
   14708              :           }
   14709              : 
   14710              :         case ';':
   14711              : #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
   14712              :           putc (';', file);
   14713              : #endif
   14714              :           return;
   14715              : 
   14716         3722 :         case '~':
   14717         3722 :           putc (TARGET_AVX2 ? 'i' : 'f', file);
   14718         3722 :           return;
   14719              : 
   14720         1675 :         case 'M':
   14721         1675 :           if (TARGET_X32)
   14722              :             {
   14723              :               /* NB: 32-bit indices in VSIB address are sign-extended
   14724              :                  to 64 bits. In x32, if 32-bit address 0xf7fa3010 is
   14725              :                  sign-extended to 0xfffffffff7fa3010 which is invalid
   14726              :                  address.  Add addr32 prefix if there is no base
   14727              :                  register nor symbol.  */
   14728           40 :               bool ok;
   14729           40 :               struct ix86_address parts;
   14730           40 :               ok = ix86_decompose_address (x, &parts);
   14731           40 :               gcc_assert (ok && parts.index == NULL_RTX);
   14732           40 :               if (parts.base == NULL_RTX
   14733           40 :                   && (parts.disp == NULL_RTX
   14734           34 :                       || !symbolic_operand (parts.disp,
   14735           34 :                                             GET_MODE (parts.disp))))
   14736           34 :                 fputs ("addr32 ", file);
   14737              :             }
   14738         1675 :           return;
   14739              : 
   14740        22171 :         case '^':
   14741        25342 :           if (Pmode != word_mode)
   14742            0 :             fputs ("addr32 ", file);
   14743        22171 :           return;
   14744              : 
   14745     14876739 :         case '!':
   14746     14876739 :           if (ix86_notrack_prefixed_insn_p (current_output_insn))
   14747         3777 :             fputs ("notrack ", file);
   14748     14876739 :           return;
   14749              : 
   14750            1 :         default:
   14751            1 :           output_operand_lossage ("invalid operand code '%c'", code);
   14752              :         }
   14753              :     }
   14754              : 
   14755    143742837 :   if (REG_P (x))
   14756     85618799 :     print_reg (x, code, file);
   14757              : 
   14758     58124038 :   else if (MEM_P (x))
   14759              :     {
   14760     33358604 :       rtx addr = XEXP (x, 0);
   14761              : 
   14762              :       /* No `byte ptr' prefix for call instructions ... */
   14763     33358604 :       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
   14764              :         {
   14765          325 :           machine_mode mode = GET_MODE (x);
   14766          325 :           const char *size;
   14767              : 
   14768              :           /* Check for explicit size override codes.  */
   14769          325 :           if (code == 'b')
   14770              :             size = "BYTE";
   14771              :           else if (code == 'w')
   14772              :             size = "WORD";
   14773              :           else if (code == 'k')
   14774              :             size = "DWORD";
   14775              :           else if (code == 'q')
   14776              :             size = "QWORD";
   14777              :           else if (code == 'x')
   14778              :             size = "XMMWORD";
   14779              :           else if (code == 't')
   14780              :             size = "YMMWORD";
   14781              :           else if (code == 'g')
   14782              :             size = "ZMMWORD";
   14783          238 :           else if (mode == BLKmode)
   14784              :             /* ... or BLKmode operands, when not overridden.  */
   14785              :             size = NULL;
   14786              :           else
   14787          472 :             switch (GET_MODE_SIZE (mode))
   14788              :               {
   14789              :               case 1: size = "BYTE"; break;
   14790              :               case 2: size = "WORD"; break;
   14791              :               case 4: size = "DWORD"; break;
   14792              :               case 8: size = "QWORD"; break;
   14793              :               case 12: size = "TBYTE"; break;
   14794            7 :               case 16:
   14795            7 :                 if (mode == XFmode)
   14796              :                   size = "TBYTE";
   14797              :                 else
   14798              :                   size = "XMMWORD";
   14799              :                 break;
   14800              :               case 32: size = "YMMWORD"; break;
   14801              :               case 64: size = "ZMMWORD"; break;
   14802            0 :               default:
   14803            0 :                 gcc_unreachable ();
   14804              :               }
   14805              :           if (size)
   14806              :             {
   14807          323 :               fputs (size, file);
   14808          323 :               fputs (" PTR ", file);
   14809              :             }
   14810              :         }
   14811              : 
   14812     33358604 :       if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
   14813            0 :         output_operand_lossage ("invalid constraints for operand");
   14814              :       else
   14815     33358604 :         ix86_print_operand_address_as
   14816     34080913 :           (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
   14817              :     }
   14818              : 
   14819     24765434 :   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode)
   14820              :     {
   14821          762 :       long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
   14822          762 :                                REAL_MODE_FORMAT (HFmode));
   14823          762 :       if (ASSEMBLER_DIALECT == ASM_ATT)
   14824          762 :         putc ('$', file);
   14825          762 :       fprintf (file, "0x%04x", (unsigned int) l);
   14826          762 :     }
   14827              : 
   14828     24764672 :   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
   14829              :     {
   14830        22121 :       long l;
   14831              : 
   14832        22121 :       REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
   14833              : 
   14834        22121 :       if (ASSEMBLER_DIALECT == ASM_ATT)
   14835        22121 :         putc ('$', file);
   14836              :       /* Sign extend 32bit SFmode immediate to 8 bytes.  */
   14837        22121 :       if (code == 'q')
   14838          327 :         fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
   14839              :                  (unsigned long long) (int) l);
   14840              :       else
   14841        21794 :         fprintf (file, "0x%08x", (unsigned int) l);
   14842              :     }
   14843              : 
   14844     24742551 :   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
   14845              :     {
   14846         3699 :       long l[2];
   14847              : 
   14848         3699 :       REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
   14849              : 
   14850         3699 :       if (ASSEMBLER_DIALECT == ASM_ATT)
   14851         3699 :         putc ('$', file);
   14852         3699 :       fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
   14853         3699 :     }
   14854              : 
   14855              :   /* These float cases don't actually occur as immediate operands.  */
   14856     24738852 :   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
   14857              :     {
   14858            0 :       char dstr[30];
   14859              : 
   14860            0 :       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
   14861            0 :       fputs (dstr, file);
   14862            0 :     }
   14863              : 
   14864              :   /* Print bcst_mem_operand.  */
   14865     24738852 :   else if (GET_CODE (x) == VEC_DUPLICATE)
   14866              :     {
   14867          313 :       machine_mode vmode = GET_MODE (x);
   14868              :       /* Must be bcst_memory_operand.  */
   14869          313 :       gcc_assert (bcst_mem_operand (x, vmode));
   14870              : 
   14871          313 :       rtx mem = XEXP (x,0);
   14872          313 :       ix86_print_operand (file, mem, 0);
   14873              : 
   14874          313 :       switch (vmode)
   14875              :         {
   14876           28 :         case E_V2DImode:
   14877           28 :         case E_V2DFmode:
   14878           28 :           fputs ("{1to2}", file);
   14879           28 :           break;
   14880           74 :         case E_V4SImode:
   14881           74 :         case E_V4SFmode:
   14882           74 :         case E_V4DImode:
   14883           74 :         case E_V4DFmode:
   14884           74 :           fputs ("{1to4}", file);
   14885           74 :           break;
   14886           93 :         case E_V8SImode:
   14887           93 :         case E_V8SFmode:
   14888           93 :         case E_V8DFmode:
   14889           93 :         case E_V8DImode:
   14890           93 :         case E_V8HFmode:
   14891           93 :           fputs ("{1to8}", file);
   14892           93 :           break;
   14893          110 :         case E_V16SFmode:
   14894          110 :         case E_V16SImode:
   14895          110 :         case E_V16HFmode:
   14896          110 :           fputs ("{1to16}", file);
   14897          110 :           break;
   14898            8 :         case E_V32HFmode:
   14899            8 :           fputs ("{1to32}", file);
   14900            8 :           break;
   14901            0 :         default:
   14902            0 :           gcc_unreachable ();
   14903              :         }
   14904              :     }
   14905              : 
   14906              :   else
   14907              :     {
   14908              :       /* We have patterns that allow zero sets of memory, for instance.
   14909              :          In 64-bit mode, we should probably support all 8-byte vectors,
   14910              :          since we can in fact encode that into an immediate.  */
   14911     24738539 :       if (CONST_VECTOR_P (x))
   14912              :         {
   14913          118 :           if (x != CONST0_RTX (GET_MODE (x)))
   14914            2 :             output_operand_lossage ("invalid vector immediate");
   14915          118 :           x = const0_rtx;
   14916              :         }
   14917              : 
   14918     24738539 :       if (code == 'P')
   14919              :         {
   14920      5932153 :           if (ix86_force_load_from_GOT_p (x, true))
   14921              :             {
   14922              :               /* For inline assembly statement, load function address
   14923              :                  from GOT with 'P' operand modifier to avoid PLT.  */
   14924            4 :               x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
   14925              :                                   (TARGET_64BIT
   14926              :                                    ? UNSPEC_GOTPCREL
   14927              :                                    : UNSPEC_GOT));
   14928            4 :               x = gen_rtx_CONST (Pmode, x);
   14929            4 :               x = gen_const_mem (Pmode, x);
   14930            4 :               ix86_print_operand (file, x, 'A');
   14931            4 :               return;
   14932              :             }
   14933              :         }
   14934     18806386 :       else if (code != 'p')
   14935              :         {
   14936     18806277 :           if (CONST_INT_P (x))
   14937              :             {
   14938     15531574 :               if (ASSEMBLER_DIALECT == ASM_ATT)
   14939     15531348 :                 putc ('$', file);
   14940              :             }
   14941      3274703 :           else if (GET_CODE (x) == CONST || SYMBOL_REF_P (x)
   14942         9391 :                    || LABEL_REF_P (x))
   14943              :             {
   14944      3274701 :               if (ASSEMBLER_DIALECT == ASM_ATT)
   14945      3274677 :                 putc ('$', file);
   14946              :               else
   14947           24 :                 fputs ("OFFSET FLAT:", file);
   14948              :             }
   14949              :         }
   14950     24738535 :       if (CONST_INT_P (x))
   14951     15531660 :         fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
   14952      9206875 :       else if (flag_pic || MACHOPIC_INDIRECT)
   14953       531902 :         output_pic_addr_const (file, x, code);
   14954              :       else
   14955      8674973 :         output_addr_const (file, x);
   14956              :     }
   14957              : }
   14958              : 
   14959              : static bool
   14960     21494411 : ix86_print_operand_punct_valid_p (unsigned char code)
   14961              : {
   14962     21494411 :   return (code == '*' || code == '+' || code == '&' || code == ';'
   14963     14898910 :           || code == '~' || code == '^' || code == '!');
   14964              : }
   14965              : 
/* Print a memory operand whose address is ADDR.  AS is the address
   space to print an explicit segment prefix for (if generic, the
   segment recovered from the decomposed address is used instead).
   RAW true suppresses the segment prefix and the RIP-relative /
   addr32 encoding adjustments (used for the 'p'/'P' modifiers).  */

static void
ix86_print_operand_address_as (FILE *file, rtx addr,
                               addr_space_t as, bool raw)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok;
  bool vsib = false;
  /* Extra size letter passed down to print_reg; 0 means none.  */
  int code = 0;

  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
    {
      /* VSIB address: operand 0 is the base address, operand 1 the
	 vector index register, operand 2 the scale.  Splice the index
	 and scale into the decomposed parts by hand.  */
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      gcc_assert (parts.index == NULL_RTX);
      parts.index = XVECEXP (addr, 0, 1);
      parts.scale = INTVAL (XVECEXP (addr, 0, 2));
      addr = XVECEXP (addr, 0, 0);
      vsib = true;
    }
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
    {
      /* LEA address: force 64-bit register names for the address
	 components.  */
      gcc_assert (TARGET_64BIT);
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      code = 'q';
    }
  else
    ok = ix86_decompose_address (addr, &parts);

  gcc_assert (ok);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* A non-generic AS argument and a non-generic decomposed segment are
     mutually exclusive; whichever is set wins.  */
  if (ADDR_SPACE_GENERIC_P (as))
    as = parts.seg;
  else
    gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));

  if (!ADDR_SPACE_GENERIC_P (as) && !raw)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	putc ('%', file);

      switch (as)
	{
	case ADDR_SPACE_SEG_FS:
	  fputs ("fs:", file);
	  break;
	case ADDR_SPACE_SEG_GS:
	  fputs ("gs:", file);
	  break;
	default:
	  gcc_unreachable ();
	}
    }

  /* Use one byte shorter RIP relative addressing for 64bit mode.  */
  if (TARGET_64BIT && !base && !index && !raw)
    {
      rtx symbol = disp;

      /* Look through a CONST (sym + offset) wrapper to find the
	 underlying symbol.  */
      if (GET_CODE (disp) == CONST
	  && GET_CODE (XEXP (disp, 0)) == PLUS
	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
	symbol = XEXP (XEXP (disp, 0), 0);

      /* TLS symbols get their own addressing forms, so only use
	 %rip for labels and non-TLS symbols.  */
      if (LABEL_REF_P (symbol)
	  || (SYMBOL_REF_P (symbol)
	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
	base = pc_rtx;
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */
      if (CONST_INT_P (disp))
	{
	  if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
	    fputs ("ds:", file);
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
	}
      /* Load the external function address via the GOT slot to avoid PLT.  */
      else if (GET_CODE (disp) == CONST
	       && GET_CODE (XEXP (disp, 0)) == UNSPEC
	       && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
		   || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
	       && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
	output_pic_addr_const (file, disp, 0);
      else if (flag_pic)
	output_pic_addr_const (file, disp, 0);
      else
	output_addr_const (file, disp);
    }
  else
    {
      /* Print SImode register names to force addr32 prefix.  */
      if (SImode_address_operand (addr, VOIDmode))
	{
	  if (flag_checking)
	    {
	      gcc_assert (TARGET_64BIT);
	      switch (GET_CODE (addr))
		{
		case SUBREG:
		  gcc_assert (GET_MODE (addr) == SImode);
		  gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
		  break;
		case ZERO_EXTEND:
		case AND:
		  gcc_assert (GET_MODE (addr) == DImode);
		  break;
		default:
		  gcc_unreachable ();
		}
	    }
	  gcc_assert (!code);
	  code = 'k';
	}
      else if (code == 0
	       && TARGET_X32
	       && disp
	       && CONST_INT_P (disp)
	       && INTVAL (disp) < -16*1024*1024)
	{
	  /* X32 runs in 64-bit mode, where displacement, DISP, in
	     address DISP(%r64), is encoded as 32-bit immediate sign-
	     extended from 32-bit to 64-bit.  For -0x40000300(%r64),
	     address is %r64 + 0xffffffffbffffd00.  When %r64 <
	     0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
	     which is invalid for x32.  The correct address is %r64
	     - 0x40000300 == 0xf7ffdd64.  To properly encode
	     -0x40000300(%r64) for x32, we zero-extend negative
	     displacement by forcing addr32 prefix which truncates
	     0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
	     zero-extend all negative displacements, including -1(%rsp).
	     However, for small negative displacements, sign-extension
	     won't cause overflow.  We only zero-extend negative
	     displacements if they < -16*1024*1024, which is also used
	     to check legitimate address displacements for PIC.  */
	  code = 'k';
	}

      /* Since the upper 32 bits of RSP are always zero for x32,
	 we can encode %esp as %rsp to avoid 0x67 prefix if
	 there is no index register.  */
      if (TARGET_X32 && Pmode == SImode
	  && !index && base && REG_P (base) && REGNO (base) == SP_REG)
	code = 'q';

      if (ASSEMBLER_DIALECT == ASM_ATT)
	{
	  /* AT&T syntax: disp(base,index,scale).  */
	  if (disp)
	    {
	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (LABEL_REF_P (disp))
		output_asm_label (disp);
	      else
		output_addr_const (file, disp);
	    }

	  putc ('(', file);
	  if (base)
	    print_reg (base, code, file);
	  if (index)
	    {
	      putc (',', file);
	      /* A VSIB index is a vector register; never apply the
		 size-override letter to it.  */
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, ",%d", scale);
	    }
	  putc (')', file);
	}
      else
	{
	  /* Intel syntax: [base+offset+index*scale].  */
	  rtx offset = NULL_RTX;

	  if (disp)
	    {
	      /* Pull out the offset of a symbol; print any symbol itself.  */
	      if (GET_CODE (disp) == CONST
		  && GET_CODE (XEXP (disp, 0)) == PLUS
		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
		{
		  offset = XEXP (XEXP (disp, 0), 1);
		  disp = gen_rtx_CONST (VOIDmode,
					XEXP (XEXP (disp, 0), 0));
		}

	      if (flag_pic)
		output_pic_addr_const (file, disp, 0);
	      else if (LABEL_REF_P (disp))
		output_asm_label (disp);
	      else if (CONST_INT_P (disp))
		offset = disp;
	      else
		output_addr_const (file, disp);
	    }

	  putc ('[', file);
	  if (base)
	    {
	      print_reg (base, code, file);
	      if (offset)
		{
		  /* Negative offsets carry their own '-' sign.  */
		  if (INTVAL (offset) >= 0)
		    putc ('+', file);
		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
		}
	    }
	  else if (offset)
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
	  else
	    putc ('0', file);

	  if (index)
	    {
	      putc ('+', file);
	      print_reg (index, vsib ? 0 : code, file);
	      if (scale != 1 || vsib)
		fprintf (file, "*%d", scale);
	    }
	  putc (']', file);
	}
    }
}
   15197              : 
   15198              : static void
   15199      3564438 : ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
   15200              : {
   15201      3564438 :   if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
   15202            1 :     output_operand_lossage ("invalid constraints for operand");
   15203              :   else
   15204      3564437 :     ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
   15205      3564438 : }
   15206              : 
   15207              : /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
   15208              : 
   15209              : static bool
   15210        15339 : i386_asm_output_addr_const_extra (FILE *file, rtx x)
   15211              : {
   15212        15339 :   rtx op;
   15213              : 
   15214        15339 :   if (GET_CODE (x) != UNSPEC)
   15215              :     return false;
   15216              : 
   15217        15339 :   op = XVECEXP (x, 0, 0);
   15218        15339 :   switch (XINT (x, 1))
   15219              :     {
   15220         1350 :     case UNSPEC_GOTOFF:
   15221         1350 :       output_addr_const (file, op);
   15222         1350 :       fputs ("@gotoff", file);
   15223         1350 :       break;
   15224            0 :     case UNSPEC_GOTTPOFF:
   15225            0 :       output_addr_const (file, op);
   15226              :       /* FIXME: This might be @TPOFF in Sun ld.  */
   15227            0 :       fputs ("@gottpoff", file);
   15228            0 :       break;
   15229            0 :     case UNSPEC_TPOFF:
   15230            0 :       output_addr_const (file, op);
   15231            0 :       fputs ("@tpoff", file);
   15232            0 :       break;
   15233        10914 :     case UNSPEC_NTPOFF:
   15234        10914 :       output_addr_const (file, op);
   15235        10914 :       if (TARGET_64BIT)
   15236        10168 :         fputs ("@tpoff", file);
   15237              :       else
   15238          746 :         fputs ("@ntpoff", file);
   15239              :       break;
   15240            0 :     case UNSPEC_DTPOFF:
   15241            0 :       output_addr_const (file, op);
   15242            0 :       fputs ("@dtpoff", file);
   15243            0 :       break;
   15244         3074 :     case UNSPEC_GOTNTPOFF:
   15245         3074 :       output_addr_const (file, op);
   15246         3074 :       if (TARGET_64BIT)
   15247         3074 :         fputs (ASSEMBLER_DIALECT == ASM_ATT ?
   15248              :                "@gottpoff(%rip)" : "@gottpoff[rip]", file);
   15249              :       else
   15250            0 :         fputs ("@gotntpoff", file);
   15251              :       break;
   15252            1 :     case UNSPEC_INDNTPOFF:
   15253            1 :       output_addr_const (file, op);
   15254            1 :       fputs ("@indntpoff", file);
   15255            1 :       break;
   15256            0 :     case UNSPEC_SECREL32:
   15257            0 :       output_addr_const (file, op);
   15258            0 :       fputs ("@secrel32", file);
   15259            0 :       break;
   15260              : #if TARGET_MACHO
   15261              :     case UNSPEC_MACHOPIC_OFFSET:
   15262              :       output_addr_const (file, op);
   15263              :       putc ('-', file);
   15264              :       machopic_output_function_base_name (file);
   15265              :       break;
   15266              : #endif
   15267              : 
   15268              :     default:
   15269              :       return false;
   15270              :     }
   15271              : 
   15272              :   return true;
   15273              : }
   15274              : 
   15275              : 
   15276              : /* Output code to perform a 387 binary operation in INSN, one of PLUS,
   15277              :    MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   15278              :    is the expression of the binary operation.  The output may either be
   15279              :    emitted here, or returned to the caller, like all output_* functions.
   15280              : 
   15281              :    There is no guarantee that the operands are the same mode, as they
   15282              :    might be within FLOAT or FLOAT_EXTEND expressions.  */
   15283              : 
   15284              : #ifndef SYSV386_COMPAT
   15285              : /* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   15286              :    wants to fix the assemblers because that causes incompatibility
   15287              :    with gcc.  No-one wants to fix gcc because that causes
   15288              :    incompatibility with assemblers...  You can use the option of
   15289              :    -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
   15290              : #define SYSV386_COMPAT 1
   15291              : #endif
   15292              : 
   15293              : const char *
   15294       602255 : output_387_binary_op (rtx_insn *insn, rtx *operands)
   15295              : {
   15296       602255 :   static char buf[40];
   15297       602255 :   const char *p;
   15298       602255 :   bool is_sse
   15299       602255 :     = (SSE_REG_P (operands[0])
   15300       657318 :        || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));
   15301              : 
   15302        55063 :   if (is_sse)
   15303              :     p = "%v";
   15304        55063 :   else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
   15305        55056 :            || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
   15306              :     p = "fi";
   15307              :   else
   15308       602255 :     p = "f";
   15309              : 
   15310       602255 :   strcpy (buf, p);
   15311              : 
   15312       602255 :   switch (GET_CODE (operands[3]))
   15313              :     {
   15314              :     case PLUS:
   15315              :       p = "add"; break;
   15316              :     case MINUS:
   15317              :       p = "sub"; break;
   15318        93430 :     case MULT:
   15319        93430 :       p = "mul"; break;
   15320        27668 :     case DIV:
   15321        27668 :       p = "div"; break;
   15322            0 :     default:
   15323            0 :       gcc_unreachable ();
   15324              :     }
   15325              : 
   15326       602255 :   strcat (buf, p);
   15327              : 
   15328       602255 :   if (is_sse)
   15329              :    {
   15330       547192 :      p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd";
   15331       547192 :      strcat (buf, p);
   15332              : 
   15333       547192 :      if (TARGET_AVX)
   15334              :        p = "\t{%2, %1, %0|%0, %1, %2}";
   15335              :      else
   15336       530714 :        p = "\t{%2, %0|%0, %2}";
   15337              : 
   15338       547192 :      strcat (buf, p);
   15339       547192 :      return buf;
   15340              :    }
   15341              : 
   15342              :   /* Even if we do not want to check the inputs, this documents input
   15343              :      constraints.  Which helps in understanding the following code.  */
   15344        55063 :   if (flag_checking)
   15345              :     {
   15346        55062 :       if (STACK_REG_P (operands[0])
   15347        55062 :           && ((REG_P (operands[1])
   15348        53483 :                && REGNO (operands[0]) == REGNO (operands[1])
   15349        49493 :                && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
   15350         5569 :               || (REG_P (operands[2])
   15351         5569 :                   && REGNO (operands[0]) == REGNO (operands[2])
   15352         5569 :                   && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
   15353       110124 :           && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
   15354              :         ; /* ok */
   15355              :       else
   15356            0 :         gcc_unreachable ();
   15357              :     }
   15358              : 
   15359        55063 :   switch (GET_CODE (operands[3]))
   15360              :     {
   15361        40441 :     case MULT:
   15362        40441 :     case PLUS:
   15363        40441 :       if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
   15364         1991 :         std::swap (operands[1], operands[2]);
   15365              : 
   15366              :       /* know operands[0] == operands[1].  */
   15367              : 
   15368        40441 :       if (MEM_P (operands[2]))
   15369              :         {
   15370              :           p = "%Z2\t%2";
   15371              :           break;
   15372              :         }
   15373              : 
   15374        36078 :       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
   15375              :         {
   15376        21087 :           if (STACK_TOP_P (operands[0]))
   15377              :             /* How is it that we are storing to a dead operand[2]?
   15378              :                Well, presumably operands[1] is dead too.  We can't
   15379              :                store the result to st(0) as st(0) gets popped on this
   15380              :                instruction.  Instead store to operands[2] (which I
   15381              :                think has to be st(1)).  st(1) will be popped later.
   15382              :                gcc <= 2.8.1 didn't have this check and generated
   15383              :                assembly code that the Unixware assembler rejected.  */
   15384              :             p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
   15385              :           else
   15386              :             p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
   15387              :           break;
   15388              :         }
   15389              : 
   15390        14991 :       if (STACK_TOP_P (operands[0]))
   15391              :         p = "\t{%y2, %0|%0, %y2}";    /* st(0) = st(0) op st(r2) */
   15392              :       else
   15393              :         p = "\t{%2, %0|%0, %2}";      /* st(r1) = st(r1) op st(0) */
   15394              :       break;
   15395              : 
   15396        14622 :     case MINUS:
   15397        14622 :     case DIV:
   15398        14622 :       if (MEM_P (operands[1]))
   15399              :         {
   15400              :           p = "r%Z1\t%1";
   15401              :           break;
   15402              :         }
   15403              : 
   15404        14189 :       if (MEM_P (operands[2]))
   15405              :         {
   15406              :           p = "%Z2\t%2";
   15407              :           break;
   15408              :         }
   15409              : 
   15410        12683 :       if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
   15411              :         {
   15412              : #if SYSV386_COMPAT
   15413              :           /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
   15414              :              derived assemblers, confusingly reverse the direction of
   15415              :              the operation for fsub{r} and fdiv{r} when the
   15416              :              destination register is not st(0).  The Intel assembler
   15417              :              doesn't have this brain damage.  Read !SYSV386_COMPAT to
   15418              :              figure out what the hardware really does.  */
   15419         6093 :           if (STACK_TOP_P (operands[0]))
   15420              :             p = "{p\t%0, %2|rp\t%2, %0}";
   15421              :           else
   15422              :             p = "{rp\t%2, %0|p\t%0, %2}";
   15423              : #else
   15424              :           if (STACK_TOP_P (operands[0]))
   15425              :             /* As above for fmul/fadd, we can't store to st(0).  */
   15426              :             p = "rp\t{%0, %2|%2, %0}";        /* st(1) = st(0) op st(1); pop */
   15427              :           else
   15428              :             p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
   15429              : #endif
   15430              :           break;
   15431              :         }
   15432              : 
   15433         6590 :       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
   15434              :         {
   15435              : #if SYSV386_COMPAT
   15436         3075 :           if (STACK_TOP_P (operands[0]))
   15437              :             p = "{rp\t%0, %1|p\t%1, %0}";
   15438              :           else
   15439              :             p = "{p\t%1, %0|rp\t%0, %1}";
   15440              : #else
   15441              :           if (STACK_TOP_P (operands[0]))
   15442              :             p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
   15443              :           else
   15444              :             p = "rp\t{%1, %0|%0, %1}";        /* st(r2) = st(0) op st(r2); pop */
   15445              : #endif
   15446              :           break;
   15447              :         }
   15448              : 
   15449         3515 :       if (STACK_TOP_P (operands[0]))
   15450              :         {
   15451         2674 :           if (STACK_TOP_P (operands[1]))
   15452              :             p = "\t{%y2, %0|%0, %y2}";        /* st(0) = st(0) op st(r2) */
   15453              :           else
   15454              :             p = "r\t{%y1, %0|%0, %y1}";       /* st(0) = st(r1) op st(0) */
   15455              :           break;
   15456              :         }
   15457          841 :       else if (STACK_TOP_P (operands[1]))
   15458              :         {
   15459              : #if SYSV386_COMPAT
   15460              :           p = "{\t%1, %0|r\t%0, %1}";
   15461              : #else
   15462              :           p = "r\t{%1, %0|%0, %1}";   /* st(r2) = st(0) op st(r2) */
   15463              : #endif
   15464              :         }
   15465              :       else
   15466              :         {
   15467              : #if SYSV386_COMPAT
   15468              :           p = "{r\t%2, %0|\t%0, %2}";
   15469              : #else
   15470              :           p = "\t{%2, %0|%0, %2}";    /* st(r1) = st(r1) op st(0) */
   15471              : #endif
   15472              :         }
   15473              :       break;
   15474              : 
   15475            0 :     default:
   15476            0 :       gcc_unreachable ();
   15477              :     }
   15478              : 
   15479        55063 :   strcat (buf, p);
   15480        55063 :   return buf;
   15481              : }
   15482              : 
   15483              : /* Return needed mode for entity in optimize_mode_switching pass.  */
   15484              : 
   15485              : static int
   15486         1654 : ix86_dirflag_mode_needed (rtx_insn *insn)
   15487              : {
   15488         1654 :   if (CALL_P (insn))
   15489              :     {
   15490          339 :       if (cfun->machine->func_type == TYPE_NORMAL)
   15491              :         return X86_DIRFLAG_ANY;
   15492              :       else
   15493              :         /* No need to emit CLD in interrupt handler for TARGET_CLD.  */
   15494          339 :         return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
   15495              :     }
   15496              : 
   15497         1315 :   if (recog_memoized (insn) < 0)
   15498              :     return X86_DIRFLAG_ANY;
   15499              : 
   15500         1313 :   if (get_attr_type (insn) == TYPE_STR)
   15501              :     {
   15502              :       /* Emit cld instruction if stringops are used in the function.  */
   15503            1 :       if (cfun->machine->func_type == TYPE_NORMAL)
   15504            0 :         return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
   15505              :       else
   15506              :         return X86_DIRFLAG_RESET;
   15507              :     }
   15508              : 
   15509              :   return X86_DIRFLAG_ANY;
   15510              : }
   15511              : 
   15512              : /* Check if a 256bit or 512 bit AVX register is referenced inside of EXP.   */
   15513              : 
   15514              : static bool
   15515      2234671 : ix86_check_avx_upper_register (const_rtx exp)
   15516              : {
   15517              :   /* construct_container may return a parallel with expr_list
   15518              :      which contains the real reg and mode  */
   15519      2234671 :   subrtx_iterator::array_type array;
   15520      8521656 :   FOR_EACH_SUBRTX (iter, array, exp, NONCONST)
   15521              :     {
   15522      6450525 :       const_rtx x = *iter;
   15523      2597739 :       if (SSE_REG_P (x)
   15524       834287 :           && !EXT_REX_SSE_REG_P (x)
   15525      8106245 :           && GET_MODE_BITSIZE (GET_MODE (x)) > 128)
   15526       163540 :         return true;
   15527              :     }
   15528              : 
   15529      2071131 :   return false;
   15530      2234671 : }
   15531              : 
   15532              : /* Check if a 256bit or 512bit AVX register is referenced in stores.   */
   15533              : 
   15534              : static void
   15535        52036 : ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
   15536              : {
   15537        52036 :   if (SSE_REG_P (dest)
   15538        12854 :       && !EXT_REX_SSE_REG_P (dest)
   15539        77744 :       && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
   15540              :     {
   15541          759 :       bool *used = (bool *) data;
   15542          759 :       *used = true;
   15543              :     }
   15544        52036 : }
   15545              : 
/* Return the AVX upper-128 mode required before INSN for the
   optimize_mode_switching pass: AVX_U128_DIRTY when INSN reads or
   writes 256-bit/512-bit AVX registers, AVX_U128_CLEAN when a call
   needs the upper halves cleared, AVX_U128_ANY otherwise.  */

static int
ix86_avx_u128_mode_needed (rtx_insn *insn)
{
  /* Debug insns never constrain the mode.  */
  if (DEBUG_INSN_P (insn))
    return AVX_U128_ANY;

  if (CALL_P (insn))
    {
      rtx link;

      /* Needed mode is set to AVX_U128_DIRTY if any 256bit or 512bit
	 mode is used in the function arguments; walk the USE entries
	 of CALL_INSN_FUNCTION_USAGE to find argument registers.  */
      for (link = CALL_INSN_FUNCTION_USAGE (insn);
	   link;
	   link = XEXP (link, 1))
	{
	  if (GET_CODE (XEXP (link, 0)) == USE)
	    {
	      rtx arg = XEXP (XEXP (link, 0), 0);

	      if (ix86_check_avx_upper_register (arg))
		return AVX_U128_DIRTY;
	    }
	}

      /* Likewise AVX_U128_DIRTY if a 256bit or 512bit register is
	 used in the function return register.  */
      bool avx_upper_reg_found = false;
      note_stores (insn, ix86_check_avx_upper_stores,
		   &avx_upper_reg_found);
      if (avx_upper_reg_found)
	return AVX_U128_DIRTY;

      /* If the function is known to preserve some SSE registers,
	 RA and previous passes can legitimately rely on that for
	 modes wider than 256 bits.  It's only safe to issue a
	 vzeroupper if all SSE registers are clobbered.  */
      const function_abi &abi = insn_callee_abi (insn);
      if (vzeroupper_pattern (PATTERN (insn), VOIDmode)
	  /* It should be safe to issue a vzeroupper before a sibling
	     call.  Also there is no mode_exit for a sibling call, so a
	     vzeroupper could otherwise be missing for it.  */
	  || !(SIBLING_CALL_P (insn)
	       || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
					 abi.mode_clobbers (V4DImode))))
	return AVX_U128_ANY;

      return AVX_U128_CLEAN;
    }

  rtx set = single_set (insn);
  if (set)
    {
      rtx dest = SET_DEST (set);
      rtx src = SET_SRC (set);
      if (SSE_REG_P (dest)
	  && !EXT_REX_SSE_REG_P (dest)
	  && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
	{
	  /* This is an YMM/ZMM load.  Return AVX_U128_DIRTY if the
	     source isn't zero.  */
	  if (standard_sse_constant_p (src, GET_MODE (dest)) != 1)
	    return AVX_U128_DIRTY;
	  else
	    return AVX_U128_ANY;
	}
      else
	{
	  if (ix86_check_avx_upper_register (src))
	    return AVX_U128_DIRTY;
	}

      /* This isn't YMM/ZMM load/store.  */
      return AVX_U128_ANY;
    }

  /* Require DIRTY mode if a 256bit or 512bit AVX register is referenced.
     Hardware changes state only when a 256bit register is written to,
     but we need to prevent the compiler from moving optimal insertion
     point above eventual read from 256bit or 512 bit register.  */
  if (ix86_check_avx_upper_register (PATTERN (insn)))
    return AVX_U128_DIRTY;

  return AVX_U128_ANY;
}
   15633              : 
   15634              : /* Return mode that i387 must be switched into
   15635              :    prior to the execution of insn.  */
   15636              : 
   15637              : static int
   15638       412857 : ix86_i387_mode_needed (int entity, rtx_insn *insn)
   15639              : {
   15640       412857 :   enum attr_i387_cw mode;
   15641              : 
   15642              :   /* The mode UNINITIALIZED is used to store control word after a
   15643              :      function call or ASM pattern.  The mode ANY specify that function
   15644              :      has no requirements on the control word and make no changes in the
   15645              :      bits we are interested in.  */
   15646              : 
   15647       412857 :   if (CALL_P (insn)
   15648       412857 :       || (NONJUMP_INSN_P (insn)
   15649       337983 :           && (asm_noperands (PATTERN (insn)) >= 0
   15650       337930 :               || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
   15651        14354 :     return I387_CW_UNINITIALIZED;
   15652              : 
   15653       398503 :   if (recog_memoized (insn) < 0)
   15654              :     return I387_CW_ANY;
   15655              : 
   15656       397565 :   mode = get_attr_i387_cw (insn);
   15657              : 
   15658       397565 :   switch (entity)
   15659              :     {
   15660            0 :     case I387_ROUNDEVEN:
   15661            0 :       if (mode == I387_CW_ROUNDEVEN)
   15662              :         return mode;
   15663              :       break;
   15664              : 
   15665       391853 :     case I387_TRUNC:
   15666       391853 :       if (mode == I387_CW_TRUNC)
   15667              :         return mode;
   15668              :       break;
   15669              : 
   15670         4378 :     case I387_FLOOR:
   15671         4378 :       if (mode == I387_CW_FLOOR)
   15672              :         return mode;
   15673              :       break;
   15674              : 
   15675         1334 :     case I387_CEIL:
   15676         1334 :       if (mode == I387_CW_CEIL)
   15677              :         return mode;
   15678              :       break;
   15679              : 
   15680            0 :     default:
   15681            0 :       gcc_unreachable ();
   15682              :     }
   15683              : 
   15684              :   return I387_CW_ANY;
   15685              : }
   15686              : 
   15687              : /* Return mode that entity must be switched into
   15688              :    prior to the execution of insn.  */
   15689              : 
   15690              : static int
   15691      2510479 : ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
   15692              : {
   15693      2510479 :   switch (entity)
   15694              :     {
   15695         1654 :     case X86_DIRFLAG:
   15696         1654 :       return ix86_dirflag_mode_needed (insn);
   15697      2095968 :     case AVX_U128:
   15698      2095968 :       return ix86_avx_u128_mode_needed (insn);
   15699       412857 :     case I387_ROUNDEVEN:
   15700       412857 :     case I387_TRUNC:
   15701       412857 :     case I387_FLOOR:
   15702       412857 :     case I387_CEIL:
   15703       412857 :       return ix86_i387_mode_needed (entity, insn);
   15704            0 :     default:
   15705            0 :       gcc_unreachable ();
   15706              :     }
   15707              :   return 0;
   15708              : }
   15709              : 
/* Calculate mode of upper 128bit AVX registers after the insn.  */

static int
ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
{
  rtx pat = PATTERN (insn);

  /* vzeroupper and vzeroall explicitly clear the upper halves.  */
  if (vzeroupper_pattern (pat, VOIDmode)
      || vzeroall_pattern (pat, VOIDmode))
    return AVX_U128_CLEAN;

  /* We know that state is clean after CALL insn if there are no
     256bit or 512bit registers used in the function return register. */
  if (CALL_P (insn))
    {
      bool avx_upper_reg_found = false;
      note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);

      if (avx_upper_reg_found)
	return AVX_U128_DIRTY;

      /* If the function doesn't clobber any sse registers, or clobbers
	 only the 128-bit parts, then vzeroupper isn't issued before the
	 function exit, and the state after the call is ANY, not CLEAN.  */
      const function_abi &abi = insn_callee_abi (insn);
      if (!(SIBLING_CALL_P (insn)
	    || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
				      abi.mode_clobbers (V4DImode))))
	return AVX_U128_ANY;

      return  AVX_U128_CLEAN;
    }

  /* Otherwise, return current mode.  Remember that if insn
     references AVX 256bit or 512bit registers, the mode was already
     changed to DIRTY from MODE_NEEDED.  */
  return mode;
}
   15748              : 
   15749              : /* Return the mode that an insn results in.  */
   15750              : 
   15751              : static int
   15752      2509638 : ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
   15753              : {
   15754      2509638 :   switch (entity)
   15755              :     {
   15756              :     case X86_DIRFLAG:
   15757              :       return mode;
   15758      2095968 :     case AVX_U128:
   15759      2095968 :       return ix86_avx_u128_mode_after (mode, insn);
   15760              :     case I387_ROUNDEVEN:
   15761              :     case I387_TRUNC:
   15762              :     case I387_FLOOR:
   15763              :     case I387_CEIL:
   15764              :       return mode;
   15765            0 :     default:
   15766            0 :       gcc_unreachable ();
   15767              :     }
   15768              : }
   15769              : 
   15770              : static int
   15771          120 : ix86_dirflag_mode_entry (void)
   15772              : {
   15773              :   /* For TARGET_CLD or in the interrupt handler we can't assume
   15774              :      direction flag state at function entry.  */
   15775          120 :   if (TARGET_CLD
   15776          118 :       || cfun->machine->func_type != TYPE_NORMAL)
   15777          120 :     return X86_DIRFLAG_ANY;
   15778              : 
   15779              :   return X86_DIRFLAG_RESET;
   15780              : }
   15781              : 
   15782              : static int
   15783       122971 : ix86_avx_u128_mode_entry (void)
   15784              : {
   15785       122971 :   tree arg;
   15786              : 
   15787              :   /* Entry mode is set to AVX_U128_DIRTY if there are
   15788              :      256bit or 512bit modes used in function arguments.  */
   15789       310402 :   for (arg = DECL_ARGUMENTS (current_function_decl); arg;
   15790       187431 :        arg = TREE_CHAIN (arg))
   15791              :     {
   15792       221404 :       rtx incoming = DECL_INCOMING_RTL (arg);
   15793              : 
   15794       221404 :       if (incoming && ix86_check_avx_upper_register (incoming))
   15795              :         return AVX_U128_DIRTY;
   15796              :     }
   15797              : 
   15798              :   return AVX_U128_CLEAN;
   15799              : }
   15800              : 
   15801              : /* Return a mode that ENTITY is assumed to be
   15802              :    switched to at function entry.  */
   15803              : 
   15804              : static int
   15805        75836 : ix86_mode_entry (int entity)
   15806              : {
   15807        75836 :   switch (entity)
   15808              :     {
   15809          120 :     case X86_DIRFLAG:
   15810          120 :       return ix86_dirflag_mode_entry ();
   15811        74567 :     case AVX_U128:
   15812        74567 :       return ix86_avx_u128_mode_entry ();
   15813              :     case I387_ROUNDEVEN:
   15814              :     case I387_TRUNC:
   15815              :     case I387_FLOOR:
   15816              :     case I387_CEIL:
   15817              :       return I387_CW_ANY;
   15818            0 :     default:
   15819            0 :       gcc_unreachable ();
   15820              :     }
   15821              : }
   15822              : 
   15823              : static int
   15824        73318 : ix86_avx_u128_mode_exit (void)
   15825              : {
   15826        73318 :   rtx reg = crtl->return_rtx;
   15827              : 
   15828              :   /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
   15829              :      or 512 bit modes used in the function return register. */
   15830        73318 :   if (reg && ix86_check_avx_upper_register (reg))
   15831              :     return AVX_U128_DIRTY;
   15832              : 
   15833              :   /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
   15834              :      modes used in function arguments, otherwise return AVX_U128_CLEAN.
   15835              :    */
   15836        48404 :   return ix86_avx_u128_mode_entry ();
   15837              : }
   15838              : 
   15839              : /* Return a mode that ENTITY is assumed to be
   15840              :    switched to at function exit.  */
   15841              : 
   15842              : static int
   15843        74442 : ix86_mode_exit (int entity)
   15844              : {
   15845        74442 :   switch (entity)
   15846              :     {
   15847              :     case X86_DIRFLAG:
   15848              :       return X86_DIRFLAG_ANY;
   15849        73318 :     case AVX_U128:
   15850        73318 :       return ix86_avx_u128_mode_exit ();
   15851         1090 :     case I387_ROUNDEVEN:
   15852         1090 :     case I387_TRUNC:
   15853         1090 :     case I387_FLOOR:
   15854         1090 :     case I387_CEIL:
   15855         1090 :       return I387_CW_ANY;
   15856            0 :     default:
   15857            0 :       gcc_unreachable ();
   15858              :     }
   15859              : }
   15860              : 
/* Mode-switching priority hook: for any entity, the mode value N
   itself serves as its priority.  */
static int
ix86_mode_priority (int, int n)
{
  return n;
}
   15866              : 
   15867              : /* Output code to initialize control word copies used by trunc?f?i and
   15868              :    rounding patterns.  CURRENT_MODE is set to current control word,
   15869              :    while NEW_MODE is set to new control word.  */
   15870              : 
   15871              : static void
   15872         3296 : emit_i387_cw_initialization (int mode)
   15873              : {
   15874         3296 :   rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
   15875         3296 :   rtx new_mode;
   15876              : 
   15877         3296 :   enum ix86_stack_slot slot;
   15878              : 
   15879         3296 :   rtx reg = gen_reg_rtx (HImode);
   15880              : 
   15881         3296 :   emit_insn (gen_x86_fnstcw_1 (stored_mode));
   15882         3296 :   emit_move_insn (reg, copy_rtx (stored_mode));
   15883              : 
   15884         3296 :   switch (mode)
   15885              :     {
   15886            0 :     case I387_CW_ROUNDEVEN:
   15887              :       /* round to nearest */
   15888            0 :       emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
   15889            0 :       slot = SLOT_CW_ROUNDEVEN;
   15890            0 :       break;
   15891              : 
   15892         3076 :     case I387_CW_TRUNC:
   15893              :       /* round toward zero (truncate) */
   15894         3076 :       emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
   15895         3076 :       slot = SLOT_CW_TRUNC;
   15896         3076 :       break;
   15897              : 
   15898          153 :     case I387_CW_FLOOR:
   15899              :       /* round down toward -oo */
   15900          153 :       emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
   15901          153 :       emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
   15902          153 :       slot = SLOT_CW_FLOOR;
   15903          153 :       break;
   15904              : 
   15905           67 :     case I387_CW_CEIL:
   15906              :       /* round up toward +oo */
   15907           67 :       emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
   15908           67 :       emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
   15909           67 :       slot = SLOT_CW_CEIL;
   15910           67 :       break;
   15911              : 
   15912            0 :     default:
   15913            0 :       gcc_unreachable ();
   15914              :     }
   15915              : 
   15916         3296 :   gcc_assert (slot < MAX_386_STACK_LOCALS);
   15917              : 
   15918         3296 :   new_mode = assign_386_stack_local (HImode, slot);
   15919         3296 :   emit_move_insn (new_mode, reg);
   15920         3296 : }
   15921              : 
   15922              : /* Generate one or more insns to set ENTITY to MODE.  */
   15923              : 
   15924              : static void
   15925        52611 : ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
   15926              :                     HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
   15927              : {
   15928        52611 :   switch (entity)
   15929              :     {
   15930          265 :     case X86_DIRFLAG:
   15931          265 :       if (mode == X86_DIRFLAG_RESET)
   15932          265 :         emit_insn (gen_cld ());
   15933              :       break;
   15934        44231 :     case AVX_U128:
   15935        44231 :       if (mode == AVX_U128_CLEAN)
   15936        22572 :         ix86_expand_avx_vzeroupper ();
   15937              :       break;
   15938         8115 :     case I387_ROUNDEVEN:
   15939         8115 :     case I387_TRUNC:
   15940         8115 :     case I387_FLOOR:
   15941         8115 :     case I387_CEIL:
   15942         8115 :       if (mode != I387_CW_ANY
   15943         8115 :           && mode != I387_CW_UNINITIALIZED)
   15944         3296 :         emit_i387_cw_initialization (mode);
   15945              :       break;
   15946            0 :     default:
   15947            0 :       gcc_unreachable ();
   15948              :     }
   15949        52611 : }
   15950              : 
   15951              : /* Output code for INSN to convert a float to a signed int.  OPERANDS
   15952              :    are the insn operands.  The output may be [HSD]Imode and the input
   15953              :    operand may be [SDX]Fmode.  */
   15954              : 
   15955              : const char *
   15956         7425 : output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
   15957              : {
   15958         7425 :   bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
   15959         7425 :   bool dimode_p = GET_MODE (operands[0]) == DImode;
   15960         7425 :   int round_mode = get_attr_i387_cw (insn);
   15961              : 
   15962         7425 :   static char buf[40];
   15963         7425 :   const char *p;
   15964              : 
   15965              :   /* Jump through a hoop or two for DImode, since the hardware has no
   15966              :      non-popping instruction.  We used to do this a different way, but
   15967              :      that was somewhat fragile and broke with post-reload splitters.  */
   15968         7425 :   if ((dimode_p || fisttp) && !stack_top_dies)
   15969           25 :     output_asm_insn ("fld\t%y1", operands);
   15970              : 
   15971         7425 :   gcc_assert (STACK_TOP_P (operands[1]));
   15972         7425 :   gcc_assert (MEM_P (operands[0]));
   15973         7425 :   gcc_assert (GET_MODE (operands[1]) != TFmode);
   15974              : 
   15975         7425 :   if (fisttp)
   15976              :     return "fisttp%Z0\t%0";
   15977              : 
   15978         7424 :   strcpy (buf, "fist");
   15979              : 
   15980         7424 :   if (round_mode != I387_CW_ANY)
   15981         7376 :     output_asm_insn ("fldcw\t%3", operands);
   15982              : 
   15983         7424 :   p = "p%Z0\t%0";
   15984         7424 :   strcat (buf, p + !(stack_top_dies || dimode_p));
   15985              : 
   15986         7424 :   output_asm_insn (buf, operands);
   15987              : 
   15988         7424 :   if (round_mode != I387_CW_ANY)
   15989         7376 :     output_asm_insn ("fldcw\t%2", operands);
   15990              : 
   15991              :   return "";
   15992              : }
   15993              : 
   15994              : /* Output code for x87 ffreep insn.  The OPNO argument, which may only
   15995              :    have the values zero or one, indicates the ffreep insn's operand
   15996              :    from the OPERANDS array.  */
   15997              : 
static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  /* Use the ffreep insn when the tuning flags say it is profitable.  */
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      /* The assembler does not understand the ffreep mnemonic; emit
	 its machine-code encoding as raw data instead.  */
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (STACK_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      /* ASM_SHORT emits a 16-bit value; on little-endian x86 the word
	 0xc<regno>df comes out as the bytes 0xdf 0xc0+regno, i.e. the
	 encoding of ffreep %st(regno).  */
      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
      return retval;
    }
#endif

  /* Otherwise fstp, which also frees/pops the stack slot.  */
  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
   16020              : 
   16021              : 
   16022              : /* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
   16023              :    should be used.  UNORDERED_P is true when fucom should be used.  */
   16024              : 
   16025              : const char *
   16026       107426 : output_fp_compare (rtx_insn *insn, rtx *operands,
   16027              :                    bool eflags_p, bool unordered_p)
   16028              : {
   16029       107426 :   rtx *xops = eflags_p ? &operands[0] : &operands[1];
   16030       107426 :   bool stack_top_dies;
   16031              : 
   16032       107426 :   static char buf[40];
   16033       107426 :   const char *p;
   16034              : 
   16035       107426 :   gcc_assert (STACK_TOP_P (xops[0]));
   16036              : 
   16037       107426 :   stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
   16038              : 
   16039       107426 :   if (eflags_p)
   16040              :     {
   16041       107426 :       p = unordered_p ? "fucomi" : "fcomi";
   16042       107426 :       strcpy (buf, p);
   16043              : 
   16044       107426 :       p = "p\t{%y1, %0|%0, %y1}";
   16045       107426 :       strcat (buf, p + !stack_top_dies);
   16046              : 
   16047       107426 :       return buf;
   16048              :     }
   16049              : 
   16050            0 :   if (STACK_REG_P (xops[1])
   16051            0 :       && stack_top_dies
   16052            0 :       && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
   16053              :     {
   16054            0 :       gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);
   16055              : 
   16056              :       /* If both the top of the 387 stack die, and the other operand
   16057              :          is also a stack register that dies, then this must be a
   16058              :          `fcompp' float compare.  */
   16059            0 :       p = unordered_p ? "fucompp" : "fcompp";
   16060            0 :       strcpy (buf, p);
   16061              :     }
   16062            0 :   else if (const0_operand (xops[1], VOIDmode))
   16063              :     {
   16064            0 :       gcc_assert (!unordered_p);
   16065            0 :       strcpy (buf, "ftst");
   16066              :     }
   16067              :   else
   16068              :     {
   16069            0 :       if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
   16070              :         {
   16071            0 :           gcc_assert (!unordered_p);
   16072              :           p = "ficom";
   16073              :         }
   16074              :       else
   16075            0 :         p = unordered_p ? "fucom" : "fcom";
   16076              : 
   16077            0 :       strcpy (buf, p);
   16078              : 
   16079            0 :       p = "p%Z2\t%y2";
   16080            0 :       strcat (buf, p + !stack_top_dies);
   16081              :     }
   16082              : 
   16083            0 :   output_asm_insn (buf, operands);
   16084            0 :   return "fnstsw\t%0";
   16085              : }
   16086              : 
   16087              : void
   16088       112773 : ix86_output_addr_vec_elt (FILE *file, int value)
   16089              : {
   16090       112773 :   const char *directive = ASM_LONG;
   16091              : 
   16092              : #ifdef ASM_QUAD
   16093       112773 :   if (TARGET_LP64)
   16094       101134 :     directive = ASM_QUAD;
   16095              : #else
   16096              :   gcc_assert (!TARGET_64BIT);
   16097              : #endif
   16098              : 
   16099       112773 :   fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
   16100       112773 : }
   16101              : 
/* Emit one relative (PIC-friendly) jump-table entry: the difference
   between label number VALUE and the anchor label number REL.  */
void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  /* 64-bit DImode case vectors need 8-byte entries.  */
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif
  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_VAROFF)
    fprintf (file, "%s%s%d-%s%d\n",
             directive, LPREFIX, value, LPREFIX, rel);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      /* Darwin expresses the entry relative to the function base.  */
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else if (HAVE_AS_GOTOFF_IN_DATA)
    /* 32-bit PIC with assembler @GOTOFF support in data sections.  */
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
  else
    /* No @GOTOFF in data: spell out the same GOT-relative offset.  */
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
                 GOT_SYMBOL_NAME, LPREFIX, value);
}
   16131              : 
   16132              : #define LEA_MAX_STALL (3)
   16133              : #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
   16134              : 
   16135              : /* Increase given DISTANCE in half-cycles according to
   16136              :    dependencies between PREV and NEXT instructions.
   16137              :    Add 1 half-cycle if there is no dependency and
   go to next cycle if there is some dependency.  */
   16139              : 
   16140              : static unsigned int
   16141         2129 : increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
   16142              : {
   16143         2129 :   df_ref def, use;
   16144              : 
   16145         2129 :   if (!prev || !next)
   16146          748 :     return distance + (distance & 1) + 2;
   16147              : 
   16148         1381 :   if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
   16149          226 :     return distance + 1;
   16150              : 
   16151         1920 :   FOR_EACH_INSN_USE (use, next)
   16152         2448 :     FOR_EACH_INSN_DEF (def, prev)
   16153         1683 :       if (!DF_REF_IS_ARTIFICIAL (def)
   16154         1683 :           && DF_REF_REGNO (use) == DF_REF_REGNO (def))
   16155          735 :         return distance + (distance & 1) + 2;
   16156              : 
   16157          420 :   return distance + 1;
   16158              : }
   16159              : 
   16160              : /* Function checks if instruction INSN defines register number
   16161              :    REGNO1 or REGNO2.  */
   16162              : 
   16163              : bool
   16164         2073 : insn_defines_reg (unsigned int regno1, unsigned int regno2,
   16165              :                   rtx_insn *insn)
   16166              : {
   16167         2073 :   df_ref def;
   16168              : 
   16169         3739 :   FOR_EACH_INSN_DEF (def, insn)
   16170         2070 :     if (DF_REF_REG_DEF_P (def)
   16171         2070 :         && !DF_REF_IS_ARTIFICIAL (def)
   16172         2070 :         && (regno1 == DF_REF_REGNO (def)
   16173         1682 :             || regno2 == DF_REF_REGNO (def)))
   16174              :       return true;
   16175              : 
   16176              :   return false;
   16177              : }
   16178              : 
   16179              : /* Function checks if instruction INSN uses register number
   16180              :    REGNO as a part of address expression.  */
   16181              : 
   16182              : static bool
   16183         1182 : insn_uses_reg_mem (unsigned int regno, rtx insn)
   16184              : {
   16185         1182 :   df_ref use;
   16186              : 
   16187         2475 :   FOR_EACH_INSN_USE (use, insn)
   16188         1384 :     if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
   16189              :       return true;
   16190              : 
   16191              :   return false;
   16192              : }
   16193              : 
   16194              : /* Search backward for non-agu definition of register number REGNO1
   16195              :    or register number REGNO2 in basic block starting from instruction
   16196              :    START up to head of basic block or instruction INSN.
   16197              : 
   16198              :    Function puts true value into *FOUND var if definition was found
   16199              :    and false otherwise.
   16200              : 
   16201              :    Distance in half-cycles between START and found instruction or head
   16202              :    of BB is added to DISTANCE and returned.  */
   16203              : 
static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
                               rtx_insn *insn, int distance,
                               rtx_insn *start, bool *found)
{
  /* BB is only needed to detect when the walk reaches the block head;
     START may be NULL, in which case the loop below never runs.  */
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx_insn *prev = start;
  /* NEXT trails PREV by one visited real insn: it is the most recently
     examined (later in the stream) insn, so increase_distance can test
     the PREV/NEXT pair for a dependency.  */
  rtx_insn *next = NULL;

  *found = false;

  /* Walk backwards from START, stopping at INSN, at the block head, or
     once the accumulated distance reaches the search threshold.  */
  while (prev
         && prev != insn
         && distance < LEA_SEARCH_THRESHOLD)
    {
      /* Only real (non-debug, non-jump) insns contribute to timing.  */
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
        {
          distance = increase_distance (prev, next, distance);
          if (insn_defines_reg (regno1, regno2, prev))
            {
              /* A definition by an lea does not count -- we are looking
                 specifically for a non-AGU definition.  Unrecognizable
                 insns are conservatively treated as non-lea.  */
              if (recog_memoized (prev) < 0
                  || get_attr_type (prev) != TYPE_LEA)
                {
                  *found = true;
                  return distance;
                }
            }

          next = prev;
        }
      if (prev == BB_HEAD (bb))
        break;

      prev = PREV_INSN (prev);
    }

  return distance;
}
   16242              : 
   16243              : /* Search backward for non-agu definition of register number REGNO1
   16244              :    or register number REGNO2 in INSN's basic block until
   16245              :    1. Pass LEA_SEARCH_THRESHOLD instructions, or
   16246              :    2. Reach neighbor BBs boundary, or
   16247              :    3. Reach agu definition.
   16248              :    Returns the distance between the non-agu definition point and INSN.
   16249              :    If no definition point, returns -1.  */
   16250              : 
static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
                         rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  /* First scan backwards within INSN's own basic block.  */
  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
                                              distance, PREV_INSN (insn),
                                              &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      /* A self-edge means the block is its own predecessor (a simple
	 loop); then continue the backward scan from the block's end.  */
      FOR_EACH_EDGE (e, ei, bb->preds)
        if (e->src == bb)
          {
            simple_loop = true;
            break;
          }

      if (simple_loop)
        distance = distance_non_agu_define_in_bb (regno1, regno2,
                                                  insn, distance,
                                                  BB_END (bb), &found);
      else
        {
          /* Otherwise scan every predecessor block and keep the
	     shortest distance found in any of them.  */
          int shortest_dist = -1;
          bool found_in_bb = false;

          FOR_EACH_EDGE (e, ei, bb->preds)
            {
              int bb_dist
                = distance_non_agu_define_in_bb (regno1, regno2,
                                                 insn, distance,
                                                 BB_END (e->src),
                                                 &found_in_bb);
              if (found_in_bb)
                {
                  if (shortest_dist < 0)
                    shortest_dist = bb_dist;
                  else if (bb_dist > 0)
                    shortest_dist = MIN (bb_dist, shortest_dist);

                  found = true;
                }
            }

          distance = shortest_dist;
        }
    }

  if (!found)
    return -1;

  /* Internal distances are half-cycles; report whole cycles.  */
  return distance >> 1;
}
   16313              : 
   16314              : /* Return the distance in half-cycles between INSN and the next
   16315              :    insn that uses register number REGNO in memory address added
   to DISTANCE.  Return -1 if REGNO is set.
   16317              : 
   16318              :    Put true value into *FOUND if register usage was found and
   16319              :    false otherwise.
   16320              :    Put true value into *REDEFINED if register redefinition was
   16321              :    found and false otherwise.  */
   16322              : 
static int
distance_agu_use_in_bb (unsigned int regno,
                        rtx_insn *insn, int distance, rtx_insn *start,
                        bool *found, bool *redefined)
{
  basic_block bb = NULL;
  rtx_insn *next = start;
  /* PREV is the previously visited real insn; increase_distance uses
     the PREV/NEXT pair to detect dependencies.  */
  rtx_insn *prev = NULL;

  *found = false;
  *redefined = false;

  if (start != NULL_RTX)
    {
      bb = BLOCK_FOR_INSN (start);
      if (start != BB_HEAD (bb))
        /* If insn and start belong to the same bb, set prev to insn,
           so the call to increase_distance will increase the distance
           between insns by 1.  */
        prev = insn;
    }

  /* Walk forwards from START, stopping at INSN, at the block end, or
     once the accumulated distance reaches the search threshold.  */
  while (next
         && next != insn
         && distance < LEA_SEARCH_THRESHOLD)
    {
      /* Only real (non-debug, non-jump) insns contribute to timing.  */
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
        {
          distance = increase_distance(prev, next, distance);
          if (insn_uses_reg_mem (regno, next))
            {
              /* Return DISTANCE if OP0 is used in memory
                 address in NEXT.  */
              *found = true;
              return distance;
            }

          if (insn_defines_reg (regno, INVALID_REGNUM, next))
            {
              /* Return -1 if OP0 is set in NEXT.  */
              *redefined = true;
              return -1;
            }

          prev = next;
        }

      if (next == BB_END (bb))
        break;

      next = NEXT_INSN (next);
    }

  return distance;
}
   16378              : 
   16379              : /* Return the distance between INSN and the next insn that uses
   16380              :    register number REGNO0 in memory address.  Return -1 if no such
   16381              :    a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
   16382              : 
static int
distance_agu_use (unsigned int regno0, rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  /* First scan forwards within INSN's own basic block.  */
  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno0, insn, distance,
                                       NEXT_INSN (insn),
                                       &found, &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      /* A self-edge means the block is its own successor (a simple
	 loop); then continue the forward scan from the block head.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest == bb)
          {
            simple_loop = true;
            break;
          }

      if (simple_loop)
        distance = distance_agu_use_in_bb (regno0, insn,
                                           distance, BB_HEAD (bb),
                                           &found, &redefined);
      else
        {
          /* Otherwise scan every successor block and keep the shortest
	     distance found in any of them.  */
          int shortest_dist = -1;
          bool found_in_bb = false;
          bool redefined_in_bb = false;

          FOR_EACH_EDGE (e, ei, bb->succs)
            {
              int bb_dist
                = distance_agu_use_in_bb (regno0, insn,
                                          distance, BB_HEAD (e->dest),
                                          &found_in_bb, &redefined_in_bb);
              if (found_in_bb)
                {
                  if (shortest_dist < 0)
                    shortest_dist = bb_dist;
                  else if (bb_dist > 0)
                    shortest_dist = MIN (bb_dist, shortest_dist);

                  found = true;
                }
            }

          distance = shortest_dist;
        }
    }

  if (!found || redefined)
    return -1;

  /* Internal distances are half-cycles; report whole cycles.  */
  return distance >> 1;
}
   16445              : 
   16446              : /* Define this macro to tune LEA priority vs ADD, it take effect when
   16447              :    there is a dilemma of choosing LEA or ADD
   16448              :    Negative value: ADD is more preferred than LEA
   16449              :    Zero: Neutral
   16450              :    Positive value: LEA is more preferred than ADD.  */
   16451              : #define IX86_LEA_PRIORITY 0
   16452              : 
   16453              : /* Return true if usage of lea INSN has performance advantage
   16454              :    over a sequence of instructions.  Instructions sequence has
   16455              :    SPLIT_COST cycles higher latency than lea latency.  */
   16456              : 
static bool
ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
                      unsigned int regno2, int split_cost, bool has_scale)
{
  int dist_define, dist_use;

  /* For Atom processors newer than Bonnell, if using a 2-source or
     3-source LEA for non-destructive destination purposes, or due to
     wanting ability to use SCALE, the use of LEA is justified.  */
  if (!TARGET_CPU_P (BONNELL))
    {
      if (has_scale)
        return true;
      if (split_cost < 1)
        return false;
      if (regno0 == regno1 || regno0 == regno2)
        return false;
      return true;
    }

  /* Remember recog_data content.  */
  struct recog_data_d recog_data_save = recog_data;

  /* Distance (in cycles) back to the nearest non-AGU definition of a
     source, and forward to the nearest address-use of the result.  */
  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  /* distance_non_agu_define can call get_attr_type which can call
     recog_memoized, restore recog_data back to previous content.  */
  recog_data = recog_data_save;

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non AGU operand definition, no AGU
         operand usage and split cost is 0 then both lea
         and non lea variants have same priority.  Currently
         we prefer lea for 64 bit code and non lea on 32 bit
         code.  */
      if (dist_use < 0 && split_cost == 0)
        return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
        return true;
    }

  /* With longer definitions distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in memory address then we just check
     that split cost exceeds AGU stall.  */
  if (dist_use < 0)
    return dist_define > LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
}
   16514              : 
   16515              : /* Return true if we need to split op0 = op1 + op2 into a sequence of
   16516              :    move and add to avoid AGU stalls.  */
   16517              : 
   16518              : bool
   16519      9115323 : ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
   16520              : {
   16521      9115323 :   unsigned int regno0, regno1, regno2;
   16522              : 
   16523              :   /* Check if we need to optimize.  */
   16524      9115323 :   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
   16525      9114508 :     return false;
   16526              : 
   16527          815 :   regno0 = true_regnum (operands[0]);
   16528          815 :   regno1 = true_regnum (operands[1]);
   16529          815 :   regno2 = true_regnum (operands[2]);
   16530              : 
   16531              :   /* We need to split only adds with non destructive
   16532              :      destination operand.  */
   16533          815 :   if (regno0 == regno1 || regno0 == regno2)
   16534              :     return false;
   16535              :   else
   16536          245 :     return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
   16537              : }
   16538              : 
   16539              : /* Return true if we should emit lea instruction instead of mov
   16540              :    instruction.  */
   16541              : 
   16542              : bool
   16543     29543582 : ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
   16544              : {
   16545     29543582 :   unsigned int regno0, regno1;
   16546              : 
   16547              :   /* Check if we need to optimize.  */
   16548     29543582 :   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
   16549     29541296 :     return false;
   16550              : 
   16551              :   /* Use lea for reg to reg moves only.  */
   16552         2286 :   if (!REG_P (operands[0]) || !REG_P (operands[1]))
   16553              :     return false;
   16554              : 
   16555          464 :   regno0 = true_regnum (operands[0]);
   16556          464 :   regno1 = true_regnum (operands[1]);
   16557              : 
   16558          464 :   return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
   16559              : }
   16560              : 
   16561              : /* Return true if we need to split lea into a sequence of
   16562              :    instructions to avoid AGU stalls during peephole2. */
   16563              : 
   16564              : bool
   16565     11162102 : ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
   16566              : {
   16567     11162102 :   unsigned int regno0, regno1, regno2;
   16568     11162102 :   int split_cost;
   16569     11162102 :   struct ix86_address parts;
   16570     11162102 :   int ok;
   16571              : 
   16572              :   /* The "at least two components" test below might not catch simple
   16573              :      move or zero extension insns if parts.base is non-NULL and parts.disp
   16574              :      is const0_rtx as the only components in the address, e.g. if the
   16575              :      register is %rbp or %r13.  As this test is much cheaper and moves or
   16576              :      zero extensions are the common case, do this check first.  */
   16577     11162102 :   if (REG_P (operands[1])
   16578     11162102 :       || (SImode_address_operand (operands[1], VOIDmode)
   16579       152059 :           && REG_P (XEXP (operands[1], 0))))
   16580      4095309 :     return false;
   16581              : 
   16582      7066793 :   ok = ix86_decompose_address (operands[1], &parts);
   16583      7066793 :   gcc_assert (ok);
   16584              : 
   16585              :   /* There should be at least two components in the address.  */
   16586      7066793 :   if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
   16587      7066793 :       + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
   16588              :     return false;
   16589              : 
   16590              :   /* We should not split into add if non legitimate pic
   16591              :      operand is used as displacement. */
   16592      2689633 :   if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
   16593              :     return false;
   16594              : 
   16595      2639534 :   regno0 = true_regnum (operands[0]) ;
   16596      2639534 :   regno1 = INVALID_REGNUM;
   16597      2639534 :   regno2 = INVALID_REGNUM;
   16598              : 
   16599      2639534 :   if (parts.base)
   16600      2565415 :     regno1 = true_regnum (parts.base);
   16601      2639534 :   if (parts.index)
   16602       487539 :     regno2 = true_regnum (parts.index);
   16603              : 
   16604              :   /* Use add for a = a + b and a = b + a since it is faster and shorter
   16605              :      than lea for most processors.  For the processors like BONNELL, if
   16606              :      the destination register of LEA holds an actual address which will
   16607              :      be used soon, LEA is better and otherwise ADD is better.  */
   16608      2639534 :   if (!TARGET_CPU_P (BONNELL)
   16609      2639405 :       && parts.scale == 1
   16610      2397482 :       && (!parts.disp || parts.disp == const0_rtx)
   16611       179822 :       && (regno0 == regno1 || regno0 == regno2))
   16612              :     return true;
   16613              : 
   16614              :   /* Split with -Oz if the encoding requires fewer bytes.  */
   16615      2633551 :   if (optimize_size > 1
   16616           27 :       && parts.scale > 1
   16617            4 :       && !parts.base
   16618            4 :       && (!parts.disp || parts.disp == const0_rtx))
   16619              :     return true;
   16620              : 
   16621              :   /* Check we need to optimize.  */
   16622      2633547 :   if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
   16623      2633206 :     return false;
   16624              : 
   16625          341 :   split_cost = 0;
   16626              : 
   16627              :   /* Compute how many cycles we will add to execution time
   16628              :      if split lea into a sequence of instructions.  */
   16629          341 :   if (parts.base || parts.index)
   16630              :     {
   16631              :       /* Have to use mov instruction if non desctructive
   16632              :          destination form is used.  */
   16633          341 :       if (regno1 != regno0 && regno2 != regno0)
   16634          266 :         split_cost += 1;
   16635              : 
   16636              :       /* Have to add index to base if both exist.  */
   16637          341 :       if (parts.base && parts.index)
   16638           54 :         split_cost += 1;
   16639              : 
   16640              :       /* Have to use shift and adds if scale is 2 or greater.  */
   16641          341 :       if (parts.scale > 1)
   16642              :         {
   16643           29 :           if (regno0 != regno1)
   16644           23 :             split_cost += 1;
   16645            6 :           else if (regno2 == regno0)
   16646            0 :             split_cost += 4;
   16647              :           else
   16648            6 :             split_cost += parts.scale;
   16649              :         }
   16650              : 
   16651              :       /* Have to use add instruction with immediate if
   16652              :          disp is non zero.  */
   16653          341 :       if (parts.disp && parts.disp != const0_rtx)
   16654          280 :         split_cost += 1;
   16655              : 
   16656              :       /* Subtract the price of lea.  */
   16657          341 :       split_cost -= 1;
   16658              :     }
   16659              : 
   16660          341 :   return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
   16661          341 :                                 parts.scale > 1);
   16662              : }
   16663              : 
   16664              : /* Return true if it is ok to optimize an ADD operation to LEA
   16665              :    operation to avoid flag register consumation.  For most processors,
   16666              :    ADD is faster than LEA.  For the processors like BONNELL, if the
   16667              :    destination register of LEA holds an actual address which will be
   16668              :    used soon, LEA is better and otherwise ADD is better.  */
   16669              : 
   16670              : bool
   16671      9172942 : ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
   16672              : {
   16673      9172942 :   unsigned int regno0 = true_regnum (operands[0]);
   16674      9172942 :   unsigned int regno1 = true_regnum (operands[1]);
   16675      9172942 :   unsigned int regno2 = true_regnum (operands[2]);
   16676              : 
   16677              :   /* If a = b + c, (a!=b && a!=c), must use lea form. */
   16678      9172942 :   if (regno0 != regno1 && regno0 != regno2)
   16679              :     return true;
   16680              : 
   16681      7130270 :   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
   16682      7129691 :     return false;
   16683              : 
   16684          579 :   return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
   16685              : }
   16686              : 
   16687              : /* Return true if destination reg of SET_BODY is shift count of
   16688              :    USE_BODY.  */
   16689              : 
   16690              : static bool
   16691           89 : ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
   16692              : {
   16693           89 :   rtx set_dest;
   16694           89 :   rtx shift_rtx;
   16695           89 :   int i;
   16696              : 
   16697              :   /* Retrieve destination of SET_BODY.  */
   16698           89 :   switch (GET_CODE (set_body))
   16699              :     {
   16700           73 :     case SET:
   16701           73 :       set_dest = SET_DEST (set_body);
   16702           73 :       if (!set_dest || !REG_P (set_dest))
   16703              :         return false;
   16704           72 :       break;
   16705            8 :     case PARALLEL:
   16706           24 :       for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
   16707           16 :         if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
   16708              :                                           use_body))
   16709              :           return true;
   16710              :       /* FALLTHROUGH */
   16711              :     default:
   16712              :       return false;
   16713              :     }
   16714              : 
   16715              :   /* Retrieve shift count of USE_BODY.  */
   16716           72 :   switch (GET_CODE (use_body))
   16717              :     {
   16718           24 :     case SET:
   16719           24 :       shift_rtx = XEXP (use_body, 1);
   16720           24 :       break;
   16721           24 :     case PARALLEL:
   16722           72 :       for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
   16723           48 :         if (ix86_dep_by_shift_count_body (set_body,
   16724           48 :                                           XVECEXP (use_body, 0, i)))
   16725              :           return true;
   16726              :       /* FALLTHROUGH */
   16727              :     default:
   16728              :       return false;
   16729              :     }
   16730              : 
   16731           24 :   if (shift_rtx
   16732           24 :       && (GET_CODE (shift_rtx) == ASHIFT
   16733           21 :           || GET_CODE (shift_rtx) == LSHIFTRT
   16734            5 :           || GET_CODE (shift_rtx) == ASHIFTRT
   16735            0 :           || GET_CODE (shift_rtx) == ROTATE
   16736            0 :           || GET_CODE (shift_rtx) == ROTATERT))
   16737              :     {
   16738           24 :       rtx shift_count = XEXP (shift_rtx, 1);
   16739              : 
   16740              :       /* Return true if shift count is dest of SET_BODY.  */
   16741           24 :       if (REG_P (shift_count))
   16742              :         {
   16743              :           /* Add check since it can be invoked before register
   16744              :              allocation in pre-reload schedule.  */
   16745            0 :           if (reload_completed
   16746            0 :               && true_regnum (set_dest) == true_regnum (shift_count))
   16747              :             return true;
   16748            0 :           else if (REGNO(set_dest) == REGNO(shift_count))
   16749              :             return true;
   16750              :         }
   16751              :     }
   16752              : 
   16753              :   return false;
   16754              : }
   16755              : 
   16756              : /* Return true if destination reg of SET_INSN is shift count of
   16757              :    USE_INSN.  */
   16758              : 
   16759              : bool
   16760           25 : ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
   16761              : {
   16762           25 :   return ix86_dep_by_shift_count_body (PATTERN (set_insn),
   16763           25 :                                        PATTERN (use_insn));
   16764              : }
   16765              : 
   16766              : /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   16767              :    are ok, keeping in mind the possible movddup alternative.  */
   16768              : 
   16769              : bool
   16770        89963 : ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
   16771              : {
   16772        89963 :   if (MEM_P (operands[0]))
   16773         2041 :     return rtx_equal_p (operands[0], operands[1 + high]);
   16774        87922 :   if (MEM_P (operands[1]) && MEM_P (operands[2]))
   16775          951 :     return false;
   16776              :   return true;
   16777              : }
   16778              : 
   16779              : /* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   16780              :    then replicate the value for all elements of the vector
   16781              :    register.  */
   16782              : 
   16783              : rtx
   16784        74193 : ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
   16785              : {
   16786        74193 :   int i, n_elt;
   16787        74193 :   rtvec v;
   16788        74193 :   machine_mode scalar_mode;
   16789              : 
   16790        74193 :   switch (mode)
   16791              :     {
   16792         1276 :     case E_V64QImode:
   16793         1276 :     case E_V32QImode:
   16794         1276 :     case E_V16QImode:
   16795         1276 :     case E_V32HImode:
   16796         1276 :     case E_V16HImode:
   16797         1276 :     case E_V8HImode:
   16798         1276 :     case E_V16SImode:
   16799         1276 :     case E_V8SImode:
   16800         1276 :     case E_V4SImode:
   16801         1276 :     case E_V2SImode:
   16802         1276 :     case E_V8DImode:
   16803         1276 :     case E_V4DImode:
   16804         1276 :     case E_V2DImode:
   16805         1276 :       gcc_assert (vect);
   16806              :       /* FALLTHRU */
   16807        74193 :     case E_V2HFmode:
   16808        74193 :     case E_V4HFmode:
   16809        74193 :     case E_V8HFmode:
   16810        74193 :     case E_V16HFmode:
   16811        74193 :     case E_V32HFmode:
   16812        74193 :     case E_V16SFmode:
   16813        74193 :     case E_V8SFmode:
   16814        74193 :     case E_V4SFmode:
   16815        74193 :     case E_V2SFmode:
   16816        74193 :     case E_V8DFmode:
   16817        74193 :     case E_V4DFmode:
   16818        74193 :     case E_V2DFmode:
   16819        74193 :     case E_V32BFmode:
   16820        74193 :     case E_V16BFmode:
   16821        74193 :     case E_V8BFmode:
   16822        74193 :     case E_V4BFmode:
   16823        74193 :     case E_V2BFmode:
   16824        74193 :       n_elt = GET_MODE_NUNITS (mode);
   16825        74193 :       v = rtvec_alloc (n_elt);
   16826        74193 :       scalar_mode = GET_MODE_INNER (mode);
   16827              : 
   16828        74193 :       RTVEC_ELT (v, 0) = value;
   16829              : 
   16830       230446 :       for (i = 1; i < n_elt; ++i)
   16831       156253 :         RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
   16832              : 
   16833        74193 :       return gen_rtx_CONST_VECTOR (mode, v);
   16834              : 
   16835            0 :     default:
   16836            0 :       gcc_unreachable ();
   16837              :     }
   16838              : }
   16839              : 
   16840              : /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   16841              :    and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   16842              :    for an SSE register.  If VECT is true, then replicate the mask for
   16843              :    all elements of the vector register.  If INVERT is true, then create
   16844              :    a mask excluding the sign bit.  */
   16845              : 
   16846              : rtx
   16847        75562 : ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
   16848              : {
   16849        75562 :   machine_mode vec_mode, imode;
   16850        75562 :   wide_int w;
   16851        75562 :   rtx mask, v;
   16852              : 
   16853        75562 :   switch (mode)
   16854              :     {
   16855              :     case E_V2HFmode:
   16856              :     case E_V4HFmode:
   16857              :     case E_V8HFmode:
   16858              :     case E_V16HFmode:
   16859              :     case E_V32HFmode:
   16860              :     case E_V32BFmode:
   16861              :     case E_V16BFmode:
   16862              :     case E_V8BFmode:
   16863              :     case E_V4BFmode:
   16864              :     case E_V2BFmode:
   16865              :       vec_mode = mode;
   16866              :       imode = HImode;
   16867              :       break;
   16868              : 
   16869        33840 :     case E_V16SImode:
   16870        33840 :     case E_V16SFmode:
   16871        33840 :     case E_V8SImode:
   16872        33840 :     case E_V4SImode:
   16873        33840 :     case E_V8SFmode:
   16874        33840 :     case E_V4SFmode:
   16875        33840 :     case E_V2SFmode:
   16876        33840 :     case E_V2SImode:
   16877        33840 :       vec_mode = mode;
   16878        33840 :       imode = SImode;
   16879        33840 :       break;
   16880              : 
   16881        38871 :     case E_V8DImode:
   16882        38871 :     case E_V4DImode:
   16883        38871 :     case E_V2DImode:
   16884        38871 :     case E_V8DFmode:
   16885        38871 :     case E_V4DFmode:
   16886        38871 :     case E_V2DFmode:
   16887        38871 :       vec_mode = mode;
   16888        38871 :       imode = DImode;
   16889        38871 :       break;
   16890              : 
   16891         2352 :     case E_TImode:
   16892         2352 :     case E_TFmode:
   16893         2352 :       vec_mode = VOIDmode;
   16894         2352 :       imode = TImode;
   16895         2352 :       break;
   16896              : 
   16897            0 :     default:
   16898            0 :       gcc_unreachable ();
   16899              :     }
   16900              : 
   16901        75562 :   machine_mode inner_mode = GET_MODE_INNER (mode);
   16902       151124 :   w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
   16903       151124 :                            GET_MODE_BITSIZE (inner_mode));
   16904        75562 :   if (invert)
   16905        39261 :     w = wi::bit_not (w);
   16906              : 
   16907              :   /* Force this value into the low part of a fp vector constant.  */
   16908        75562 :   mask = immed_wide_int_const (w, imode);
   16909        75562 :   mask = gen_lowpart (inner_mode, mask);
   16910              : 
   16911        75562 :   if (vec_mode == VOIDmode)
   16912         2352 :     return force_reg (inner_mode, mask);
   16913              : 
   16914        73210 :   v = ix86_build_const_vector (vec_mode, vect, mask);
   16915        73210 :   return force_reg (vec_mode, v);
   16916        75562 : }
   16917              : 
   16918              : /* Return HOST_WIDE_INT for const vector OP in MODE.  */
   16919              : 
   16920              : HOST_WIDE_INT
   16921       159181 : ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
   16922              : {
   16923       336312 :   if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   16924            0 :     gcc_unreachable ();
   16925              : 
   16926       159181 :   int nunits = GET_MODE_NUNITS (mode);
   16927       318362 :   wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
   16928       159181 :   machine_mode innermode = GET_MODE_INNER (mode);
   16929       159181 :   unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);
   16930              : 
   16931       159181 :   switch (mode)
   16932              :     {
   16933              :     case E_V2QImode:
   16934              :     case E_V4QImode:
   16935              :     case E_V2HImode:
   16936              :     case E_V8QImode:
   16937              :     case E_V4HImode:
   16938              :     case E_V2SImode:
   16939       530135 :       for (int i = 0; i < nunits; ++i)
   16940              :         {
   16941       376326 :           int v = INTVAL (XVECEXP (op, 0, i));
   16942       376326 :           wide_int wv = wi::shwi (v, innermode_bits);
   16943       376326 :           val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
   16944       376326 :         }
   16945              :       break;
   16946           92 :     case E_V1SImode:
   16947           92 :     case E_V1DImode:
   16948           92 :       op = CONST_VECTOR_ELT (op, 0);
   16949           92 :       return INTVAL (op);
   16950              :     case E_V2HFmode:
   16951              :     case E_V2BFmode:
   16952              :     case E_V4HFmode:
   16953              :     case E_V4BFmode:
   16954              :     case E_V2SFmode:
   16955        15864 :       for (int i = 0; i < nunits; ++i)
   16956              :         {
   16957        10584 :           rtx x = XVECEXP (op, 0, i);
   16958        10584 :           int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
   16959        10584 :                                   REAL_MODE_FORMAT (innermode));
   16960        10584 :           wide_int wv = wi::shwi (v, innermode_bits);
   16961        10584 :           val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
   16962        10584 :         }
   16963              :       break;
   16964            0 :     default:
   16965            0 :       gcc_unreachable ();
   16966              :     }
   16967              : 
   16968       159089 :   return val.to_shwi ();
   16969       159181 : }
   16970              : 
   16971           32 : int ix86_get_flags_cc (rtx_code code)
   16972              : {
   16973           32 :   switch (code)
   16974              :     {
   16975              :       case NE: return X86_CCNE;
   16976              :       case EQ: return X86_CCE;
   16977              :       case GE: return X86_CCNL;
   16978              :       case GT: return X86_CCNLE;
   16979              :       case LE: return X86_CCLE;
   16980              :       case LT: return X86_CCL;
   16981              :       case GEU: return X86_CCNB;
   16982              :       case GTU: return X86_CCNBE;
   16983              :       case LEU: return X86_CCBE;
   16984              :       case LTU: return X86_CCB;
   16985              :       default: return -1;
   16986              :     }
   16987              : }
   16988              : 
   16989              : /* Return TRUE or FALSE depending on whether the first SET in INSN
   16990              :    has source and destination with matching CC modes, and that the
   16991              :    CC mode is at least as constrained as REQ_MODE.  */
   16992              : 
   16993              : bool
   16994     54326591 : ix86_match_ccmode (rtx insn, machine_mode req_mode)
   16995              : {
   16996     54326591 :   rtx set;
   16997     54326591 :   machine_mode set_mode;
   16998              : 
   16999     54326591 :   set = PATTERN (insn);
   17000     54326591 :   if (GET_CODE (set) == PARALLEL)
   17001       492250 :     set = XVECEXP (set, 0, 0);
   17002     54326591 :   gcc_assert (GET_CODE (set) == SET);
   17003     54326591 :   gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
   17004              : 
   17005     54326591 :   set_mode = GET_MODE (SET_DEST (set));
   17006     54326591 :   switch (set_mode)
   17007              :     {
   17008      1371212 :     case E_CCNOmode:
   17009      1371212 :       if (req_mode != CCNOmode
   17010        91827 :           && (req_mode != CCmode
   17011            0 :               || XEXP (SET_SRC (set), 1) != const0_rtx))
   17012              :         return false;
   17013              :       break;
   17014      5855163 :     case E_CCmode:
   17015      5855163 :       if (req_mode == CCGCmode)
   17016              :         return false;
   17017              :       /* FALLTHRU */
   17018      9453361 :     case E_CCGCmode:
   17019      9453361 :       if (req_mode == CCGOCmode || req_mode == CCNOmode)
   17020              :         return false;
   17021              :       /* FALLTHRU */
   17022     10494583 :     case E_CCGOCmode:
   17023     10494583 :       if (req_mode == CCZmode)
   17024              :         return false;
   17025              :       /* FALLTHRU */
   17026              :     case E_CCZmode:
   17027              :       break;
   17028              : 
   17029            0 :     case E_CCGZmode:
   17030              : 
   17031            0 :     case E_CCAmode:
   17032            0 :     case E_CCCmode:
   17033            0 :     case E_CCOmode:
   17034            0 :     case E_CCPmode:
   17035            0 :     case E_CCSmode:
   17036            0 :       if (set_mode != req_mode)
   17037              :         return false;
   17038              :       break;
   17039              : 
   17040            0 :     default:
   17041            0 :       gcc_unreachable ();
   17042              :     }
   17043              : 
   17044     54225913 :   return GET_MODE (SET_SRC (set)) == set_mode;
   17045              : }
   17046              : 
   17047              : machine_mode
   17048     13686562 : ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
   17049              : {
   17050     13686562 :   machine_mode mode = GET_MODE (op0);
   17051              : 
   17052     13686562 :   if (SCALAR_FLOAT_MODE_P (mode))
   17053              :     {
   17054       136234 :       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
   17055              :       return CCFPmode;
   17056              :     }
   17057              : 
   17058     13550328 :   switch (code)
   17059              :     {
   17060              :       /* Only zero flag is needed.  */
   17061              :     case EQ:                    /* ZF=0 */
   17062              :     case NE:                    /* ZF!=0 */
   17063              :       return CCZmode;
   17064              :       /* Codes needing carry flag.  */
   17065       991284 :     case GEU:                   /* CF=0 */
   17066       991284 :     case LTU:                   /* CF=1 */
   17067       991284 :       rtx geu;
   17068              :       /* Detect overflow checks.  They need just the carry flag.  */
   17069       991284 :       if (GET_CODE (op0) == PLUS
   17070       991284 :           && (rtx_equal_p (op1, XEXP (op0, 0))
   17071       130336 :               || rtx_equal_p (op1, XEXP (op0, 1))))
   17072        17468 :         return CCCmode;
   17073              :       /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
   17074              :          Match LTU of op0
   17075              :          (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
   17076              :          and op1
   17077              :          (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
   17078              :          where CC_CCC is either CC or CCC.  */
   17079       973816 :       else if (code == LTU
   17080       388884 :                && GET_CODE (op0) == NEG
   17081         5034 :                && GET_CODE (geu = XEXP (op0, 0)) == GEU
   17082         3663 :                && REG_P (XEXP (geu, 0))
   17083         3353 :                && (GET_MODE (XEXP (geu, 0)) == CCCmode
   17084           37 :                    || GET_MODE (XEXP (geu, 0)) == CCmode)
   17085         3342 :                && REGNO (XEXP (geu, 0)) == FLAGS_REG
   17086         3342 :                && XEXP (geu, 1) == const0_rtx
   17087         3342 :                && GET_CODE (op1) == LTU
   17088         3342 :                && REG_P (XEXP (op1, 0))
   17089         3342 :                && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
   17090         3342 :                && REGNO (XEXP (op1, 0)) == FLAGS_REG
   17091       977158 :                && XEXP (op1, 1) == const0_rtx)
   17092              :         return CCCmode;
   17093              :       /* Similarly for *x86_cmc pattern.
   17094              :          Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
   17095              :          and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)).
   17096              :          It is sufficient to test that the operand modes are CCCmode.  */
   17097       970474 :       else if (code == LTU
   17098       385542 :                && GET_CODE (op0) == NEG
   17099         1692 :                && GET_CODE (XEXP (op0, 0)) == LTU
   17100          372 :                && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
   17101            3 :                && GET_CODE (op1) == GEU
   17102            3 :                && GET_MODE (XEXP (op1, 0)) == CCCmode)
   17103              :         return CCCmode;
   17104              :       /* Similarly for the comparison of addcarry/subborrow pattern.  */
   17105       385539 :       else if (code == LTU
   17106       385539 :                && GET_CODE (op0) == ZERO_EXTEND
   17107        16270 :                && GET_CODE (op1) == PLUS
   17108        10346 :                && ix86_carry_flag_operator (XEXP (op1, 0), VOIDmode)
   17109        10346 :                && GET_CODE (XEXP (op1, 1)) == ZERO_EXTEND)
   17110              :         return CCCmode;
   17111              :       else
   17112       960125 :         return CCmode;
   17113              :     case GTU:                   /* CF=0 & ZF=0 */
   17114              :     case LEU:                   /* CF=1 | ZF=1 */
   17115              :       return CCmode;
   17116              :       /* Codes possibly doable only with sign flag when
   17117              :          comparing against zero.  */
   17118       779489 :     case GE:                    /* SF=OF   or   SF=0 */
   17119       779489 :     case LT:                    /* SF<>OF  or   SF=1 */
   17120       779489 :       if (op1 == const0_rtx)
   17121              :         return CCGOCmode;
   17122              :       else
   17123              :         /* For other cases Carry flag is not required.  */
   17124       443641 :         return CCGCmode;
   17125              :       /* Codes doable only with sign flag when comparing
   17126              :          against zero, but we miss jump instruction for it
   17127              :          so we need to use relational tests against overflow
   17128              :          that thus needs to be zero.  */
   17129       892404 :     case GT:                    /* ZF=0 & SF=OF */
   17130       892404 :     case LE:                    /* ZF=1 | SF<>OF */
   17131       892404 :       if (op1 == const0_rtx)
   17132              :         return CCNOmode;
   17133              :       else
   17134       593789 :         return CCGCmode;
   17135              :     default:
   17136              :       /* CCmode should be used in all other cases.  */
   17137              :       return CCmode;
   17138              :     }
   17139              : }
   17140              : 
   17141              : /* Return TRUE or FALSE depending on whether the ptest instruction
   17142              :    INSN has source and destination with suitable matching CC modes.  */
   17143              : 
   17144              : bool
   17145        94451 : ix86_match_ptest_ccmode (rtx insn)
   17146              : {
   17147        94451 :   rtx set, src;
   17148        94451 :   machine_mode set_mode;
   17149              : 
   17150        94451 :   set = PATTERN (insn);
   17151        94451 :   gcc_assert (GET_CODE (set) == SET);
   17152        94451 :   src = SET_SRC (set);
   17153        94451 :   gcc_assert (GET_CODE (src) == UNSPEC
   17154              :               && XINT (src, 1) == UNSPEC_PTEST);
   17155              : 
   17156        94451 :   set_mode = GET_MODE (src);
   17157        94451 :   if (set_mode != CCZmode
   17158              :       && set_mode != CCCmode
   17159              :       && set_mode != CCmode)
   17160              :     return false;
   17161        94451 :   return GET_MODE (SET_DEST (set)) == set_mode;
   17162              : }
   17163              : 
   17164              : /* Return the fixed registers used for condition codes.  */
   17165              : 
   17166              : static bool
   17167     18822845 : ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
   17168              : {
   17169     18822845 :   *p1 = FLAGS_REG;
   17170     18822845 :   *p2 = INVALID_REGNUM;
   17171     18822845 :   return true;
   17172              : }
   17173              : 
   17174              : /* If two condition code modes are compatible, return a condition code
   17175              :    mode which is compatible with both.  Otherwise, return
   17176              :    VOIDmode.  */
   17177              : 
   17178              : static machine_mode
   17179        30602 : ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
   17180              : {
   17181        30602 :   if (m1 == m2)
   17182              :     return m1;
   17183              : 
   17184        29947 :   if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
   17185              :     return VOIDmode;
   17186              : 
   17187        29947 :   if ((m1 == CCGCmode && m2 == CCGOCmode)
   17188        29947 :       || (m1 == CCGOCmode && m2 == CCGCmode))
   17189              :     return CCGCmode;
   17190              : 
   17191        29947 :   if ((m1 == CCNOmode && m2 == CCGOCmode)
   17192        29767 :       || (m1 == CCGOCmode && m2 == CCNOmode))
   17193              :     return CCNOmode;
   17194              : 
   17195        29650 :   if (m1 == CCZmode
   17196        15662 :       && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
   17197              :     return m2;
   17198        17251 :   else if (m2 == CCZmode
   17199        13735 :            && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
   17200              :     return m1;
   17201              : 
   17202         7188 :   switch (m1)
   17203              :     {
   17204            0 :     default:
   17205            0 :       gcc_unreachable ();
   17206              : 
   17207         7188 :     case E_CCmode:
   17208         7188 :     case E_CCGCmode:
   17209         7188 :     case E_CCGOCmode:
   17210         7188 :     case E_CCNOmode:
   17211         7188 :     case E_CCAmode:
   17212         7188 :     case E_CCCmode:
   17213         7188 :     case E_CCOmode:
   17214         7188 :     case E_CCPmode:
   17215         7188 :     case E_CCSmode:
   17216         7188 :     case E_CCZmode:
   17217         7188 :       switch (m2)
   17218              :         {
   17219              :         default:
   17220              :           return VOIDmode;
   17221              : 
   17222              :         case E_CCmode:
   17223              :         case E_CCGCmode:
   17224              :         case E_CCGOCmode:
   17225              :         case E_CCNOmode:
   17226              :         case E_CCAmode:
   17227              :         case E_CCCmode:
   17228              :         case E_CCOmode:
   17229              :         case E_CCPmode:
   17230              :         case E_CCSmode:
   17231              :         case E_CCZmode:
   17232              :           return CCmode;
   17233              :         }
   17234              : 
   17235              :     case E_CCFPmode:
   17236              :       /* These are only compatible with themselves, which we already
   17237              :          checked above.  */
   17238              :       return VOIDmode;
   17239              :     }
   17240              : }
   17241              : 
    17242              : /* Return strategy to use for floating-point.  We assume that fcomi is always
    17243              :    preferable where available, since that is also true when looking at size
    17244              :    (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
   17245              : 
   17246              : enum ix86_fpcmp_strategy
   17247      5531039 : ix86_fp_comparison_strategy (enum rtx_code)
   17248              : {
   17249              :   /* Do fcomi/sahf based test when profitable.  */
   17250              : 
   17251      5531039 :   if (TARGET_CMOVE)
   17252              :     return IX86_FPCMP_COMI;
   17253              : 
   17254            0 :   if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
   17255            0 :     return IX86_FPCMP_SAHF;
   17256              : 
   17257              :   return IX86_FPCMP_ARITH;
   17258              : }
   17259              : 
   17260              : /* Convert comparison codes we use to represent FP comparison to integer
   17261              :    code that will result in proper branch.  Return UNKNOWN if no such code
   17262              :    is available.  */
   17263              : 
   17264              : enum rtx_code
   17265       580954 : ix86_fp_compare_code_to_integer (enum rtx_code code)
   17266              : {
   17267       580954 :   switch (code)
   17268              :     {
   17269              :     case GT:
   17270              :       return GTU;
   17271        18009 :     case GE:
   17272        18009 :       return GEU;
   17273              :     case ORDERED:
   17274              :     case UNORDERED:
   17275              :       return code;
   17276       118749 :     case UNEQ:
   17277       118749 :       return EQ;
   17278        17430 :     case UNLT:
   17279        17430 :       return LTU;
   17280        31130 :     case UNLE:
   17281        31130 :       return LEU;
   17282       113340 :     case LTGT:
   17283       113340 :       return NE;
   17284          681 :     case EQ:
   17285          681 :     case NE:
   17286          681 :       if (TARGET_AVX10_2)
   17287              :         return code;
   17288              :       /* FALLTHRU.  */
   17289          221 :     default:
   17290          221 :       return UNKNOWN;
   17291              :     }
   17292              : }
   17293              : 
   17294              : /* Zero extend possibly SImode EXP to Pmode register.  */
   17295              : rtx
   17296        46575 : ix86_zero_extend_to_Pmode (rtx exp)
   17297              : {
   17298        58437 :   return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
   17299              : }
   17300              : 
   17301              : /* Return true if the function is called via PLT.   */
   17302              : 
   17303              : bool
   17304      1000178 : ix86_call_use_plt_p (rtx call_op)
   17305              : {
   17306      1000178 :   if (SYMBOL_REF_LOCAL_P (call_op))
   17307              :     {
   17308       199464 :       if (SYMBOL_REF_DECL (call_op)
   17309       199464 :           && TREE_CODE (SYMBOL_REF_DECL (call_op)) == FUNCTION_DECL)
   17310              :         {
   17311              :           /* NB: All ifunc functions must be called via PLT.  */
   17312       116304 :           cgraph_node *node
   17313       116304 :             = cgraph_node::get (SYMBOL_REF_DECL (call_op));
   17314       116304 :           if (node && node->ifunc_resolver)
   17315              :             return true;
   17316              :         }
   17317       199444 :       return false;
   17318              :     }
   17319              :   return true;
   17320              : }
   17321              : 
   17322              : /* Implement TARGET_IFUNC_REF_LOCAL_OK.  If this hook returns true,
   17323              :    the PLT entry will be used as the function address for local IFUNC
   17324              :    functions.  When the PIC register is needed for PLT call, indirect
   17325              :    call via the PLT entry will fail since the PIC register may not be
   17326              :    set up properly for indirect call.  In this case, we should return
   17327              :    false.  */
   17328              : 
   17329              : static bool
   17330    768688329 : ix86_ifunc_ref_local_ok (void)
   17331              : {
   17332    768688329 :   return !flag_pic || (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC);
   17333              : }
   17334              : 
   17335              : /* Return true if the function being called was marked with attribute
   17336              :    "noplt" or using -fno-plt and we are compiling for non-PIC.  We need
   17337              :    to handle the non-PIC case in the backend because there is no easy
   17338              :    interface for the front-end to force non-PLT calls to use the GOT.
   17339              :    This is currently used only with 64-bit or 32-bit GOT32X ELF targets
   17340              :    to call the function marked "noplt" indirectly.  */
   17341              : 
   17342              : bool
   17343      5916374 : ix86_nopic_noplt_attribute_p (rtx call_op)
   17344              : {
   17345      5421862 :   if (flag_pic || ix86_cmodel == CM_LARGE
   17346              :       || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
   17347              :       || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
   17348     11338236 :       || SYMBOL_REF_LOCAL_P (call_op))
   17349              :     return false;
   17350              : 
   17351      3800964 :   tree symbol_decl = SYMBOL_REF_DECL (call_op);
   17352              : 
   17353      3800964 :   if (!flag_plt
   17354      3800964 :       || (symbol_decl != NULL_TREE
   17355      3800932 :           && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
   17356           34 :     return true;
   17357              : 
   17358              :   return false;
   17359              : }
   17360              : 
   17361              : /* Helper to output the jmp/call.  */
   17362              : static void
   17363           33 : ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
   17364              : {
   17365           33 :   if (thunk_name != NULL)
   17366              :     {
   17367           22 :       if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
   17368            1 :           && ix86_indirect_branch_cs_prefix)
   17369            1 :         fprintf (asm_out_file, "\tcs\n");
   17370           22 :       fprintf (asm_out_file, "\tjmp\t");
   17371           22 :       assemble_name (asm_out_file, thunk_name);
   17372           22 :       putc ('\n', asm_out_file);
   17373           22 :       if ((ix86_harden_sls & harden_sls_indirect_jmp))
   17374            2 :         fputs ("\tint3\n", asm_out_file);
   17375              :     }
   17376              :   else
   17377           11 :     output_indirect_thunk (regno);
   17378           33 : }
   17379              : 
   17380              : /* Output indirect branch via a call and return thunk.  CALL_OP is a
   17381              :    register which contains the branch target.  XASM is the assembly
   17382              :    template for CALL_OP.  Branch is a tail call if SIBCALL_P is true.
   17383              :    A normal call is converted to:
   17384              : 
   17385              :         call __x86_indirect_thunk_reg
   17386              : 
   17387              :    and a tail call is converted to:
   17388              : 
   17389              :         jmp __x86_indirect_thunk_reg
   17390              :  */
   17391              : 
   17392              : static void
   17393           50 : ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
   17394              : {
   17395           50 :   char thunk_name_buf[32];
   17396           50 :   char *thunk_name;
   17397           50 :   enum indirect_thunk_prefix need_prefix
   17398           50 :     = indirect_thunk_need_prefix (current_output_insn);
   17399           50 :   int regno = REGNO (call_op);
   17400              : 
   17401           50 :   if (cfun->machine->indirect_branch_type
   17402           50 :       != indirect_branch_thunk_inline)
   17403              :     {
   17404           39 :       if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
   17405           16 :         SET_HARD_REG_BIT (indirect_thunks_used, regno);
   17406              : 
   17407           39 :       indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
   17408           39 :       thunk_name = thunk_name_buf;
   17409              :     }
   17410              :   else
   17411              :     thunk_name = NULL;
   17412              : 
   17413           50 :   if (sibcall_p)
   17414           27 :      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
   17415              :   else
   17416              :     {
   17417           23 :       if (thunk_name != NULL)
   17418              :         {
   17419           17 :           if ((REX_INT_REGNO_P (regno) || REX_INT_REGNO_P (regno))
   17420            1 :               && ix86_indirect_branch_cs_prefix)
   17421            1 :             fprintf (asm_out_file, "\tcs\n");
   17422           17 :           fprintf (asm_out_file, "\tcall\t");
   17423           17 :           assemble_name (asm_out_file, thunk_name);
   17424           17 :           putc ('\n', asm_out_file);
   17425           17 :           return;
   17426              :         }
   17427              : 
   17428            6 :       char indirectlabel1[32];
   17429            6 :       char indirectlabel2[32];
   17430              : 
   17431            6 :       ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
   17432              :                                    INDIRECT_LABEL,
   17433              :                                    indirectlabelno++);
   17434            6 :       ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
   17435              :                                    INDIRECT_LABEL,
   17436              :                                    indirectlabelno++);
   17437              : 
   17438              :       /* Jump.  */
   17439            6 :       fputs ("\tjmp\t", asm_out_file);
   17440            6 :       assemble_name_raw (asm_out_file, indirectlabel2);
   17441            6 :       fputc ('\n', asm_out_file);
   17442              : 
   17443            6 :       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
   17444              : 
   17445            6 :      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
   17446              : 
   17447            6 :       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
   17448              : 
   17449              :       /* Call.  */
   17450            6 :       fputs ("\tcall\t", asm_out_file);
   17451            6 :       assemble_name_raw (asm_out_file, indirectlabel1);
   17452            6 :       fputc ('\n', asm_out_file);
   17453              :     }
   17454              : }
   17455              : 
   17456              : /* Output indirect branch via a call and return thunk.  CALL_OP is
   17457              :    the branch target.  XASM is the assembly template for CALL_OP.
   17458              :    Branch is a tail call if SIBCALL_P is true.  A normal call is
   17459              :    converted to:
   17460              : 
   17461              :         jmp L2
   17462              :    L1:
   17463              :         push CALL_OP
   17464              :         jmp __x86_indirect_thunk
   17465              :    L2:
   17466              :         call L1
   17467              : 
   17468              :    and a tail call is converted to:
   17469              : 
   17470              :         push CALL_OP
   17471              :         jmp __x86_indirect_thunk
   17472              :  */
   17473              : 
static void
ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
                                      bool sibcall_p)
{
  char thunk_name_buf[32];
  char *thunk_name;
  char push_buf[64];
  enum indirect_thunk_prefix need_prefix
    = indirect_thunk_need_prefix (current_output_insn);
  /* regno == -1 selects the register-free __x86_indirect_thunk
     variant (the target is passed on the stack, not in a register).  */
  int regno = -1;

  if (cfun->machine->indirect_branch_type
      != indirect_branch_thunk_inline)
    {
      /* An external thunk is used; note that it must be emitted.  */
      if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
        indirect_thunk_needed = true;
      indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
      thunk_name = thunk_name_buf;
    }
  else
    /* NULL asks ix86_output_jmp_thunk_or_indirect to emit the thunk
       body inline instead of jumping to a named one.  */
    thunk_name = NULL;

  /* Build "push{q|l} <operand>" to place the branch target on the
     stack for the thunk to consume.  */
  snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
            TARGET_64BIT ? 'q' : 'l', xasm);

  if (sibcall_p)
    {
      /* Tail call: push CALL_OP, then jmp to the thunk.  */
      output_asm_insn (push_buf, &call_op);
      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
    }
  else
    {
      /* Normal call: emitted as
	   jmp L2; L1: push CALL_OP; jmp thunk; L2: call L1
	 so the call pushes a return address that lands after L2.  */
      char indirectlabel1[32];
      char indirectlabel2[32];

      ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
                                   INDIRECT_LABEL,
                                   indirectlabelno++);
      ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
                                   INDIRECT_LABEL,
                                   indirectlabelno++);

      /* Jump.  */
      fputs ("\tjmp\t", asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel2);
      fputc ('\n', asm_out_file);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

      /* An external function may be called via GOT, instead of PLT.  */
      if (MEM_P (call_op))
        {
          struct ix86_address parts;
          rtx addr = XEXP (call_op, 0);
          if (ix86_decompose_address (addr, &parts)
              && parts.base == stack_pointer_rtx)
            {
              /* Since call will adjust stack by -UNITS_PER_WORD,
                 we must convert "disp(stack, index, scale)" to
                 "disp+UNITS_PER_WORD(stack, index, scale)".  */
              if (parts.index)
                {
                  addr = gen_rtx_MULT (Pmode, parts.index,
                                       GEN_INT (parts.scale));
                  addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                       addr);
                }
              else
                addr = stack_pointer_rtx;

              rtx disp;
              if (parts.disp != NULL_RTX)
                disp = plus_constant (Pmode, parts.disp,
                                      UNITS_PER_WORD);
              else
                disp = GEN_INT (UNITS_PER_WORD);

              /* Rebuild the memory operand with the adjusted
		 displacement.  */
              addr = gen_rtx_PLUS (Pmode, addr, disp);
              call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
            }
        }

      output_asm_insn (push_buf, &call_op);

      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);

      /* Call.  */
      fputs ("\tcall\t", asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel1);
      fputc ('\n', asm_out_file);
    }
}
   17568              : 
   17569              : /* Output indirect branch via a call and return thunk.  CALL_OP is
   17570              :    the branch target.  XASM is the assembly template for CALL_OP.
   17571              :    Branch is a tail call if SIBCALL_P is true.   */
   17572              : 
   17573              : static void
   17574           50 : ix86_output_indirect_branch (rtx call_op, const char *xasm,
   17575              :                              bool sibcall_p)
   17576              : {
   17577           50 :   if (REG_P (call_op))
   17578           50 :     ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
   17579              :   else
   17580            0 :     ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
   17581           50 : }
   17582              : 
   17583              : /* Output indirect jump.  CALL_OP is the jump target.  */
   17584              : 
   17585              : const char *
   17586         7633 : ix86_output_indirect_jmp (rtx call_op)
   17587              : {
   17588         7633 :   if (cfun->machine->indirect_branch_type != indirect_branch_keep)
   17589              :     {
   17590              :       /* We can't have red-zone since "call" in the indirect thunk
   17591              :          pushes the return address onto stack, destroying red-zone.  */
   17592            4 :       if (ix86_red_zone_used)
   17593            0 :         gcc_unreachable ();
   17594              : 
   17595            4 :       ix86_output_indirect_branch (call_op, "%0", true);
   17596              :     }
   17597              :   else
   17598         7629 :     output_asm_insn ("%!jmp\t%A0", &call_op);
   17599         7633 :   return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3" : "";
   17600              : }
   17601              : 
   17602              : /* Output return instrumentation for current function if needed.  */
   17603              : 
   17604              : static void
   17605      1708718 : output_return_instrumentation (void)
   17606              : {
   17607      1708718 :   if (ix86_instrument_return != instrument_return_none
   17608            6 :       && flag_fentry
   17609      1708724 :       && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
   17610              :     {
   17611            5 :       if (ix86_flag_record_return)
   17612            5 :         fprintf (asm_out_file, "1:\n");
   17613            5 :       switch (ix86_instrument_return)
   17614              :         {
   17615            2 :         case instrument_return_call:
   17616            2 :           fprintf (asm_out_file, "\tcall\t__return__\n");
   17617            2 :           break;
   17618            3 :         case instrument_return_nop5:
   17619              :           /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1)  */
   17620            3 :           fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
   17621            3 :           break;
   17622              :         case instrument_return_none:
   17623              :           break;
   17624              :         }
   17625              : 
   17626            5 :       if (ix86_flag_record_return)
   17627              :         {
   17628            5 :           fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
   17629            5 :           fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
   17630            5 :           fprintf (asm_out_file, "\t.previous\n");
   17631              :         }
   17632              :     }
   17633      1708718 : }
   17634              : 
   17635              : /* Output function return.  CALL_OP is the jump target.  Add a REP
   17636              :    prefix to RET if LONG_P is true and function return is kept.  */
   17637              : 
   17638              : const char *
   17639      1578236 : ix86_output_function_return (bool long_p)
   17640              : {
   17641      1578236 :   output_return_instrumentation ();
   17642              : 
   17643      1578236 :   if (cfun->machine->function_return_type != indirect_branch_keep)
   17644              :     {
   17645           18 :       char thunk_name[32];
   17646           18 :       enum indirect_thunk_prefix need_prefix
   17647           18 :         = indirect_thunk_need_prefix (current_output_insn);
   17648              : 
   17649           18 :       if (cfun->machine->function_return_type
   17650           18 :           != indirect_branch_thunk_inline)
   17651              :         {
   17652           13 :           bool need_thunk = (cfun->machine->function_return_type
   17653              :                              == indirect_branch_thunk);
   17654           13 :           indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
   17655              :                                true);
   17656           13 :           indirect_return_needed |= need_thunk;
   17657           13 :           fprintf (asm_out_file, "\tjmp\t");
   17658           13 :           assemble_name (asm_out_file, thunk_name);
   17659           13 :           putc ('\n', asm_out_file);
   17660              :         }
   17661              :       else
   17662            5 :         output_indirect_thunk (INVALID_REGNUM);
   17663              : 
   17664           18 :       return "";
   17665              :     }
   17666              : 
   17667      3155947 :   output_asm_insn (long_p ? "rep%; ret" : "ret", nullptr);
   17668      1578218 :   return (ix86_harden_sls & harden_sls_return) ? "int3" : "";
   17669              : }
   17670              : 
   17671              : /* Output indirect function return.  RET_OP is the function return
   17672              :    target.  */
   17673              : 
   17674              : const char *
   17675           17 : ix86_output_indirect_function_return (rtx ret_op)
   17676              : {
   17677           17 :   if (cfun->machine->function_return_type != indirect_branch_keep)
   17678              :     {
   17679            0 :       char thunk_name[32];
   17680            0 :       enum indirect_thunk_prefix need_prefix
   17681            0 :         = indirect_thunk_need_prefix (current_output_insn);
   17682            0 :       unsigned int regno = REGNO (ret_op);
   17683            0 :       gcc_assert (regno == CX_REG);
   17684              : 
   17685            0 :       if (cfun->machine->function_return_type
   17686            0 :           != indirect_branch_thunk_inline)
   17687              :         {
   17688            0 :           bool need_thunk = (cfun->machine->function_return_type
   17689              :                              == indirect_branch_thunk);
   17690            0 :           indirect_thunk_name (thunk_name, regno, need_prefix, true);
   17691              : 
   17692            0 :           if (need_thunk)
   17693              :             {
   17694            0 :               indirect_return_via_cx = true;
   17695            0 :               SET_HARD_REG_BIT (indirect_thunks_used, CX_REG);
   17696              :             }
   17697            0 :           fprintf (asm_out_file, "\tjmp\t");
   17698            0 :           assemble_name (asm_out_file, thunk_name);
   17699            0 :           putc ('\n', asm_out_file);
   17700              :         }
   17701              :       else
   17702            0 :         output_indirect_thunk (regno);
   17703              :     }
   17704              :   else
   17705              :     {
   17706           17 :       output_asm_insn ("%!jmp\t%A0", &ret_op);
   17707           17 :       if (ix86_harden_sls & harden_sls_indirect_jmp)
   17708            1 :         fputs ("\tint3\n", asm_out_file);
   17709              :     }
   17710           17 :   return "";
   17711              : }
   17712              : 
   17713              : /* Output the assembly for a call instruction.  */
   17714              : 
   17715              : const char *
   17716      6098187 : ix86_output_call_insn (rtx_insn *insn, rtx call_op)
   17717              : {
   17718      6098187 :   bool direct_p = constant_call_address_operand (call_op, VOIDmode);
   17719      6098187 :   bool output_indirect_p
   17720              :     = (!TARGET_SEH
   17721      6098187 :        && cfun->machine->indirect_branch_type != indirect_branch_keep);
   17722      6098187 :   bool seh_nop_p = false;
   17723      6098187 :   const char *xasm;
   17724              : 
   17725      6098187 :   if (SIBLING_CALL_P (insn))
   17726              :     {
   17727       130482 :       output_return_instrumentation ();
   17728       130482 :       if (direct_p)
   17729              :         {
   17730       120847 :           if (ix86_nopic_noplt_attribute_p (call_op))
   17731              :             {
   17732            4 :               direct_p = false;
   17733            4 :               if (TARGET_64BIT)
   17734              :                 {
   17735            4 :                   if (output_indirect_p)
   17736              :                     xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
   17737              :                   else
   17738            4 :                     xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
   17739              :                 }
   17740              :               else
   17741              :                 {
   17742            0 :                   if (output_indirect_p)
   17743              :                     xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
   17744              :                   else
   17745            0 :                     xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
   17746              :                 }
   17747              :             }
   17748              :           else
   17749              :             xasm = "%!jmp\t%P0";
   17750              :         }
   17751              :       /* SEH epilogue detection requires the indirect branch case
   17752              :          to include REX.W.  */
   17753         9635 :       else if (TARGET_SEH)
   17754              :         xasm = "%!rex.W jmp\t%A0";
   17755              :       else
   17756              :         {
   17757         9635 :           if (output_indirect_p)
   17758              :             xasm = "%0";
   17759              :           else
   17760         9612 :             xasm = "%!jmp\t%A0";
   17761              :         }
   17762              : 
   17763       130482 :       if (output_indirect_p && !direct_p)
   17764           23 :         ix86_output_indirect_branch (call_op, xasm, true);
   17765              :       else
   17766              :         {
   17767       130459 :           output_asm_insn (xasm, &call_op);
   17768       130459 :           if (!direct_p
   17769         9616 :               && (ix86_harden_sls & harden_sls_indirect_jmp))
   17770              :             return "int3";
   17771              :         }
   17772       130481 :       return "";
   17773              :     }
   17774              : 
   17775              :   /* SEH unwinding can require an extra nop to be emitted in several
   17776              :      circumstances.  Determine if we have one of those.  */
   17777      5967705 :   if (TARGET_SEH)
   17778              :     {
   17779              :       rtx_insn *i;
   17780              : 
   17781              :       for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
   17782              :         {
   17783              :           /* Prevent a catch region from being adjacent to a jump that would
   17784              :              be interpreted as an epilogue sequence by the unwinder.  */
   17785              :           if (JUMP_P(i) && CROSSING_JUMP_P (i))
   17786              :             {
   17787              :               seh_nop_p = true;
   17788              :               break;
   17789              :             }
   17790              : 
   17791              :           /* If we get to another real insn, we don't need the nop.  */
   17792              :           if (INSN_P (i))
   17793              :             break;
   17794              : 
   17795              :           /* If we get to the epilogue note, prevent a catch region from
   17796              :              being adjacent to the standard epilogue sequence.  Note that,
   17797              :              if non-call exceptions are enabled, we already did it during
   17798              :              epilogue expansion, or else, if the insn can throw internally,
   17799              :              we already did it during the reorg pass.  */
   17800              :           if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
   17801              :               && !flag_non_call_exceptions
   17802              :               && !can_throw_internal (insn))
   17803              :             {
   17804              :               seh_nop_p = true;
   17805              :               break;
   17806              :             }
   17807              :         }
   17808              : 
   17809              :       /* If we didn't find a real insn following the call, prevent the
   17810              :          unwinder from looking into the next function.  */
   17811              :       if (i == NULL)
   17812              :         seh_nop_p = true;
   17813              :     }
   17814              : 
   17815      5967705 :   if (direct_p)
   17816              :     {
   17817      5794505 :       if (ix86_nopic_noplt_attribute_p (call_op))
   17818              :         {
   17819            6 :           direct_p = false;
   17820            6 :           if (TARGET_64BIT)
   17821              :             {
   17822            6 :               if (output_indirect_p)
   17823              :                 xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
   17824              :               else
   17825            6 :                 xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
   17826              :             }
   17827              :           else
   17828              :             {
   17829            0 :               if (output_indirect_p)
   17830              :                 xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
   17831              :               else
   17832            0 :                 xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
   17833              :             }
   17834              :         }
   17835              :       else
   17836              :         xasm = "%!call\t%P0";
   17837              :     }
   17838              :   else
   17839              :     {
   17840       173200 :       if (output_indirect_p)
   17841              :         xasm = "%0";
   17842              :       else
   17843       173177 :         xasm = "%!call\t%A0";
   17844              :     }
   17845              : 
   17846      5967705 :   if (output_indirect_p && !direct_p)
   17847           23 :     ix86_output_indirect_branch (call_op, xasm, false);
   17848              :   else
   17849      5967682 :     output_asm_insn (xasm, &call_op);
   17850              : 
   17851              :   if (seh_nop_p)
   17852              :     return "nop";
   17853              : 
   17854              :   return "";
   17855              : }
   17856              : 
   17857              : /* Return a MEM corresponding to a stack slot with mode MODE.
   17858              :    Allocate a new slot if necessary.
   17859              : 
   17860              :    The RTL for a function can have several slots available: N is
   17861              :    which slot to use.  */
   17862              : 
   17863              : rtx
   17864        22366 : assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
   17865              : {
   17866        22366 :   struct stack_local_entry *s;
   17867              : 
   17868        22366 :   gcc_assert (n < MAX_386_STACK_LOCALS);
   17869              : 
   17870        33727 :   for (s = ix86_stack_locals; s; s = s->next)
   17871        31116 :     if (s->mode == mode && s->n == n)
   17872        19755 :       return validize_mem (copy_rtx (s->rtl));
   17873              : 
   17874         2611 :   int align = 0;
   17875              :   /* For DImode with SLOT_FLOATxFDI_387 use 32-bit
   17876              :      alignment with -m32 -mpreferred-stack-boundary=2.  */
   17877         2611 :   if (mode == DImode
   17878          329 :       && !TARGET_64BIT
   17879          329 :       && n == SLOT_FLOATxFDI_387
   17880         2940 :       && ix86_preferred_stack_boundary < GET_MODE_ALIGNMENT (DImode))
   17881              :     align = 32;
   17882         2611 :   s = ggc_alloc<stack_local_entry> ();
   17883         2611 :   s->n = n;
   17884         2611 :   s->mode = mode;
   17885         5222 :   s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), align);
   17886              : 
   17887         2611 :   s->next = ix86_stack_locals;
   17888         2611 :   ix86_stack_locals = s;
   17889         2611 :   return validize_mem (copy_rtx (s->rtl));
   17890              : }
   17891              : 
   17892              : static void
   17893      1480112 : ix86_instantiate_decls (void)
   17894              : {
   17895      1480112 :   struct stack_local_entry *s;
   17896              : 
   17897      1480112 :   for (s = ix86_stack_locals; s; s = s->next)
   17898            0 :     if (s->rtl != NULL_RTX)
   17899            0 :       instantiate_decl_rtl (s->rtl);
   17900      1480112 : }
   17901              : 
   17902              : /* Check whether x86 address PARTS is a pc-relative address.  */
   17903              : 
   17904              : bool
   17905     27330350 : ix86_rip_relative_addr_p (struct ix86_address *parts)
   17906              : {
   17907     27330350 :   rtx base, index, disp;
   17908              : 
   17909     27330350 :   base = parts->base;
   17910     27330350 :   index = parts->index;
   17911     27330350 :   disp = parts->disp;
   17912              : 
   17913     27330350 :   if (disp && !base && !index)
   17914              :     {
   17915     25572114 :       if (TARGET_64BIT)
   17916              :         {
   17917     23907639 :           rtx symbol = disp;
   17918              : 
   17919     23907639 :           if (GET_CODE (disp) == CONST)
   17920      7846182 :             symbol = XEXP (disp, 0);
   17921     23907639 :           if (GET_CODE (symbol) == PLUS
   17922      7330399 :               && CONST_INT_P (XEXP (symbol, 1)))
   17923      7330399 :             symbol = XEXP (symbol, 0);
   17924              : 
   17925     23907639 :           if (LABEL_REF_P (symbol)
   17926     23900063 :               || (SYMBOL_REF_P (symbol)
   17927     22632605 :                   && SYMBOL_REF_TLS_MODEL (symbol) == 0)
   17928     25175097 :               || (GET_CODE (symbol) == UNSPEC
   17929       534681 :                   && (XINT (symbol, 1) == UNSPEC_GOTPCREL
   17930              :                       || XINT (symbol, 1) == UNSPEC_PCREL
   17931              :                       || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
   17932     23147380 :             return true;
   17933              :         }
   17934              :     }
   17935              :   return false;
   17936              : }
   17937              : 
/* Calculate the length of the memory address in the instruction encoding.
   Includes addr32 prefix, does not include the one-byte modrm, opcode,
   or other prefixes.  We never generate addr32 prefix for LEA insn.  */

int
memory_address_length (rtx addr, bool lea)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  /* Auto-modify addresses contribute no extra address bytes.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  /* A non-generic address space needs a one-byte segment prefix.  */
  len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;

  /*  If this is not LEA instruction, add the length of addr32 prefix.  */
  if (TARGET_64BIT && !lea
      && (SImode_address_operand (addr, VOIDmode)
          || (parts.base && GET_MODE (parts.base) == SImode)
          || (parts.index && GET_MODE (parts.index) == SImode)))
    len++;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  /* Look through SUBREGs so the REGNO checks below see hard registers.  */
  if (base && SUBREG_P (base))
    base = SUBREG_REG (base);
  if (index && SUBREG_P (index))
    index = SUBREG_REG (index);

  gcc_assert (base == NULL_RTX || REG_P (base));
  gcc_assert (index == NULL_RTX || REG_P (index));

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
         the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
         code.  */
      if (base == arg_pointer_rtx
          || base == frame_pointer_rtx
          || REGNO (base) == SP_REG
          || REGNO (base) == BP_REG
          || REGNO (base) == R12_REG
          || REGNO (base) == R13_REG)
        len++;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      /* Non-RIP-relative absolute disp32 needs an extra SIB byte.  */
      if (!ix86_rip_relative_addr_p (&parts))
        len++;
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
        {
          /* Constraint K: the displacement fits in a signed 8-bit
             immediate, so only one byte is needed.  */
          if (base && satisfies_constraint_K (disp))
            len += 1;
          else
            len += 4;
        }
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
        len++;

      /* An index requires the two-byte modrm form....  */
      if (index
          /* ...like esp (or r12), which always wants an index.  */
          || base == arg_pointer_rtx
          || base == frame_pointer_rtx
          || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
        len++;
    }

  return len;
}
   18037              : 
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that insn have 8bit immediate alternative.  */
int
ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  /* Scan all operands; at most one constant immediate is expected
     (enforced by the gcc_assert (!len) below).  */
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
        enum attr_mode mode = get_attr_mode (insn);

        gcc_assert (!len);
        if (shortform && CONST_INT_P (recog_data.operand[i]))
          {
            HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
            /* Truncate the value to the operand mode so the imm8
               range check below sees the value as it is encoded.  */
            switch (mode)
              {
              case MODE_QI:
                len = 1;
                continue;
              case MODE_HI:
                ival = trunc_int_for_mode (ival, HImode);
                break;
              case MODE_SI:
                ival = trunc_int_for_mode (ival, SImode);
                break;
              default:
                break;
              }
            /* Values in [-128, 127] use the sign-extended imm8 form.  */
            if (IN_RANGE (ival, -128, 127))
              {
                len = 1;
                continue;
              }
          }
        /* Full-width immediate: its size follows the operand mode.  */
        switch (mode)
          {
          case MODE_QI:
            len = 1;
            break;
          case MODE_HI:
            len = 2;
            break;
          case MODE_SI:
            len = 4;
            break;
          /* Immediates for DImode instructions are encoded
             as 32bit sign extended values.  */
          case MODE_DI:
            len = 4;
            break;
          default:
            fatal_insn ("unknown insn mode", insn);
        }
      }
  return len;
}
   18097              : 
/* Compute default value for "length_address" attribute.  */
int
ix86_attr_length_address_default (rtx_insn *insn)
{
  int i;

  /* For LEA the effective address is the SET_SRC of the pattern,
     not a MEM operand.  */
  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
        set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);

      return memory_address_length (addr, true);
    }

  /* Otherwise measure the first non-ignored MEM operand found.  */
  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    {
      rtx op = recog_data.operand[i];
      if (MEM_P (op))
        {
          constrain_operands_cached (insn, reload_completed);
          if (which_alternative != -1)
            {
              const char *constraints = recog_data.constraints[i];
              int alt = which_alternative;

              /* Step past '=' / '+' modifiers, then advance to the
                 constraint string of the matched alternative (the
                 alternatives are comma-separated).  */
              while (*constraints == '=' || *constraints == '+')
                constraints++;
              while (alt-- > 0)
                while (*constraints++ != ',')
                  ;
              /* Skip ignored operands.  */
              if (*constraints == 'X')
                continue;
            }

          int len = memory_address_length (XEXP (op, 0), false);

          /* Account for segment prefix for non-default addr spaces.  */
          if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
            len++;

          return len;
        }
    }
  /* No memory operand: no address bytes at all.  */
  return 0;
}
   18151              : 
/* Compute default value for "length_vex" attribute. It includes
   2 or 3 byte VEX prefix and 1 opcode byte.  */

int
ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
                              bool has_vex_w)
{
  /* REG_ONLY is the answer when only non-extended registers appear;
     it may be widened to 3 + 1 by the loop below.  */
  int i, reg_only = 2 + 1;
  bool has_mem = false;

  /* Only 0f opcode can use 2 byte VEX prefix and  VEX W bit uses 3
     byte VEX prefix.  */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

 /* We can always use 2 byte VEX prefix in 32bit.  */
  if (!TARGET_64BIT)
    return 2 + 1;

  extract_insn_cached (insn);

  /* Scan the operands for anything that forces the longer prefix.  */
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (REG_P (recog_data.operand[i]))
      {
        /* REX.W bit uses 3 byte VEX prefix.
           REX2 with vex use extended EVEX prefix length is 4-byte.  */
        if (GET_MODE (recog_data.operand[i]) == DImode
            && GENERAL_REG_P (recog_data.operand[i]))
          return 3 + 1;

        /* REX.B bit requires 3-byte VEX. Right here we don't know which
           operand will be encoded using VEX.B, so be conservative.
           REX2 with vex use extended EVEX prefix length is 4-byte.  */
        if (REX_INT_REGNO_P (recog_data.operand[i])
            || REX2_INT_REGNO_P (recog_data.operand[i])
            || REX_SSE_REGNO_P (recog_data.operand[i]))
          reg_only = 3 + 1;
      }
    else if (MEM_P (recog_data.operand[i]))
      {
        /* REX2.X or REX2.B bits use 3 byte VEX prefix.  */
        if (x86_extended_rex2reg_mentioned_p (recog_data.operand[i]))
          return 4;

        /* REX.X or REX.B bits use 3 byte VEX prefix.  */
        if (x86_extended_reg_mentioned_p (recog_data.operand[i]))
          return 3 + 1;

        has_mem = true;
      }

  /* A memory operand with no extended registers fits the 2-byte form;
     otherwise use the register-only result computed above.  */
  return has_mem ? 2 + 1 : reg_only;
}
   18205              : 
   18206              : 
   18207              : static bool
   18208              : ix86_class_likely_spilled_p (reg_class_t);
   18209              : 
   18210              : /* Returns true if lhs of insn is HW function argument register and set up
   18211              :    is_spilled to true if it is likely spilled HW register.  */
   18212              : static bool
   18213         1145 : insn_is_function_arg (rtx insn, bool* is_spilled)
   18214              : {
   18215         1145 :   rtx dst;
   18216              : 
   18217         1145 :   if (!NONDEBUG_INSN_P (insn))
   18218              :     return false;
   18219              :   /* Call instructions are not movable, ignore it.  */
   18220         1145 :   if (CALL_P (insn))
   18221              :     return false;
   18222         1071 :   insn = PATTERN (insn);
   18223         1071 :   if (GET_CODE (insn) == PARALLEL)
   18224           73 :     insn = XVECEXP (insn, 0, 0);
   18225         1071 :   if (GET_CODE (insn) != SET)
   18226              :     return false;
   18227         1071 :   dst = SET_DEST (insn);
   18228          975 :   if (REG_P (dst) && HARD_REGISTER_P (dst)
   18229         1940 :       && ix86_function_arg_regno_p (REGNO (dst)))
   18230              :     {
   18231              :       /* Is it likely spilled HW register?  */
   18232          869 :       if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
   18233          869 :           && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
   18234          825 :         *is_spilled = true;
   18235          869 :       return true;
   18236              :     }
   18237              :   return false;
   18238              : }
   18239              : 
/* Add output dependencies for chain of function adjacent arguments if only
   there is a move to likely spilled HW register.  Return first argument
   if at least one dependence was added or NULL otherwise.  */
static rtx_insn *
add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
{
  rtx_insn *insn;
  rtx_insn *last = call;
  rtx_insn *first_arg = NULL;
  bool is_spilled = false;

  /* Step before HEAD so the loop below can use it as a sentinel.  */
  head = PREV_INSN (head);

  /* Find nearest to call argument passing instruction.  */
  while (true)
    {
      last = PREV_INSN (last);
      if (last == head)
        return NULL;
      if (!NONDEBUG_INSN_P (last))
        continue;
      if (insn_is_function_arg (last, &is_spilled))
        break;
      /* First real insn before the call is not an argument move:
         there is no argument chain.  */
      return NULL;
    }

  first_arg = last;
  /* Walk backwards over the chain of adjacent argument moves, linking
     consecutive moves with output dependencies where a likely-spilled
     register is involved.  */
  while (true)
    {
      insn = PREV_INSN (last);
      if (!INSN_P (insn))
        break;
      if (insn == head)
        break;
      if (!NONDEBUG_INSN_P (insn))
        {
          last = insn;
          continue;
        }
      if (insn_is_function_arg (insn, &is_spilled))
        {
          /* Add output dependence between two function arguments if chain
             of output arguments contains likely spilled HW registers.  */
          if (is_spilled)
            add_dependence (first_arg, insn, REG_DEP_OUTPUT);
          first_arg = last = insn;
        }
      else
        break;
    }
  /* Without a likely-spilled register there is nothing to protect.  */
  if (!is_spilled)
    return NULL;
  return first_arg;
}
   18294              : 
   18295              : /* Add output or anti dependency from insn to first_arg to restrict its code
   18296              :    motion.  */
   18297              : static void
   18298         2335 : avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
   18299              : {
   18300         2335 :   rtx set;
   18301         2335 :   rtx tmp;
   18302              : 
   18303         2335 :   set = single_set (insn);
   18304         2335 :   if (!set)
   18305              :     return;
   18306         1453 :   tmp = SET_DEST (set);
   18307         1453 :   if (REG_P (tmp))
   18308              :     {
   18309              :       /* Add output dependency to the first function argument.  */
   18310         1258 :       add_dependence (first_arg, insn, REG_DEP_OUTPUT);
   18311         1258 :       return;
   18312              :     }
   18313              :   /* Add anti dependency.  */
   18314          195 :   add_dependence (first_arg, insn, REG_DEP_ANTI);
   18315              : }
   18316              : 
   18317              : /* Avoid cross block motion of function argument through adding dependency
   18318              :    from the first non-jump instruction in bb.  */
   18319              : static void
   18320           68 : add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
   18321              : {
   18322           68 :   rtx_insn *insn = BB_END (bb);
   18323              : 
   18324          134 :   while (insn)
   18325              :     {
   18326          134 :       if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
   18327              :         {
   18328           67 :           rtx set = single_set (insn);
   18329           67 :           if (set)
   18330              :             {
   18331           67 :               avoid_func_arg_motion (arg, insn);
   18332           67 :               return;
   18333              :             }
   18334              :         }
   18335           67 :       if (insn == BB_HEAD (bb))
   18336              :         return;
   18337           66 :       insn = PREV_INSN (insn);
   18338              :     }
   18339              : }
   18340              : 
/* Hook for pre-reload schedule - avoid motion of function arguments
   passed in likely spilled HW registers.  */
static void
ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
{
  rtx_insn *insn;
  rtx_insn *first_arg = NULL;
  /* Only the pre-reload scheduler needs this protection.  */
  if (reload_completed)
    return;
  /* Skip leading debug insns so HEAD is the first real insn.  */
  while (head != tail && DEBUG_INSN_P (head))
    head = NEXT_INSN (head);
  /* Walk backwards from TAIL; on each call insn try to find its
     argument-move chain and pin it in place.  */
  for (insn = tail; insn != head; insn = PREV_INSN (insn))
    if (INSN_P (insn) && CALL_P (insn))
      {
        first_arg = add_parameter_dependencies (insn, head);
        if (first_arg)
          {
            /* Add dependee for first argument to predecessors if only
               region contains more than one block.  */
            basic_block bb =  BLOCK_FOR_INSN (insn);
            int rgn = CONTAINING_RGN (bb->index);
            int nr_blks = RGN_NR_BLOCKS (rgn);
            /* Skip trivial regions and region head blocks that can have
               predecessors outside of region.  */
            if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
              {
                edge e;
                edge_iterator ei;

                /* Regions are SCCs with the exception of selective
                   scheduling with pipelining of outer blocks enabled.
                   So also check that immediate predecessors of a non-head
                   block are in the same region.  */
                FOR_EACH_EDGE (e, ei, bb->preds)
                  {
                    /* Avoid creating of loop-carried dependencies through
                       using topological ordering in the region.  */
                    if (rgn == CONTAINING_RGN (e->src->index)
                        && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
                      add_dependee_for_func_arg (first_arg, e->src);
                  }
              }
            /* Resume the scan above the argument chain just handled.  */
            insn = first_arg;
            if (insn == head)
              break;
          }
      }
    else if (first_arg)
      /* Restrict motion of unrelated insns past the argument chain.  */
      avoid_func_arg_motion (first_arg, insn);
}
   18391              : 
   18392              : /* Hook for pre-reload schedule - set priority of moves from likely spilled
   18393              :    HW registers to maximum, to schedule them at soon as possible. These are
   18394              :    moves from function argument registers at the top of the function entry
   18395              :    and moves from function return value registers after call.  */
   18396              : static int
   18397    108717909 : ix86_adjust_priority (rtx_insn *insn, int priority)
   18398              : {
   18399    108717909 :   rtx set;
   18400              : 
   18401    108717909 :   if (reload_completed)
   18402              :     return priority;
   18403              : 
   18404        14043 :   if (!NONDEBUG_INSN_P (insn))
   18405              :     return priority;
   18406              : 
   18407        12477 :   set = single_set (insn);
   18408        12477 :   if (set)
   18409              :     {
   18410        11905 :       rtx tmp = SET_SRC (set);
   18411        11905 :       if (REG_P (tmp)
   18412         2530 :           && HARD_REGISTER_P (tmp)
   18413          499 :           && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
   18414        11905 :           && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
   18415          448 :         return current_sched_info->sched_max_insns_priority;
   18416              :     }
   18417              : 
   18418              :   return priority;
   18419              : }
   18420              : 
/* Prepare for scheduling pass.  */
static void
ix86_sched_init_global (FILE *, int, int)
{
  /* Install scheduling hooks for current CPU.  Some of these hooks are used
     in time-critical parts of the scheduler, so we only set them up when
     they are actually used.  */
  switch (ix86_tune)
    {
    case PROCESSOR_CORE2:
    case PROCESSOR_NEHALEM:
    case PROCESSOR_SANDYBRIDGE:
    case PROCESSOR_HASWELL:
    case PROCESSOR_TREMONT:
    case PROCESSOR_ALDERLAKE:
    case PROCESSOR_GENERIC:
      /* Do not perform multipass scheduling for pre-reload schedule
         to save compile time.  */
      if (reload_completed)
        {
          ix86_core2i7_init_hooks ();
          break;
        }
      /* Fall through.  */
    default:
      /* Clear any multipass hooks a previous pass may have installed,
         so the generic single-pass scheduler is used.  */
      targetm.sched.dfa_post_advance_cycle = NULL;
      targetm.sched.first_cycle_multipass_init = NULL;
      targetm.sched.first_cycle_multipass_begin = NULL;
      targetm.sched.first_cycle_multipass_issue = NULL;
      targetm.sched.first_cycle_multipass_backtrack = NULL;
      targetm.sched.first_cycle_multipass_end = NULL;
      targetm.sched.first_cycle_multipass_fini = NULL;
      break;
    }
}
   18456              : 
   18457              : 
   18458              : /* Implement TARGET_STATIC_RTX_ALIGNMENT.  */
   18459              : 
   18460              : static HOST_WIDE_INT
   18461       721342 : ix86_static_rtx_alignment (machine_mode mode)
   18462              : {
   18463       721342 :   if (mode == DFmode)
   18464              :     return 64;
   18465              :   if (ALIGN_MODE_128 (mode))
   18466       156579 :     return MAX (128, GET_MODE_ALIGNMENT (mode));
   18467       479968 :   return GET_MODE_ALIGNMENT (mode);
   18468              : }
   18469              : 
   18470              : /* Implement TARGET_CONSTANT_ALIGNMENT.  */
   18471              : 
   18472              : static HOST_WIDE_INT
   18473      6871682 : ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
   18474              : {
   18475      6871682 :   if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
   18476              :       || TREE_CODE (exp) == INTEGER_CST)
   18477              :     {
   18478       366240 :       machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
   18479       366240 :       HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
   18480       366240 :       return MAX (mode_align, align);
   18481              :     }
   18482      6364192 :   else if (!optimize_size && TREE_CODE (exp) == STRING_CST
   18483      9612932 :            && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
   18484              :     return BITS_PER_WORD;
   18485              : 
   18486              :   return align;
   18487              : }
   18488              : 
   18489              : /* Implement TARGET_EMPTY_RECORD_P.  */
   18490              : 
   18491              : static bool
   18492   1442610527 : ix86_is_empty_record (const_tree type)
   18493              : {
   18494   1442610527 :   if (!TARGET_64BIT)
   18495              :     return false;
   18496   1411804127 :   return default_is_empty_record (type);
   18497              : }
   18498              : 
   18499              : /* Implement TARGET_WARN_PARAMETER_PASSING_ABI.  */
   18500              : 
   18501              : static void
   18502     15182377 : ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
   18503              : {
   18504     15182377 :   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
   18505              : 
   18506     15182377 :   if (!cum->warn_empty)
   18507              :     return;
   18508              : 
   18509     12997267 :   if (!TYPE_EMPTY_P (type))
   18510              :     return;
   18511              : 
   18512              :   /* Don't warn if the function isn't visible outside of the TU.  */
   18513        14649 :   if (cum->decl && !TREE_PUBLIC (cum->decl))
   18514              :     return;
   18515              : 
   18516        13189 :   tree decl = cum->decl;
   18517        13189 :   if (!decl)
   18518              :     /* If we don't know the target, look at the current TU.  */
   18519           39 :     decl = current_function_decl;
   18520              : 
   18521        13189 :   const_tree ctx = get_ultimate_context (decl);
   18522        13189 :   if (ctx == NULL_TREE
   18523        26344 :       || !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
   18524              :     return;
   18525              : 
   18526              :   /* If the actual size of the type is zero, then there is no change
   18527              :      in how objects of this size are passed.  */
   18528           72 :   if (int_size_in_bytes (type) == 0)
   18529              :     return;
   18530              : 
   18531           66 :   warning (OPT_Wabi, "empty class %qT parameter passing ABI "
   18532              :            "changes in %<-fabi-version=12%> (GCC 8)", type);
   18533              : 
   18534              :   /* Only warn once.  */
   18535           66 :   cum->warn_empty = false;
   18536              : }
   18537              : 
   18538              : /* This hook returns name of multilib ABI.  */
   18539              : 
   18540              : static const char *
   18541      3393039 : ix86_get_multilib_abi_name (void)
   18542              : {
   18543      3393039 :   if (!(TARGET_64BIT_P (ix86_isa_flags)))
   18544              :     return "i386";
   18545      3349083 :   else if (TARGET_X32_P (ix86_isa_flags))
   18546              :     return "x32";
   18547              :   else
   18548      3349083 :     return "x86_64";
   18549              : }
   18550              : 
   18551              : /* Compute the alignment for a variable for Intel MCU psABI.  TYPE is
   18552              :    the data type, and ALIGN is the alignment that the object would
   18553              :    ordinarily have.  */
   18554              : 
   18555              : static int
   18556            0 : iamcu_alignment (tree type, int align)
   18557              : {
   18558            0 :   machine_mode mode;
   18559              : 
   18560            0 :   if (align < 32 || TYPE_USER_ALIGN (type))
   18561              :     return align;
   18562              : 
   18563              :   /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
   18564              :      bytes.  */
   18565            0 :   type = strip_array_types (type);
   18566            0 :   if (TYPE_ATOMIC (type))
   18567              :     return align;
   18568              : 
   18569            0 :   mode = TYPE_MODE (type);
   18570            0 :   switch (GET_MODE_CLASS (mode))
   18571              :     {
   18572              :     case MODE_INT:
   18573              :     case MODE_COMPLEX_INT:
   18574              :     case MODE_COMPLEX_FLOAT:
   18575              :     case MODE_FLOAT:
   18576              :     case MODE_DECIMAL_FLOAT:
   18577              :       return 32;
   18578              :     default:
   18579              :       return align;
   18580              :     }
   18581              : }
   18582              : 
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.  OPT selects whether
   the purely-optimizing (non-ABI-mandated) increases are applied.  */

int
ix86_data_alignment (tree type, unsigned int align, bool opt)
{
  /* GCC 4.8 and earlier used to incorrectly assume this alignment even
     for symbols from other compilation units or symbols that don't need
     to bind locally.  In order to preserve some ABI compatibility with
     those compilers, ensure we don't decrease alignment from what we
     used to assume.  */

  unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);

  /* A data structure, equal or greater than the size of a cache line
     (64 bytes in the Pentium 4 and other recent Intel processors, including
     processors based on Intel Core microarchitecture) should be aligned
     so that its base address is a multiple of a cache line size.  */

  unsigned int max_align
    = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);

  if (max_align < BITS_PER_WORD)
    max_align = BITS_PER_WORD;

  /* -malign-data= overrides the tuning default: "abi" disables the
     optimizing increases entirely, "compat" caps the cache-line bump at
     one word, "cacheline" keeps the full cache-line bump.  */
  switch (ix86_align_data_type)
    {
    case ix86_align_data_type_abi: opt = false; break;
    case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
    case ix86_align_data_type_cacheline: break;
    }

  /* Intel MCU psABI caps scalar alignment at 32 bits.  */
  if (TARGET_IAMCU)
    align = iamcu_alignment (type, align);

  /* Optimizing bump for large constant-sized aggregates: first to the
     GCC <= 4.8 compatibility alignment, then up to a cache line.  */
  if (opt
      && AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
    {
      if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
	  && align < max_align_compat)
	align = max_align_compat;
      if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
	  && align < max_align)
	align = max_align;
    }

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      /* With OPT this is extended to all aggregates (optimization);
	 without OPT only the ABI-mandated array case applies.  */
      if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
	  && align < 128)
	return 128;
    }

  if (!opt)
    return align;

  /* Mode-based optimizing increases: raise DFmode data to 64 bits and
     128-bit-aligned modes to 128 bits, looking at the element mode for
     arrays and the first field's mode for records/unions.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if (RECORD_OR_UNION_TYPE_P (type)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
	   || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }

  return align;
}
   18683              : 
   18684              : /* Implememnt TARGET_LOWER_LOCAL_DECL_ALIGNMENT.  */
   18685              : static void
   18686     31460724 : ix86_lower_local_decl_alignment (tree decl)
   18687              : {
   18688     31460724 :   unsigned int new_align = ix86_local_alignment (decl, VOIDmode,
   18689     31460724 :                                                  DECL_ALIGN (decl), true);
   18690     31460724 :   if (new_align < DECL_ALIGN (decl))
   18691            0 :     SET_DECL_ALIGN (decl, new_align);
   18692     31460724 : }
   18693              : 
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  MAY_LOWER permits returning a smaller alignment than ALIGN
   (used by the lower-local-decl-alignment hook).  */

unsigned int
ix86_local_alignment (tree exp, machine_mode mode,
		      unsigned int align, bool may_lower)
{
  tree type, decl;

  /* EXP may be either a decl or a bare type.  */
  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (may_lower
      && !TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || (!TYPE_USER_ALIGN (type)
		    && !TYPE_ATOMIC (strip_array_types (type))))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
	align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* Don't increase alignment for Intel MCU psABI.  */
  if (TARGET_IAMCU)
    return align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  Exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

     This was added to allow use of aligned SSE instructions at arrays.  This
     rule is meant for static storage (where compiler cannot do the analysis
     by itself).  We follow it for automatic variables only when convenient.
     We fully control everything in the function compiled and functions from
     other unit cannot rely on the alignment.

     Exclude va_list type.  It is the common case of local array where
     we cannot benefit from the alignment.

     TODO: Probably one should optimize for size only when var is not escaping.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      if (AGGREGATE_TYPE_P (type)
	  && (va_list_type_node == NULL_TREE
	      || (TYPE_MAIN_VARIANT (type)
		  != TYPE_MAIN_VARIANT (va_list_type_node)))
	  && TYPE_SIZE (type)
	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
	  && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
	  && align < 128)
	return 128;
    }
  /* Mode-based increases mirroring ix86_data_alignment: DFmode data is
     raised to 64 bits and 128-bit-aligned modes to 128 bits, keyed on
     the element mode for arrays and the first field's mode for
     records/unions.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
	return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
	return 64;
      if ((TYPE_MODE (type) == XCmode
	   || TYPE_MODE (type) == TCmode) && align < 128)
	return 128;
    }
  else if (RECORD_OR_UNION_TYPE_P (type)
	   && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
	return 128;
    }
  else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
	   || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
	return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
	return 128;
    }
  return align;
}
   18807              : 
   18808              : /* Compute the minimum required alignment for dynamic stack realignment
   18809              :    purposes for a local variable, parameter or a stack slot.  EXP is
   18810              :    the data type or decl itself, MODE is its mode and ALIGN is the
   18811              :    alignment that the object would ordinarily have.  */
   18812              : 
   18813              : unsigned int
   18814     47686965 : ix86_minimum_alignment (tree exp, machine_mode mode,
   18815              :                         unsigned int align)
   18816              : {
   18817     47686965 :   tree type, decl;
   18818              : 
   18819     47686965 :   if (exp && DECL_P (exp))
   18820              :     {
   18821     14966988 :       type = TREE_TYPE (exp);
   18822     14966988 :       decl = exp;
   18823              :     }
   18824              :   else
   18825              :     {
   18826              :       type = exp;
   18827              :       decl = NULL;
   18828              :     }
   18829              : 
   18830     47686965 :   if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
   18831              :     return align;
   18832              : 
   18833              :   /* Don't do dynamic stack realignment for long long objects with
   18834              :      -mpreferred-stack-boundary=2.  */
   18835            0 :   if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
   18836            0 :       && (!type || (!TYPE_USER_ALIGN (type)
   18837            0 :                     && !TYPE_ATOMIC (strip_array_types (type))))
   18838            0 :       && (!decl || !DECL_USER_ALIGN (decl)))
   18839              :     {
   18840            0 :       gcc_checking_assert (!TARGET_STV);
   18841              :       return 32;
   18842              :     }
   18843              : 
   18844              :   return align;
   18845              : }
   18846              : 
   18847              : /* Find a location for the static chain incoming to a nested function.
   18848              :    This is a register, unless all free registers are used by arguments.  */
   18849              : 
   18850              : static rtx
   18851       269378 : ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
   18852              : {
   18853       269378 :   unsigned regno;
   18854              : 
   18855       269378 :   if (TARGET_64BIT)
   18856              :     {
   18857              :       /* We always use R10 in 64-bit mode.  */
   18858              :       regno = R10_REG;
   18859              :     }
   18860              :   else
   18861              :     {
   18862        88535 :       const_tree fntype, fndecl;
   18863        88535 :       unsigned int ccvt;
   18864              : 
   18865              :       /* By default in 32-bit mode we use ECX to pass the static chain.  */
   18866        88535 :       regno = CX_REG;
   18867              : 
   18868        88535 :       if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
   18869              :         {
   18870        78559 :           fntype = TREE_TYPE (fndecl_or_type);
   18871        78559 :           fndecl = fndecl_or_type;
   18872              :         }
   18873              :       else
   18874              :         {
   18875              :           fntype = fndecl_or_type;
   18876              :           fndecl = NULL;
   18877              :         }
   18878              : 
   18879        88535 :       ccvt = ix86_get_callcvt (fntype);
   18880        88535 :       if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
   18881              :         {
   18882              :           /* Fastcall functions use ecx/edx for arguments, which leaves
   18883              :              us with EAX for the static chain.
   18884              :              Thiscall functions use ecx for arguments, which also
   18885              :              leaves us with EAX for the static chain.  */
   18886              :           regno = AX_REG;
   18887              :         }
   18888        88535 :       else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
   18889              :         {
   18890              :           /* Thiscall functions use ecx for arguments, which leaves
   18891              :              us with EAX and EDX for the static chain.
   18892              :              We are using for abi-compatibility EAX.  */
   18893              :           regno = AX_REG;
   18894              :         }
   18895        88535 :       else if (ix86_function_regparm (fntype, fndecl) == 3)
   18896              :         {
   18897              :           /* For regparm 3, we have no free call-clobbered registers in
   18898              :              which to store the static chain.  In order to implement this,
   18899              :              we have the trampoline push the static chain to the stack.
   18900              :              However, we can't push a value below the return address when
   18901              :              we call the nested function directly, so we have to use an
   18902              :              alternate entry point.  For this we use ESI, and have the
   18903              :              alternate entry point push ESI, so that things appear the
   18904              :              same once we're executing the nested function.  */
   18905            0 :           if (incoming_p)
   18906              :             {
   18907            0 :               if (fndecl == current_function_decl
   18908            0 :                   && !ix86_static_chain_on_stack)
   18909              :                 {
   18910            0 :                   gcc_assert (!reload_completed);
   18911            0 :                   ix86_static_chain_on_stack = true;
   18912              :                 }
   18913            0 :               return gen_frame_mem (SImode,
   18914            0 :                                     plus_constant (Pmode,
   18915              :                                                    arg_pointer_rtx, -8));
   18916              :             }
   18917              :           regno = SI_REG;
   18918              :         }
   18919              :     }
   18920              : 
   18921       357926 :   return gen_rtx_REG (Pmode, regno);
   18922              : }
   18923              : 
   18924              : /* Emit RTL insns to initialize the variable parts of a trampoline.
   18925              :    FNDECL is the decl of the target address; M_TRAMP is a MEM for
   18926              :    the trampoline, and CHAIN_VALUE is an RTX for the static chain
   18927              :    to be passed to the target function.  */
   18928              : 
   18929              : static void
   18930          296 : ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
   18931              : {
   18932          296 :   rtx mem, fnaddr;
   18933          296 :   int opcode;
   18934          296 :   int offset = 0;
   18935          296 :   bool need_endbr = (flag_cf_protection & CF_BRANCH);
   18936              : 
   18937          296 :   fnaddr = XEXP (DECL_RTL (fndecl), 0);
   18938              : 
   18939          296 :   if (TARGET_64BIT)
   18940              :     {
   18941          296 :       int size;
   18942              : 
   18943          296 :       if (need_endbr)
   18944              :         {
   18945              :           /* Insert ENDBR64.  */
   18946            1 :           mem = adjust_address (m_tramp, SImode, offset);
   18947            1 :           emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
   18948            1 :           offset += 4;
   18949              :         }
   18950              : 
   18951              :       /* Load the function address to r11.  Try to load address using
   18952              :          the shorter movl instead of movabs.  We may want to support
   18953              :          movq for kernel mode, but kernel does not use trampolines at
   18954              :          the moment.  FNADDR is a 32bit address and may not be in
   18955              :          DImode when ptr_mode == SImode.  Always use movl in this
   18956              :          case.  */
   18957          296 :       if (ptr_mode == SImode
   18958          296 :           || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
   18959              :         {
   18960          264 :           fnaddr = copy_addr_to_reg (fnaddr);
   18961              : 
   18962          264 :           mem = adjust_address (m_tramp, HImode, offset);
   18963          264 :           emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
   18964              : 
   18965          264 :           mem = adjust_address (m_tramp, SImode, offset + 2);
   18966          264 :           emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
   18967          264 :           offset += 6;
   18968              :         }
   18969              :       else
   18970              :         {
   18971           32 :           mem = adjust_address (m_tramp, HImode, offset);
   18972           32 :           emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
   18973              : 
   18974           32 :           mem = adjust_address (m_tramp, DImode, offset + 2);
   18975           32 :           emit_move_insn (mem, fnaddr);
   18976           32 :           offset += 10;
   18977              :         }
   18978              : 
   18979              :       /* Load static chain using movabs to r10.  Use the shorter movl
   18980              :          instead of movabs when ptr_mode == SImode.  */
   18981          296 :       if (ptr_mode == SImode)
   18982              :         {
   18983              :           opcode = 0xba41;
   18984              :           size = 6;
   18985              :         }
   18986              :       else
   18987              :         {
   18988          296 :           opcode = 0xba49;
   18989          296 :           size = 10;
   18990              :         }
   18991              : 
   18992          296 :       mem = adjust_address (m_tramp, HImode, offset);
   18993          296 :       emit_move_insn (mem, gen_int_mode (opcode, HImode));
   18994              : 
   18995          296 :       mem = adjust_address (m_tramp, ptr_mode, offset + 2);
   18996          296 :       emit_move_insn (mem, chain_value);
   18997          296 :       offset += size;
   18998              : 
   18999              :       /* Jump to r11; the last (unused) byte is a nop, only there to
   19000              :          pad the write out to a single 32-bit store.  */
   19001          296 :       mem = adjust_address (m_tramp, SImode, offset);
   19002          296 :       emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
   19003          296 :       offset += 4;
   19004              :     }
   19005              :   else
   19006              :     {
   19007            0 :       rtx disp, chain;
   19008              : 
   19009              :       /* Depending on the static chain location, either load a register
   19010              :          with a constant, or push the constant to the stack.  All of the
   19011              :          instructions are the same size.  */
   19012            0 :       chain = ix86_static_chain (fndecl, true);
   19013            0 :       if (REG_P (chain))
   19014              :         {
   19015            0 :           switch (REGNO (chain))
   19016              :             {
   19017              :             case AX_REG:
   19018              :               opcode = 0xb8; break;
   19019            0 :             case CX_REG:
   19020            0 :               opcode = 0xb9; break;
   19021            0 :             default:
   19022            0 :               gcc_unreachable ();
   19023              :             }
   19024              :         }
   19025              :       else
   19026              :         opcode = 0x68;
   19027              : 
   19028            0 :       if (need_endbr)
   19029              :         {
   19030              :           /* Insert ENDBR32.  */
   19031            0 :           mem = adjust_address (m_tramp, SImode, offset);
   19032            0 :           emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
   19033            0 :           offset += 4;
   19034              :         }
   19035              : 
   19036            0 :       mem = adjust_address (m_tramp, QImode, offset);
   19037            0 :       emit_move_insn (mem, gen_int_mode (opcode, QImode));
   19038              : 
   19039            0 :       mem = adjust_address (m_tramp, SImode, offset + 1);
   19040            0 :       emit_move_insn (mem, chain_value);
   19041            0 :       offset += 5;
   19042              : 
   19043            0 :       mem = adjust_address (m_tramp, QImode, offset);
   19044            0 :       emit_move_insn (mem, gen_int_mode (0xe9, QImode));
   19045              : 
   19046            0 :       mem = adjust_address (m_tramp, SImode, offset + 1);
   19047              : 
   19048              :       /* Compute offset from the end of the jmp to the target function.
   19049              :          In the case in which the trampoline stores the static chain on
   19050              :          the stack, we need to skip the first insn which pushes the
   19051              :          (call-saved) register static chain; this push is 1 byte.  */
   19052            0 :       offset += 5;
   19053            0 :       int skip = MEM_P (chain) ? 1 : 0;
   19054              :       /* Skip ENDBR32 at the entry of the target function.  */
   19055            0 :       if (need_endbr
   19056            0 :           && !cgraph_node::get (fndecl)->only_called_directly_p ())
   19057            0 :         skip += 4;
   19058            0 :       disp = expand_binop (SImode, sub_optab, fnaddr,
   19059            0 :                            plus_constant (Pmode, XEXP (m_tramp, 0),
   19060            0 :                                           offset - skip),
   19061              :                            NULL_RTX, 1, OPTAB_DIRECT);
   19062            0 :       emit_move_insn (mem, disp);
   19063              :     }
   19064              : 
   19065          296 :   gcc_assert (offset <= TRAMPOLINE_SIZE);
   19066              : 
   19067              : #ifdef HAVE_ENABLE_EXECUTE_STACK
   19068              : #ifdef CHECK_EXECUTE_STACK_ENABLED
   19069              :   if (CHECK_EXECUTE_STACK_ENABLED)
   19070              : #endif
   19071              :   emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
   19072              :                      LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
   19073              : #endif
   19074          296 : }
   19075              : 
   19076              : static bool
   19077     53790351 : ix86_allocate_stack_slots_for_args (void)
   19078              : {
   19079              :   /* Naked functions should not allocate stack slots for arguments.  */
   19080     53790351 :   return !ix86_function_naked (current_function_decl);
   19081              : }
   19082              : 
   19083              : static bool
   19084     38102474 : ix86_warn_func_return (tree decl)
   19085              : {
   19086              :   /* Naked functions are implemented entirely in assembly, including the
   19087              :      return sequence, so suppress warnings about this.  */
   19088     38102474 :   return !ix86_function_naked (decl);
   19089              : }
   19090              : 
   19091              : /* Return the shift count of a vector by scalar shift builtin second argument
   19092              :    ARG1.  */
   19093              : static tree
   19094        14142 : ix86_vector_shift_count (tree arg1)
   19095              : {
   19096        14142 :   if (tree_fits_uhwi_p (arg1))
   19097              :     return arg1;
   19098         8316 :   else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
   19099              :     {
   19100              :       /* The count argument is weird, passed in as various 128-bit
   19101              :          (or 64-bit) vectors, the low 64 bits from it are the count.  */
   19102          162 :       unsigned char buf[16];
   19103          162 :       int len = native_encode_expr (arg1, buf, 16);
   19104          162 :       if (len == 0)
   19105          162 :         return NULL_TREE;
   19106          162 :       tree t = native_interpret_expr (uint64_type_node, buf, len);
   19107          162 :       if (t && tree_fits_uhwi_p (t))
   19108              :         return t;
   19109              :     }
   19110              :   return NULL_TREE;
   19111              : }
   19112              : 
   19113              : /* Return true if arg_mask is all ones, ELEMS is elements number of
   19114              :    corresponding vector.  */
   19115              : static bool
   19116        25042 : ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask)
   19117              : {
   19118        25042 :   if (TREE_CODE (arg_mask) != INTEGER_CST)
   19119              :     return false;
   19120              : 
   19121         7462 :   unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask);
   19122         7462 :   if (elems == HOST_BITS_PER_WIDE_INT)
   19123           33 :     return  mask == HOST_WIDE_INT_M1U;
   19124         7429 :   if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
   19125         2681 :     return false;
   19126              : 
   19127              :   return true;
   19128              : }
   19129              : 
   19130              : static tree
   19131     68159638 : ix86_fold_builtin (tree fndecl, int n_args,
   19132              :                    tree *args, bool ignore ATTRIBUTE_UNUSED)
   19133              : {
   19134     68159638 :   if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
   19135              :     {
   19136     68159638 :       enum ix86_builtins fn_code
   19137     68159638 :         = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
   19138     68159638 :       enum rtx_code rcode;
   19139     68159638 :       bool is_vshift;
   19140     68159638 :       enum tree_code tcode;
   19141     68159638 :       bool is_scalar;
   19142     68159638 :       unsigned HOST_WIDE_INT mask;
   19143              : 
   19144     68159638 :       switch (fn_code)
   19145              :         {
   19146         8764 :         case IX86_BUILTIN_CPU_IS:
   19147         8764 :         case IX86_BUILTIN_CPU_SUPPORTS:
   19148         8764 :           gcc_assert (n_args == 1);
   19149         8764 :           return fold_builtin_cpu (fndecl, args);
   19150              : 
   19151        24861 :         case IX86_BUILTIN_NANQ:
   19152        24861 :         case IX86_BUILTIN_NANSQ:
   19153        24861 :           {
   19154        24861 :             tree type = TREE_TYPE (TREE_TYPE (fndecl));
   19155        24861 :             const char *str = c_getstr (*args);
   19156        24861 :             int quiet = fn_code == IX86_BUILTIN_NANQ;
   19157        24861 :             REAL_VALUE_TYPE real;
   19158              : 
   19159        24861 :             if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
   19160        24861 :               return build_real (type, real);
   19161            0 :             return NULL_TREE;
   19162              :           }
   19163              : 
   19164          108 :         case IX86_BUILTIN_INFQ:
   19165          108 :         case IX86_BUILTIN_HUGE_VALQ:
   19166          108 :           {
   19167          108 :             tree type = TREE_TYPE (TREE_TYPE (fndecl));
   19168          108 :             REAL_VALUE_TYPE inf;
   19169          108 :             real_inf (&inf);
   19170          108 :             return build_real (type, inf);
   19171              :           }
   19172              : 
   19173        62447 :         case IX86_BUILTIN_TZCNT16:
   19174        62447 :         case IX86_BUILTIN_CTZS:
   19175        62447 :         case IX86_BUILTIN_TZCNT32:
   19176        62447 :         case IX86_BUILTIN_TZCNT64:
   19177        62447 :           gcc_assert (n_args == 1);
   19178        62447 :           if (TREE_CODE (args[0]) == INTEGER_CST)
   19179              :             {
   19180           45 :               tree type = TREE_TYPE (TREE_TYPE (fndecl));
   19181           45 :               tree arg = args[0];
   19182           45 :               if (fn_code == IX86_BUILTIN_TZCNT16
   19183           45 :                   || fn_code == IX86_BUILTIN_CTZS)
   19184            3 :                 arg = fold_convert (short_unsigned_type_node, arg);
   19185           45 :               if (integer_zerop (arg))
   19186            6 :                 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
   19187              :               else
   19188           39 :                 return fold_const_call (CFN_CTZ, type, arg);
   19189              :             }
   19190              :           break;
   19191              : 
   19192        51998 :         case IX86_BUILTIN_LZCNT16:
   19193        51998 :         case IX86_BUILTIN_CLZS:
   19194        51998 :         case IX86_BUILTIN_LZCNT32:
   19195        51998 :         case IX86_BUILTIN_LZCNT64:
   19196        51998 :           gcc_assert (n_args == 1);
   19197        51998 :           if (TREE_CODE (args[0]) == INTEGER_CST)
   19198              :             {
   19199           54 :               tree type = TREE_TYPE (TREE_TYPE (fndecl));
   19200           54 :               tree arg = args[0];
   19201           54 :               if (fn_code == IX86_BUILTIN_LZCNT16
   19202           54 :                   || fn_code == IX86_BUILTIN_CLZS)
   19203           18 :                 arg = fold_convert (short_unsigned_type_node, arg);
   19204           54 :               if (integer_zerop (arg))
   19205            3 :                 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
   19206              :               else
   19207           51 :                 return fold_const_call (CFN_CLZ, type, arg);
   19208              :             }
   19209              :           break;
   19210              : 
   19211        61231 :         case IX86_BUILTIN_BEXTR32:
   19212        61231 :         case IX86_BUILTIN_BEXTR64:
   19213        61231 :         case IX86_BUILTIN_BEXTRI32:
   19214        61231 :         case IX86_BUILTIN_BEXTRI64:
   19215        61231 :           gcc_assert (n_args == 2);
   19216        61231 :           if (tree_fits_uhwi_p (args[1]))
   19217              :             {
   19218          152 :               unsigned HOST_WIDE_INT res = 0;
   19219          152 :               unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
   19220          152 :               unsigned int start = tree_to_uhwi (args[1]);
   19221          152 :               unsigned int len = (start & 0xff00) >> 8;
   19222          152 :               tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
   19223          152 :               start &= 0xff;
   19224          152 :               if (start >= prec || len == 0)
   19225          111 :                 return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
   19226              :                                          args[0]);
   19227           41 :               else if (!tree_fits_uhwi_p (args[0]))
   19228              :                 break;
   19229              :               else
   19230           24 :                 res = tree_to_uhwi (args[0]) >> start;
   19231           24 :               if (len > prec)
   19232              :                 len = prec;
   19233           24 :               if (len < HOST_BITS_PER_WIDE_INT)
   19234           15 :                 res &= (HOST_WIDE_INT_1U << len) - 1;
   19235           24 :               return build_int_cstu (lhs_type, res);
   19236              :             }
   19237              :           break;
   19238              : 
   19239        21034 :         case IX86_BUILTIN_BZHI32:
   19240        21034 :         case IX86_BUILTIN_BZHI64:
   19241        21034 :           gcc_assert (n_args == 2);
   19242        21034 :           if (tree_fits_uhwi_p (args[1]))
   19243              :             {
   19244          190 :               unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
   19245          190 :               tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
   19246          190 :               if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
   19247              :                 return args[0];
   19248          190 :               if (idx == 0)
   19249           52 :                 return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
   19250              :                                          args[0]);
   19251          138 :               if (!tree_fits_uhwi_p (args[0]))
   19252              :                 break;
   19253           12 :               unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
   19254           12 :               res &= ~(HOST_WIDE_INT_M1U << idx);
   19255           12 :               return build_int_cstu (lhs_type, res);
   19256              :             }
   19257              :           break;
   19258              : 
   19259        20792 :         case IX86_BUILTIN_PDEP32:
   19260        20792 :         case IX86_BUILTIN_PDEP64:
   19261        20792 :           gcc_assert (n_args == 2);
   19262        20792 :           if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
   19263              :             {
   19264           46 :               unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
   19265           46 :               unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
   19266           46 :               unsigned HOST_WIDE_INT res = 0;
   19267           46 :               unsigned HOST_WIDE_INT m, k = 1;
   19268         2990 :               for (m = 1; m; m <<= 1)
   19269         2944 :                 if ((mask & m) != 0)
   19270              :                   {
   19271         1440 :                     if ((src & k) != 0)
   19272          789 :                       res |= m;
   19273         1440 :                     k <<= 1;
   19274              :                   }
   19275           46 :               return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
   19276              :             }
   19277              :           break;
   19278              : 
   19279        20794 :         case IX86_BUILTIN_PEXT32:
   19280        20794 :         case IX86_BUILTIN_PEXT64:
   19281        20794 :           gcc_assert (n_args == 2);
   19282        20794 :           if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
   19283              :             {
   19284           46 :               unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
   19285           46 :               unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
   19286           46 :               unsigned HOST_WIDE_INT res = 0;
   19287           46 :               unsigned HOST_WIDE_INT m, k = 1;
   19288         2990 :               for (m = 1; m; m <<= 1)
   19289         2944 :                 if ((mask & m) != 0)
   19290              :                   {
   19291         2016 :                     if ((src & m) != 0)
   19292         1063 :                       res |= k;
   19293         2016 :                     k <<= 1;
   19294              :                   }
   19295           46 :               return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
   19296              :             }
   19297              :           break;
   19298              : 
   19299       100430 :         case IX86_BUILTIN_MOVMSKPS:
   19300       100430 :         case IX86_BUILTIN_PMOVMSKB:
   19301       100430 :         case IX86_BUILTIN_MOVMSKPD:
   19302       100430 :         case IX86_BUILTIN_PMOVMSKB128:
   19303       100430 :         case IX86_BUILTIN_MOVMSKPD256:
   19304       100430 :         case IX86_BUILTIN_MOVMSKPS256:
   19305       100430 :         case IX86_BUILTIN_PMOVMSKB256:
   19306       100430 :           gcc_assert (n_args == 1);
   19307       100430 :           if (TREE_CODE (args[0]) == VECTOR_CST)
   19308              :             {
   19309              :               HOST_WIDE_INT res = 0;
   19310         1460 :               for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
   19311              :                 {
   19312         1218 :                   tree e = VECTOR_CST_ELT (args[0], i);
   19313         1218 :                   if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
   19314              :                     {
   19315          624 :                       if (wi::neg_p (wi::to_wide (e)))
   19316          575 :                         res |= HOST_WIDE_INT_1 << i;
   19317              :                     }
   19318          594 :                   else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
   19319              :                     {
   19320          594 :                       if (TREE_REAL_CST (e).sign)
   19321          505 :                         res |= HOST_WIDE_INT_1 << i;
   19322              :                     }
   19323              :                   else
   19324              :                     return NULL_TREE;
   19325              :                 }
   19326          242 :               return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
   19327              :             }
   19328              :           break;
   19329              : 
   19330       659772 :         case IX86_BUILTIN_PSLLD:
   19331       659772 :         case IX86_BUILTIN_PSLLD128:
   19332       659772 :         case IX86_BUILTIN_PSLLD128_MASK:
   19333       659772 :         case IX86_BUILTIN_PSLLD256:
   19334       659772 :         case IX86_BUILTIN_PSLLD256_MASK:
   19335       659772 :         case IX86_BUILTIN_PSLLD512:
   19336       659772 :         case IX86_BUILTIN_PSLLDI:
   19337       659772 :         case IX86_BUILTIN_PSLLDI128:
   19338       659772 :         case IX86_BUILTIN_PSLLDI128_MASK:
   19339       659772 :         case IX86_BUILTIN_PSLLDI256:
   19340       659772 :         case IX86_BUILTIN_PSLLDI256_MASK:
   19341       659772 :         case IX86_BUILTIN_PSLLDI512:
   19342       659772 :         case IX86_BUILTIN_PSLLQ:
   19343       659772 :         case IX86_BUILTIN_PSLLQ128:
   19344       659772 :         case IX86_BUILTIN_PSLLQ128_MASK:
   19345       659772 :         case IX86_BUILTIN_PSLLQ256:
   19346       659772 :         case IX86_BUILTIN_PSLLQ256_MASK:
   19347       659772 :         case IX86_BUILTIN_PSLLQ512:
   19348       659772 :         case IX86_BUILTIN_PSLLQI:
   19349       659772 :         case IX86_BUILTIN_PSLLQI128:
   19350       659772 :         case IX86_BUILTIN_PSLLQI128_MASK:
   19351       659772 :         case IX86_BUILTIN_PSLLQI256:
   19352       659772 :         case IX86_BUILTIN_PSLLQI256_MASK:
   19353       659772 :         case IX86_BUILTIN_PSLLQI512:
   19354       659772 :         case IX86_BUILTIN_PSLLW:
   19355       659772 :         case IX86_BUILTIN_PSLLW128:
   19356       659772 :         case IX86_BUILTIN_PSLLW128_MASK:
   19357       659772 :         case IX86_BUILTIN_PSLLW256:
   19358       659772 :         case IX86_BUILTIN_PSLLW256_MASK:
   19359       659772 :         case IX86_BUILTIN_PSLLW512_MASK:
   19360       659772 :         case IX86_BUILTIN_PSLLWI:
   19361       659772 :         case IX86_BUILTIN_PSLLWI128:
   19362       659772 :         case IX86_BUILTIN_PSLLWI128_MASK:
   19363       659772 :         case IX86_BUILTIN_PSLLWI256:
   19364       659772 :         case IX86_BUILTIN_PSLLWI256_MASK:
   19365       659772 :         case IX86_BUILTIN_PSLLWI512_MASK:
   19366       659772 :           rcode = ASHIFT;
   19367       659772 :           is_vshift = false;
   19368       659772 :           goto do_shift;
   19369       601367 :         case IX86_BUILTIN_PSRAD:
   19370       601367 :         case IX86_BUILTIN_PSRAD128:
   19371       601367 :         case IX86_BUILTIN_PSRAD128_MASK:
   19372       601367 :         case IX86_BUILTIN_PSRAD256:
   19373       601367 :         case IX86_BUILTIN_PSRAD256_MASK:
   19374       601367 :         case IX86_BUILTIN_PSRAD512:
   19375       601367 :         case IX86_BUILTIN_PSRADI:
   19376       601367 :         case IX86_BUILTIN_PSRADI128:
   19377       601367 :         case IX86_BUILTIN_PSRADI128_MASK:
   19378       601367 :         case IX86_BUILTIN_PSRADI256:
   19379       601367 :         case IX86_BUILTIN_PSRADI256_MASK:
   19380       601367 :         case IX86_BUILTIN_PSRADI512:
   19381       601367 :         case IX86_BUILTIN_PSRAQ128_MASK:
   19382       601367 :         case IX86_BUILTIN_PSRAQ256_MASK:
   19383       601367 :         case IX86_BUILTIN_PSRAQ512:
   19384       601367 :         case IX86_BUILTIN_PSRAQI128_MASK:
   19385       601367 :         case IX86_BUILTIN_PSRAQI256_MASK:
   19386       601367 :         case IX86_BUILTIN_PSRAQI512:
   19387       601367 :         case IX86_BUILTIN_PSRAW:
   19388       601367 :         case IX86_BUILTIN_PSRAW128:
   19389       601367 :         case IX86_BUILTIN_PSRAW128_MASK:
   19390       601367 :         case IX86_BUILTIN_PSRAW256:
   19391       601367 :         case IX86_BUILTIN_PSRAW256_MASK:
   19392       601367 :         case IX86_BUILTIN_PSRAW512:
   19393       601367 :         case IX86_BUILTIN_PSRAWI:
   19394       601367 :         case IX86_BUILTIN_PSRAWI128:
   19395       601367 :         case IX86_BUILTIN_PSRAWI128_MASK:
   19396       601367 :         case IX86_BUILTIN_PSRAWI256:
   19397       601367 :         case IX86_BUILTIN_PSRAWI256_MASK:
   19398       601367 :         case IX86_BUILTIN_PSRAWI512:
   19399       601367 :           rcode = ASHIFTRT;
   19400       601367 :           is_vshift = false;
   19401       601367 :           goto do_shift;
   19402       633647 :         case IX86_BUILTIN_PSRLD:
   19403       633647 :         case IX86_BUILTIN_PSRLD128:
   19404       633647 :         case IX86_BUILTIN_PSRLD128_MASK:
   19405       633647 :         case IX86_BUILTIN_PSRLD256:
   19406       633647 :         case IX86_BUILTIN_PSRLD256_MASK:
   19407       633647 :         case IX86_BUILTIN_PSRLD512:
   19408       633647 :         case IX86_BUILTIN_PSRLDI:
   19409       633647 :         case IX86_BUILTIN_PSRLDI128:
   19410       633647 :         case IX86_BUILTIN_PSRLDI128_MASK:
   19411       633647 :         case IX86_BUILTIN_PSRLDI256:
   19412       633647 :         case IX86_BUILTIN_PSRLDI256_MASK:
   19413       633647 :         case IX86_BUILTIN_PSRLDI512:
   19414       633647 :         case IX86_BUILTIN_PSRLQ:
   19415       633647 :         case IX86_BUILTIN_PSRLQ128:
   19416       633647 :         case IX86_BUILTIN_PSRLQ128_MASK:
   19417       633647 :         case IX86_BUILTIN_PSRLQ256:
   19418       633647 :         case IX86_BUILTIN_PSRLQ256_MASK:
   19419       633647 :         case IX86_BUILTIN_PSRLQ512:
   19420       633647 :         case IX86_BUILTIN_PSRLQI:
   19421       633647 :         case IX86_BUILTIN_PSRLQI128:
   19422       633647 :         case IX86_BUILTIN_PSRLQI128_MASK:
   19423       633647 :         case IX86_BUILTIN_PSRLQI256:
   19424       633647 :         case IX86_BUILTIN_PSRLQI256_MASK:
   19425       633647 :         case IX86_BUILTIN_PSRLQI512:
   19426       633647 :         case IX86_BUILTIN_PSRLW:
   19427       633647 :         case IX86_BUILTIN_PSRLW128:
   19428       633647 :         case IX86_BUILTIN_PSRLW128_MASK:
   19429       633647 :         case IX86_BUILTIN_PSRLW256:
   19430       633647 :         case IX86_BUILTIN_PSRLW256_MASK:
   19431       633647 :         case IX86_BUILTIN_PSRLW512:
   19432       633647 :         case IX86_BUILTIN_PSRLWI:
   19433       633647 :         case IX86_BUILTIN_PSRLWI128:
   19434       633647 :         case IX86_BUILTIN_PSRLWI128_MASK:
   19435       633647 :         case IX86_BUILTIN_PSRLWI256:
   19436       633647 :         case IX86_BUILTIN_PSRLWI256_MASK:
   19437       633647 :         case IX86_BUILTIN_PSRLWI512:
   19438       633647 :           rcode = LSHIFTRT;
   19439       633647 :           is_vshift = false;
   19440       633647 :           goto do_shift;
   19441       276063 :         case IX86_BUILTIN_PSLLVV16HI:
   19442       276063 :         case IX86_BUILTIN_PSLLVV16SI:
   19443       276063 :         case IX86_BUILTIN_PSLLVV2DI:
   19444       276063 :         case IX86_BUILTIN_PSLLVV2DI_MASK:
   19445       276063 :         case IX86_BUILTIN_PSLLVV32HI:
   19446       276063 :         case IX86_BUILTIN_PSLLVV4DI:
   19447       276063 :         case IX86_BUILTIN_PSLLVV4DI_MASK:
   19448       276063 :         case IX86_BUILTIN_PSLLVV4SI:
   19449       276063 :         case IX86_BUILTIN_PSLLVV4SI_MASK:
   19450       276063 :         case IX86_BUILTIN_PSLLVV8DI:
   19451       276063 :         case IX86_BUILTIN_PSLLVV8HI:
   19452       276063 :         case IX86_BUILTIN_PSLLVV8SI:
   19453       276063 :         case IX86_BUILTIN_PSLLVV8SI_MASK:
   19454       276063 :           rcode = ASHIFT;
   19455       276063 :           is_vshift = true;
   19456       276063 :           goto do_shift;
   19457       275642 :         case IX86_BUILTIN_PSRAVQ128:
   19458       275642 :         case IX86_BUILTIN_PSRAVQ256:
   19459       275642 :         case IX86_BUILTIN_PSRAVV16HI:
   19460       275642 :         case IX86_BUILTIN_PSRAVV16SI:
   19461       275642 :         case IX86_BUILTIN_PSRAVV32HI:
   19462       275642 :         case IX86_BUILTIN_PSRAVV4SI:
   19463       275642 :         case IX86_BUILTIN_PSRAVV4SI_MASK:
   19464       275642 :         case IX86_BUILTIN_PSRAVV8DI:
   19465       275642 :         case IX86_BUILTIN_PSRAVV8HI:
   19466       275642 :         case IX86_BUILTIN_PSRAVV8SI:
   19467       275642 :         case IX86_BUILTIN_PSRAVV8SI_MASK:
   19468       275642 :           rcode = ASHIFTRT;
   19469       275642 :           is_vshift = true;
   19470       275642 :           goto do_shift;
   19471       276054 :         case IX86_BUILTIN_PSRLVV16HI:
   19472       276054 :         case IX86_BUILTIN_PSRLVV16SI:
   19473       276054 :         case IX86_BUILTIN_PSRLVV2DI:
   19474       276054 :         case IX86_BUILTIN_PSRLVV2DI_MASK:
   19475       276054 :         case IX86_BUILTIN_PSRLVV32HI:
   19476       276054 :         case IX86_BUILTIN_PSRLVV4DI:
   19477       276054 :         case IX86_BUILTIN_PSRLVV4DI_MASK:
   19478       276054 :         case IX86_BUILTIN_PSRLVV4SI:
   19479       276054 :         case IX86_BUILTIN_PSRLVV4SI_MASK:
   19480       276054 :         case IX86_BUILTIN_PSRLVV8DI:
   19481       276054 :         case IX86_BUILTIN_PSRLVV8HI:
   19482       276054 :         case IX86_BUILTIN_PSRLVV8SI:
   19483       276054 :         case IX86_BUILTIN_PSRLVV8SI_MASK:
   19484       276054 :           rcode = LSHIFTRT;
   19485       276054 :           is_vshift = true;
   19486       276054 :           goto do_shift;
   19487              : 
   19488      2722545 :         do_shift:
   19489      2722545 :           gcc_assert (n_args >= 2);
   19490      2722545 :           if (TREE_CODE (args[0]) != VECTOR_CST)
   19491              :             break;
   19492          927 :           mask = HOST_WIDE_INT_M1U;
   19493          927 :           if (n_args > 2)
   19494              :             {
   19495              :               /* This is masked shift.  */
   19496          678 :               if (!tree_fits_uhwi_p (args[n_args - 1])
   19497          678 :                   || TREE_SIDE_EFFECTS (args[n_args - 2]))
   19498              :                 break;
   19499          678 :               mask = tree_to_uhwi (args[n_args - 1]);
   19500          678 :               unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
   19501          678 :               mask |= HOST_WIDE_INT_M1U << elems;
   19502          678 :               if (mask != HOST_WIDE_INT_M1U
   19503          567 :                   && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
   19504              :                 break;
   19505          633 :               if (mask == (HOST_WIDE_INT_M1U << elems))
   19506              :                 return args[n_args - 2];
   19507              :             }
   19508          879 :           if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
   19509              :             break;
   19510          879 :           if (tree tem = (is_vshift ? integer_one_node
   19511          879 :                           : ix86_vector_shift_count (args[1])))
   19512              :             {
   19513          558 :               unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
   19514          558 :               unsigned HOST_WIDE_INT prec
   19515          558 :                 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
   19516          558 :               if (count == 0 && mask == HOST_WIDE_INT_M1U)
   19517              :                 return args[0];
   19518          558 :               if (count >= prec)
   19519              :                 {
   19520           72 :                   if (rcode == ASHIFTRT)
   19521           27 :                     count = prec - 1;
   19522           45 :                   else if (mask == HOST_WIDE_INT_M1U)
   19523            3 :                     return build_zero_cst (TREE_TYPE (args[0]));
   19524              :                 }
   19525          555 :               tree countt = NULL_TREE;
   19526          555 :               if (!is_vshift)
   19527              :                 {
   19528          377 :                   if (count >= prec)
   19529           42 :                     countt = integer_zero_node;
   19530              :                   else
   19531          335 :                     countt = build_int_cst (integer_type_node, count);
   19532              :                 }
   19533          555 :               tree_vector_builder builder;
   19534          555 :               if (mask != HOST_WIDE_INT_M1U || is_vshift)
   19535          392 :                 builder.new_vector (TREE_TYPE (args[0]),
   19536          784 :                                     TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
   19537              :                                     1);
   19538              :               else
   19539          163 :                 builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
   19540              :                                              false);
   19541          555 :               unsigned int cnt = builder.encoded_nelts ();
   19542         5967 :               for (unsigned int i = 0; i < cnt; ++i)
   19543              :                 {
   19544         5412 :                   tree elt = VECTOR_CST_ELT (args[0], i);
   19545         5412 :                   if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
   19546            0 :                     return NULL_TREE;
   19547         5412 :                   tree type = TREE_TYPE (elt);
   19548         5412 :                   if (rcode == LSHIFTRT)
   19549         2040 :                     elt = fold_convert (unsigned_type_for (type), elt);
   19550         5412 :                   if (is_vshift)
   19551              :                     {
   19552         1846 :                       countt = VECTOR_CST_ELT (args[1], i);
   19553         1846 :                       if (TREE_CODE (countt) != INTEGER_CST
   19554         1846 :                           || TREE_OVERFLOW (countt))
   19555              :                         return NULL_TREE;
   19556         1846 :                       if (wi::neg_p (wi::to_wide (countt))
   19557         3610 :                           || wi::to_widest (countt) >= prec)
   19558              :                         {
   19559          325 :                           if (rcode == ASHIFTRT)
   19560          108 :                             countt = build_int_cst (TREE_TYPE (countt),
   19561          108 :                                                     prec - 1);
   19562              :                           else
   19563              :                             {
   19564          217 :                               elt = build_zero_cst (TREE_TYPE (elt));
   19565          217 :                               countt = build_zero_cst (TREE_TYPE (countt));
   19566              :                             }
   19567              :                         }
   19568              :                     }
   19569         3566 :                   else if (count >= prec)
   19570          504 :                     elt = build_zero_cst (TREE_TYPE (elt));
   19571         8950 :                   elt = const_binop (rcode == ASHIFT
   19572              :                                      ? LSHIFT_EXPR : RSHIFT_EXPR,
   19573         5412 :                                      TREE_TYPE (elt), elt, countt);
   19574         5412 :                   if (!elt || TREE_CODE (elt) != INTEGER_CST)
   19575              :                     return NULL_TREE;
   19576         5412 :                   if (rcode == LSHIFTRT)
   19577         2040 :                     elt = fold_convert (type, elt);
   19578         5412 :                   if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
   19579              :                     {
   19580         1566 :                       elt = VECTOR_CST_ELT (args[n_args - 2], i);
   19581         1566 :                       if (TREE_CODE (elt) != INTEGER_CST
   19582         1566 :                           || TREE_OVERFLOW (elt))
   19583              :                         return NULL_TREE;
   19584              :                     }
   19585         5412 :                   builder.quick_push (elt);
   19586              :                 }
   19587          555 :               return builder.build ();
   19588          555 :             }
   19589              :           break;
   19590              : 
   19591        32724 :         case IX86_BUILTIN_MINSS:
   19592        32724 :         case IX86_BUILTIN_MINSH_MASK:
   19593        32724 :           tcode = LT_EXPR;
   19594        32724 :           is_scalar = true;
   19595        32724 :           goto do_minmax;
   19596              : 
   19597        32724 :         case IX86_BUILTIN_MAXSS:
   19598        32724 :         case IX86_BUILTIN_MAXSH_MASK:
   19599        32724 :           tcode = GT_EXPR;
   19600        32724 :           is_scalar = true;
   19601        32724 :           goto do_minmax;
   19602              : 
   19603       350642 :         case IX86_BUILTIN_MINPS:
   19604       350642 :         case IX86_BUILTIN_MINPD:
   19605       350642 :         case IX86_BUILTIN_MINPS256:
   19606       350642 :         case IX86_BUILTIN_MINPD256:
   19607       350642 :         case IX86_BUILTIN_MINPS512:
   19608       350642 :         case IX86_BUILTIN_MINPD512:
   19609       350642 :         case IX86_BUILTIN_MINPS128_MASK:
   19610       350642 :         case IX86_BUILTIN_MINPD128_MASK:
   19611       350642 :         case IX86_BUILTIN_MINPS256_MASK:
   19612       350642 :         case IX86_BUILTIN_MINPD256_MASK:
   19613       350642 :         case IX86_BUILTIN_MINPH128_MASK:
   19614       350642 :         case IX86_BUILTIN_MINPH256_MASK:
   19615       350642 :         case IX86_BUILTIN_MINPH512_MASK:
   19616       350642 :           tcode = LT_EXPR;
   19617       350642 :           is_scalar = false;
   19618       350642 :           goto do_minmax;
   19619              : 
   19620              :         case IX86_BUILTIN_MAXPS:
   19621              :         case IX86_BUILTIN_MAXPD:
   19622              :         case IX86_BUILTIN_MAXPS256:
   19623              :         case IX86_BUILTIN_MAXPD256:
   19624              :         case IX86_BUILTIN_MAXPS512:
   19625              :         case IX86_BUILTIN_MAXPD512:
   19626              :         case IX86_BUILTIN_MAXPS128_MASK:
   19627              :         case IX86_BUILTIN_MAXPD128_MASK:
   19628              :         case IX86_BUILTIN_MAXPS256_MASK:
   19629              :         case IX86_BUILTIN_MAXPD256_MASK:
   19630              :         case IX86_BUILTIN_MAXPH128_MASK:
   19631              :         case IX86_BUILTIN_MAXPH256_MASK:
   19632              :         case IX86_BUILTIN_MAXPH512_MASK:
   19633              :           tcode = GT_EXPR;
   19634              :           is_scalar = false;
   19635       766752 :         do_minmax:
   19636       766752 :           gcc_assert (n_args >= 2);
   19637       766752 :           if (TREE_CODE (args[0]) != VECTOR_CST
   19638           76 :               || TREE_CODE (args[1]) != VECTOR_CST)
   19639              :             break;
   19640           76 :           mask = HOST_WIDE_INT_M1U;
   19641           76 :           if (n_args > 2)
   19642              :             {
   19643           36 :               gcc_assert (n_args >= 4);
   19644              :               /* This is masked minmax.  */
   19645           36 :               if (TREE_CODE (args[3]) != INTEGER_CST
   19646           36 :                   || TREE_SIDE_EFFECTS (args[2]))
   19647              :                 break;
   19648           36 :               mask = TREE_INT_CST_LOW (args[3]);
   19649           36 :               unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
   19650           36 :               mask |= HOST_WIDE_INT_M1U << elems;
   19651           36 :               if (mask != HOST_WIDE_INT_M1U
   19652           32 :                   && TREE_CODE (args[2]) != VECTOR_CST)
   19653              :                 break;
   19654           36 :               if (n_args >= 5)
   19655              :                 {
   19656           20 :                   if (!tree_fits_uhwi_p (args[4]))
   19657              :                     break;
   19658           20 :                   if (tree_to_uhwi (args[4]) != 4
   19659            0 :                       && tree_to_uhwi (args[4]) != 8)
   19660              :                     break;
   19661              :                 }
   19662           36 :               if (mask == (HOST_WIDE_INT_M1U << elems))
   19663              :                 return args[2];
   19664              :             }
   19665              :           /* Punt on NaNs, unless exceptions are disabled.  */
   19666           76 :           if (HONOR_NANS (args[0])
   19667           76 :               && (n_args < 5 || tree_to_uhwi (args[4]) != 8))
   19668          184 :             for (int i = 0; i < 2; ++i)
   19669              :               {
   19670          134 :                 unsigned count = vector_cst_encoded_nelts (args[i]);
   19671          957 :                 for (unsigned j = 0; j < count; ++j)
   19672          849 :                   if (tree_expr_nan_p (VECTOR_CST_ENCODED_ELT (args[i], j)))
   19673              :                     return NULL_TREE;
   19674              :               }
   19675           50 :           {
   19676           50 :             tree res = const_binop (tcode,
   19677           50 :                                     truth_type_for (TREE_TYPE (args[0])),
   19678              :                                     args[0], args[1]);
   19679           50 :             if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
   19680              :               break;
   19681           50 :             res = fold_ternary (VEC_COND_EXPR, TREE_TYPE (args[0]), res,
   19682              :                                 args[0], args[1]);
   19683           50 :             if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
   19684              :               break;
   19685           50 :             if (mask != HOST_WIDE_INT_M1U)
   19686              :               {
   19687           32 :                 unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
   19688           32 :                 vec_perm_builder sel (nelts, nelts, 1);
   19689          328 :                 for (unsigned int i = 0; i < nelts; i++)
   19690          296 :                   if (mask & (HOST_WIDE_INT_1U << i))
   19691          160 :                     sel.quick_push (i);
   19692              :                   else
   19693          136 :                     sel.quick_push (nelts + i);
   19694           32 :                 vec_perm_indices indices (sel, 2, nelts);
   19695           32 :                 res = fold_vec_perm (TREE_TYPE (args[0]), res, args[2],
   19696              :                                      indices);
   19697           32 :                 if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
   19698              :                   break;
   19699           32 :               }
   19700           50 :             if (is_scalar)
   19701              :               {
   19702           10 :                 unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
   19703           10 :                 vec_perm_builder sel (nelts, nelts, 1);
   19704           10 :                 sel.quick_push (0);
   19705           40 :                 for (unsigned int i = 1; i < nelts; i++)
   19706           30 :                   sel.quick_push (nelts + i);
   19707           10 :                 vec_perm_indices indices (sel, 2, nelts);
   19708           10 :                 res = fold_vec_perm (TREE_TYPE (args[0]), res, args[0],
   19709              :                                      indices);
   19710           10 :                 if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
   19711              :                   break;
   19712           10 :               }
   19713           50 :             return res;
   19714              :           }
   19715              : 
   19716              :         default:
   19717              :           break;
   19718              :         }
   19719              :     }
   19720              : 
   19721              : #ifdef SUBTARGET_FOLD_BUILTIN
   19722              :   return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
   19723              : #endif
   19724              : 
   19725              :   return NULL_TREE;
   19726              : }
   19727              : 
   19728              : /* Fold a MD builtin (use ix86_fold_builtin for folding into
   19729              :    constant) in GIMPLE.  */
   19730              : 
   19731              : bool
   19732      1121435 : ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   19733              : {
   19734      1121435 :   gimple *stmt = gsi_stmt (*gsi), *g;
   19735      1121435 :   gimple_seq stmts = NULL;
   19736      1121435 :   tree fndecl = gimple_call_fndecl (stmt);
   19737      1121435 :   gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
   19738      1121435 :   int n_args = gimple_call_num_args (stmt);
   19739      1121435 :   enum ix86_builtins fn_code
   19740      1121435 :     = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
   19741      1121435 :   tree decl = NULL_TREE;
   19742      1121435 :   tree arg0, arg1, arg2;
   19743      1121435 :   enum rtx_code rcode;
   19744      1121435 :   enum tree_code tcode;
   19745      1121435 :   unsigned HOST_WIDE_INT count;
   19746      1121435 :   bool is_vshift;
   19747      1121435 :   unsigned HOST_WIDE_INT elems;
   19748      1121435 :   location_t loc;
   19749              : 
   19750              :   /* Don't fold when there's isa mismatch.  */
   19751      1121435 :   if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL))
   19752              :     return false;
   19753              : 
   19754      1121308 :   switch (fn_code)
   19755              :     {
   19756          288 :     case IX86_BUILTIN_TZCNT32:
   19757          288 :       decl = builtin_decl_implicit (BUILT_IN_CTZ);
   19758          288 :       goto fold_tzcnt_lzcnt;
   19759              : 
   19760          237 :     case IX86_BUILTIN_TZCNT64:
   19761          237 :       decl = builtin_decl_implicit (BUILT_IN_CTZLL);
   19762          237 :       goto fold_tzcnt_lzcnt;
   19763              : 
   19764          215 :     case IX86_BUILTIN_LZCNT32:
   19765          215 :       decl = builtin_decl_implicit (BUILT_IN_CLZ);
   19766          215 :       goto fold_tzcnt_lzcnt;
   19767              : 
   19768          224 :     case IX86_BUILTIN_LZCNT64:
   19769          224 :       decl = builtin_decl_implicit (BUILT_IN_CLZLL);
   19770          224 :       goto fold_tzcnt_lzcnt;
   19771              : 
   19772          964 :     fold_tzcnt_lzcnt:
   19773          964 :       gcc_assert (n_args == 1);
   19774          964 :       arg0 = gimple_call_arg (stmt, 0);
   19775          964 :       if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
   19776              :         {
   19777          799 :           int prec = TYPE_PRECISION (TREE_TYPE (arg0));
   19778              :           /* If arg0 is provably non-zero, optimize into generic
   19779              :              __builtin_c[tl]z{,ll} function the middle-end handles
   19780              :              better.  */
   19781          799 :           if (!expr_not_equal_to (arg0, wi::zero (prec)))
   19782              :             return false;
   19783              : 
   19784            9 :           loc = gimple_location (stmt);
   19785            9 :           g = gimple_build_call (decl, 1, arg0);
   19786            9 :           gimple_set_location (g, loc);
   19787            9 :           tree lhs = make_ssa_name (integer_type_node);
   19788            9 :           gimple_call_set_lhs (g, lhs);
   19789            9 :           gsi_insert_before (gsi, g, GSI_SAME_STMT);
   19790            9 :           g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
   19791            9 :           gimple_set_location (g, loc);
   19792            9 :           gsi_replace (gsi, g, false);
   19793            9 :           return true;
   19794              :         }
   19795              :       break;
   19796              : 
   19797          491 :     case IX86_BUILTIN_BZHI32:
   19798          491 :     case IX86_BUILTIN_BZHI64:
   19799          491 :       gcc_assert (n_args == 2);
   19800          491 :       arg1 = gimple_call_arg (stmt, 1);
   19801          491 :       if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
   19802              :         {
   19803          195 :           unsigned int idx = tree_to_uhwi (arg1) & 0xff;
   19804          195 :           arg0 = gimple_call_arg (stmt, 0);
   19805          195 :           if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
   19806              :             break;
   19807           31 :           loc = gimple_location (stmt);
   19808           31 :           g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
   19809           31 :           gimple_set_location (g, loc);
   19810           31 :           gsi_replace (gsi, g, false);
   19811           31 :           return true;
   19812              :         }
   19813              :       break;
   19814              : 
   19815          502 :     case IX86_BUILTIN_PDEP32:
   19816          502 :     case IX86_BUILTIN_PDEP64:
   19817          502 :     case IX86_BUILTIN_PEXT32:
   19818          502 :     case IX86_BUILTIN_PEXT64:
   19819          502 :       gcc_assert (n_args == 2);
   19820          502 :       arg1 = gimple_call_arg (stmt, 1);
   19821          502 :       if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
   19822              :         {
   19823            4 :           loc = gimple_location (stmt);
   19824            4 :           arg0 = gimple_call_arg (stmt, 0);
   19825            4 :           g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
   19826            4 :           gimple_set_location (g, loc);
   19827            4 :           gsi_replace (gsi, g, false);
   19828            4 :           return true;
   19829              :         }
   19830              :       break;
   19831              : 
   19832          145 :     case IX86_BUILTIN_PBLENDVB256:
   19833          145 :     case IX86_BUILTIN_BLENDVPS256:
   19834          145 :     case IX86_BUILTIN_BLENDVPD256:
   19835              :       /* pcmpeqb/d/q is under avx2, w/o avx2, it's veclower
   19836              :          to scalar operations and not combined back.  */
   19837          145 :       if (!TARGET_AVX2)
   19838              :         break;
   19839              : 
   19840              :       /* FALLTHRU.  */
   19841          112 :     case IX86_BUILTIN_BLENDVPD:
   19842              :       /* blendvpd is under sse4.1 but pcmpgtq is under sse4.2,
   19843              :          w/o sse4.2, it's veclowered to scalar operations and
   19844              :          not combined back.  */
   19845          112 :       if (!TARGET_SSE4_2)
   19846              :         break;
   19847              :       /* FALLTHRU.  */
   19848          166 :     case IX86_BUILTIN_PBLENDVB128:
   19849          166 :     case IX86_BUILTIN_BLENDVPS:
   19850          166 :       gcc_assert (n_args == 3);
   19851          166 :       arg0 = gimple_call_arg (stmt, 0);
   19852          166 :       arg1 = gimple_call_arg (stmt, 1);
   19853          166 :       arg2 = gimple_call_arg (stmt, 2);
   19854          166 :       if (gimple_call_lhs (stmt))
   19855              :         {
   19856          166 :           loc = gimple_location (stmt);
   19857          166 :           tree type = TREE_TYPE (arg2);
   19858          166 :           if (VECTOR_FLOAT_TYPE_P (type))
   19859              :             {
   19860           73 :               tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
   19861           73 :                 ? intSI_type_node : intDI_type_node;
   19862           73 :               type = get_same_sized_vectype (itype, type);
   19863              :             }
   19864              :           else
   19865           93 :             type = signed_type_for (type);
   19866          166 :           arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
   19867          166 :           tree zero_vec = build_zero_cst (type);
   19868          166 :           tree cmp_type = truth_type_for (type);
   19869          166 :           tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
   19870          166 :           gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
   19871          166 :           g = gimple_build_assign (gimple_call_lhs (stmt),
   19872              :                                    VEC_COND_EXPR, cmp,
   19873              :                                    arg1, arg0);
   19874          166 :           gimple_set_location (g, loc);
   19875          166 :           gsi_replace (gsi, g, false);
   19876              :         }
   19877              :       else
   19878            0 :         gsi_replace (gsi, gimple_build_nop (), false);
   19879              :       return true;
   19880              : 
   19881              : 
   19882           16 :     case IX86_BUILTIN_PCMPEQB128:
   19883           16 :     case IX86_BUILTIN_PCMPEQW128:
   19884           16 :     case IX86_BUILTIN_PCMPEQD128:
   19885           16 :     case IX86_BUILTIN_PCMPEQQ:
   19886           16 :     case IX86_BUILTIN_PCMPEQB256:
   19887           16 :     case IX86_BUILTIN_PCMPEQW256:
   19888           16 :     case IX86_BUILTIN_PCMPEQD256:
   19889           16 :     case IX86_BUILTIN_PCMPEQQ256:
   19890           16 :       tcode = EQ_EXPR;
   19891           16 :       goto do_cmp;
   19892              : 
   19893              :     case IX86_BUILTIN_PCMPGTB128:
   19894              :     case IX86_BUILTIN_PCMPGTW128:
   19895              :     case IX86_BUILTIN_PCMPGTD128:
   19896              :     case IX86_BUILTIN_PCMPGTQ:
   19897              :     case IX86_BUILTIN_PCMPGTB256:
   19898              :     case IX86_BUILTIN_PCMPGTW256:
   19899              :     case IX86_BUILTIN_PCMPGTD256:
   19900              :     case IX86_BUILTIN_PCMPGTQ256:
   19901              :       tcode = GT_EXPR;
   19902              : 
   19903           33 :     do_cmp:
   19904           33 :       gcc_assert (n_args == 2);
   19905           33 :       arg0 = gimple_call_arg (stmt, 0);
   19906           33 :       arg1 = gimple_call_arg (stmt, 1);
   19907           33 :       if (gimple_call_lhs (stmt))
   19908              :         {
   19909           32 :           loc = gimple_location (stmt);
   19910           32 :           tree type = TREE_TYPE (arg0);
   19911           32 :           tree zero_vec = build_zero_cst (type);
   19912           32 :           tree minus_one_vec = build_minus_one_cst (type);
   19913           32 :           tree cmp_type = truth_type_for (type);
   19914           32 :           tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
   19915           32 :           gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
   19916           32 :           g = gimple_build_assign (gimple_call_lhs (stmt),
   19917              :                                    VEC_COND_EXPR, cmp,
   19918              :                                    minus_one_vec, zero_vec);
   19919           32 :           gimple_set_location (g, loc);
   19920           32 :           gsi_replace (gsi, g, false);
   19921              :         }
   19922              :       else
   19923            1 :         gsi_replace (gsi, gimple_build_nop (), false);
   19924              :       return true;
   19925              : 
   19926         9297 :     case IX86_BUILTIN_PSLLD:
   19927         9297 :     case IX86_BUILTIN_PSLLD128:
   19928         9297 :     case IX86_BUILTIN_PSLLD128_MASK:
   19929         9297 :     case IX86_BUILTIN_PSLLD256:
   19930         9297 :     case IX86_BUILTIN_PSLLD256_MASK:
   19931         9297 :     case IX86_BUILTIN_PSLLD512:
   19932         9297 :     case IX86_BUILTIN_PSLLDI:
   19933         9297 :     case IX86_BUILTIN_PSLLDI128:
   19934         9297 :     case IX86_BUILTIN_PSLLDI128_MASK:
   19935         9297 :     case IX86_BUILTIN_PSLLDI256:
   19936         9297 :     case IX86_BUILTIN_PSLLDI256_MASK:
   19937         9297 :     case IX86_BUILTIN_PSLLDI512:
   19938         9297 :     case IX86_BUILTIN_PSLLQ:
   19939         9297 :     case IX86_BUILTIN_PSLLQ128:
   19940         9297 :     case IX86_BUILTIN_PSLLQ128_MASK:
   19941         9297 :     case IX86_BUILTIN_PSLLQ256:
   19942         9297 :     case IX86_BUILTIN_PSLLQ256_MASK:
   19943         9297 :     case IX86_BUILTIN_PSLLQ512:
   19944         9297 :     case IX86_BUILTIN_PSLLQI:
   19945         9297 :     case IX86_BUILTIN_PSLLQI128:
   19946         9297 :     case IX86_BUILTIN_PSLLQI128_MASK:
   19947         9297 :     case IX86_BUILTIN_PSLLQI256:
   19948         9297 :     case IX86_BUILTIN_PSLLQI256_MASK:
   19949         9297 :     case IX86_BUILTIN_PSLLQI512:
   19950         9297 :     case IX86_BUILTIN_PSLLW:
   19951         9297 :     case IX86_BUILTIN_PSLLW128:
   19952         9297 :     case IX86_BUILTIN_PSLLW128_MASK:
   19953         9297 :     case IX86_BUILTIN_PSLLW256:
   19954         9297 :     case IX86_BUILTIN_PSLLW256_MASK:
   19955         9297 :     case IX86_BUILTIN_PSLLW512_MASK:
   19956         9297 :     case IX86_BUILTIN_PSLLWI:
   19957         9297 :     case IX86_BUILTIN_PSLLWI128:
   19958         9297 :     case IX86_BUILTIN_PSLLWI128_MASK:
   19959         9297 :     case IX86_BUILTIN_PSLLWI256:
   19960         9297 :     case IX86_BUILTIN_PSLLWI256_MASK:
   19961         9297 :     case IX86_BUILTIN_PSLLWI512_MASK:
   19962         9297 :       rcode = ASHIFT;
   19963         9297 :       is_vshift = false;
   19964         9297 :       goto do_shift;
   19965         6495 :     case IX86_BUILTIN_PSRAD:
   19966         6495 :     case IX86_BUILTIN_PSRAD128:
   19967         6495 :     case IX86_BUILTIN_PSRAD128_MASK:
   19968         6495 :     case IX86_BUILTIN_PSRAD256:
   19969         6495 :     case IX86_BUILTIN_PSRAD256_MASK:
   19970         6495 :     case IX86_BUILTIN_PSRAD512:
   19971         6495 :     case IX86_BUILTIN_PSRADI:
   19972         6495 :     case IX86_BUILTIN_PSRADI128:
   19973         6495 :     case IX86_BUILTIN_PSRADI128_MASK:
   19974         6495 :     case IX86_BUILTIN_PSRADI256:
   19975         6495 :     case IX86_BUILTIN_PSRADI256_MASK:
   19976         6495 :     case IX86_BUILTIN_PSRADI512:
   19977         6495 :     case IX86_BUILTIN_PSRAQ128_MASK:
   19978         6495 :     case IX86_BUILTIN_PSRAQ256_MASK:
   19979         6495 :     case IX86_BUILTIN_PSRAQ512:
   19980         6495 :     case IX86_BUILTIN_PSRAQI128_MASK:
   19981         6495 :     case IX86_BUILTIN_PSRAQI256_MASK:
   19982         6495 :     case IX86_BUILTIN_PSRAQI512:
   19983         6495 :     case IX86_BUILTIN_PSRAW:
   19984         6495 :     case IX86_BUILTIN_PSRAW128:
   19985         6495 :     case IX86_BUILTIN_PSRAW128_MASK:
   19986         6495 :     case IX86_BUILTIN_PSRAW256:
   19987         6495 :     case IX86_BUILTIN_PSRAW256_MASK:
   19988         6495 :     case IX86_BUILTIN_PSRAW512:
   19989         6495 :     case IX86_BUILTIN_PSRAWI:
   19990         6495 :     case IX86_BUILTIN_PSRAWI128:
   19991         6495 :     case IX86_BUILTIN_PSRAWI128_MASK:
   19992         6495 :     case IX86_BUILTIN_PSRAWI256:
   19993         6495 :     case IX86_BUILTIN_PSRAWI256_MASK:
   19994         6495 :     case IX86_BUILTIN_PSRAWI512:
   19995         6495 :       rcode = ASHIFTRT;
   19996         6495 :       is_vshift = false;
   19997         6495 :       goto do_shift;
   19998         7960 :     case IX86_BUILTIN_PSRLD:
   19999         7960 :     case IX86_BUILTIN_PSRLD128:
   20000         7960 :     case IX86_BUILTIN_PSRLD128_MASK:
   20001         7960 :     case IX86_BUILTIN_PSRLD256:
   20002         7960 :     case IX86_BUILTIN_PSRLD256_MASK:
   20003         7960 :     case IX86_BUILTIN_PSRLD512:
   20004         7960 :     case IX86_BUILTIN_PSRLDI:
   20005         7960 :     case IX86_BUILTIN_PSRLDI128:
   20006         7960 :     case IX86_BUILTIN_PSRLDI128_MASK:
   20007         7960 :     case IX86_BUILTIN_PSRLDI256:
   20008         7960 :     case IX86_BUILTIN_PSRLDI256_MASK:
   20009         7960 :     case IX86_BUILTIN_PSRLDI512:
   20010         7960 :     case IX86_BUILTIN_PSRLQ:
   20011         7960 :     case IX86_BUILTIN_PSRLQ128:
   20012         7960 :     case IX86_BUILTIN_PSRLQ128_MASK:
   20013         7960 :     case IX86_BUILTIN_PSRLQ256:
   20014         7960 :     case IX86_BUILTIN_PSRLQ256_MASK:
   20015         7960 :     case IX86_BUILTIN_PSRLQ512:
   20016         7960 :     case IX86_BUILTIN_PSRLQI:
   20017         7960 :     case IX86_BUILTIN_PSRLQI128:
   20018         7960 :     case IX86_BUILTIN_PSRLQI128_MASK:
   20019         7960 :     case IX86_BUILTIN_PSRLQI256:
   20020         7960 :     case IX86_BUILTIN_PSRLQI256_MASK:
   20021         7960 :     case IX86_BUILTIN_PSRLQI512:
   20022         7960 :     case IX86_BUILTIN_PSRLW:
   20023         7960 :     case IX86_BUILTIN_PSRLW128:
   20024         7960 :     case IX86_BUILTIN_PSRLW128_MASK:
   20025         7960 :     case IX86_BUILTIN_PSRLW256:
   20026         7960 :     case IX86_BUILTIN_PSRLW256_MASK:
   20027         7960 :     case IX86_BUILTIN_PSRLW512:
   20028         7960 :     case IX86_BUILTIN_PSRLWI:
   20029         7960 :     case IX86_BUILTIN_PSRLWI128:
   20030         7960 :     case IX86_BUILTIN_PSRLWI128_MASK:
   20031         7960 :     case IX86_BUILTIN_PSRLWI256:
   20032         7960 :     case IX86_BUILTIN_PSRLWI256_MASK:
   20033         7960 :     case IX86_BUILTIN_PSRLWI512:
   20034         7960 :       rcode = LSHIFTRT;
   20035         7960 :       is_vshift = false;
   20036         7960 :       goto do_shift;
   20037         2384 :     case IX86_BUILTIN_PSLLVV16HI:
   20038         2384 :     case IX86_BUILTIN_PSLLVV16SI:
   20039         2384 :     case IX86_BUILTIN_PSLLVV2DI:
   20040         2384 :     case IX86_BUILTIN_PSLLVV2DI_MASK:
   20041         2384 :     case IX86_BUILTIN_PSLLVV32HI:
   20042         2384 :     case IX86_BUILTIN_PSLLVV4DI:
   20043         2384 :     case IX86_BUILTIN_PSLLVV4DI_MASK:
   20044         2384 :     case IX86_BUILTIN_PSLLVV4SI:
   20045         2384 :     case IX86_BUILTIN_PSLLVV4SI_MASK:
   20046         2384 :     case IX86_BUILTIN_PSLLVV8DI:
   20047         2384 :     case IX86_BUILTIN_PSLLVV8HI:
   20048         2384 :     case IX86_BUILTIN_PSLLVV8SI:
   20049         2384 :     case IX86_BUILTIN_PSLLVV8SI_MASK:
   20050         2384 :       rcode = ASHIFT;
   20051         2384 :       is_vshift = true;
   20052         2384 :       goto do_shift;
   20053         2341 :     case IX86_BUILTIN_PSRAVQ128:
   20054         2341 :     case IX86_BUILTIN_PSRAVQ256:
   20055         2341 :     case IX86_BUILTIN_PSRAVV16HI:
   20056         2341 :     case IX86_BUILTIN_PSRAVV16SI:
   20057         2341 :     case IX86_BUILTIN_PSRAVV32HI:
   20058         2341 :     case IX86_BUILTIN_PSRAVV4SI:
   20059         2341 :     case IX86_BUILTIN_PSRAVV4SI_MASK:
   20060         2341 :     case IX86_BUILTIN_PSRAVV8DI:
   20061         2341 :     case IX86_BUILTIN_PSRAVV8HI:
   20062         2341 :     case IX86_BUILTIN_PSRAVV8SI:
   20063         2341 :     case IX86_BUILTIN_PSRAVV8SI_MASK:
   20064         2341 :       rcode = ASHIFTRT;
   20065         2341 :       is_vshift = true;
   20066         2341 :       goto do_shift;
   20067         2380 :     case IX86_BUILTIN_PSRLVV16HI:
   20068         2380 :     case IX86_BUILTIN_PSRLVV16SI:
   20069         2380 :     case IX86_BUILTIN_PSRLVV2DI:
   20070         2380 :     case IX86_BUILTIN_PSRLVV2DI_MASK:
   20071         2380 :     case IX86_BUILTIN_PSRLVV32HI:
   20072         2380 :     case IX86_BUILTIN_PSRLVV4DI:
   20073         2380 :     case IX86_BUILTIN_PSRLVV4DI_MASK:
   20074         2380 :     case IX86_BUILTIN_PSRLVV4SI:
   20075         2380 :     case IX86_BUILTIN_PSRLVV4SI_MASK:
   20076         2380 :     case IX86_BUILTIN_PSRLVV8DI:
   20077         2380 :     case IX86_BUILTIN_PSRLVV8HI:
   20078         2380 :     case IX86_BUILTIN_PSRLVV8SI:
   20079         2380 :     case IX86_BUILTIN_PSRLVV8SI_MASK:
   20080         2380 :       rcode = LSHIFTRT;
   20081         2380 :       is_vshift = true;
   20082         2380 :       goto do_shift;
   20083              : 
   20084        30857 :     do_shift:
   20085        30857 :       gcc_assert (n_args >= 2);
   20086        30857 :       if (!gimple_call_lhs (stmt))
   20087              :         {
   20088            1 :           gsi_replace (gsi, gimple_build_nop (), false);
   20089            1 :           return true;
   20090              :         }
   20091        30856 :       arg0 = gimple_call_arg (stmt, 0);
   20092        30856 :       arg1 = gimple_call_arg (stmt, 1);
   20093        30856 :       elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
   20094              :       /* For masked shift, only optimize if the mask is all ones.  */
   20095        30856 :       if (n_args > 2
   20096        30856 :           && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
   20097              :         break;
   20098        16081 :       if (is_vshift)
   20099              :         {
   20100         2640 :           if (TREE_CODE (arg1) != VECTOR_CST)
   20101              :             break;
   20102           69 :           count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
   20103           69 :           if (integer_zerop (arg1))
   20104           27 :             count = 0;
   20105           42 :           else if (rcode == ASHIFTRT)
   20106              :             break;
   20107              :           else
   20108          230 :             for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
   20109              :               {
   20110          212 :                 tree elt = VECTOR_CST_ELT (arg1, i);
   20111          212 :                 if (!wi::neg_p (wi::to_wide (elt))
   20112          375 :                     && wi::to_widest (elt) < count)
   20113           16 :                   return false;
   20114              :               }
   20115              :         }
   20116              :       else
   20117              :         {
   20118        13441 :           arg1 = ix86_vector_shift_count (arg1);
   20119        13441 :           if (!arg1)
   20120              :             break;
   20121         5608 :           count = tree_to_uhwi (arg1);
   20122              :         }
   20123         5653 :       if (count == 0)
   20124              :         {
   20125              :           /* Just return the first argument for shift by 0.  */
   20126           93 :           loc = gimple_location (stmt);
   20127           93 :           g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
   20128           93 :           gimple_set_location (g, loc);
   20129           93 :           gsi_replace (gsi, g, false);
   20130           93 :           return true;
   20131              :         }
   20132         5560 :       if (rcode != ASHIFTRT
   20133         5560 :           && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
   20134              :         {
   20135              :           /* For shift counts equal or greater than precision, except for
   20136              :              arithmetic right shift the result is zero.  */
   20137           78 :           loc = gimple_location (stmt);
   20138           78 :           g = gimple_build_assign (gimple_call_lhs (stmt),
   20139           78 :                                    build_zero_cst (TREE_TYPE (arg0)));
   20140           78 :           gimple_set_location (g, loc);
   20141           78 :           gsi_replace (gsi, g, false);
   20142           78 :           return true;
   20143              :         }
   20144              :       break;
   20145              : 
   20146          531 :     case IX86_BUILTIN_SHUFPD512:
   20147          531 :     case IX86_BUILTIN_SHUFPS512:
   20148          531 :     case IX86_BUILTIN_SHUFPD:
   20149          531 :     case IX86_BUILTIN_SHUFPD256:
   20150          531 :     case IX86_BUILTIN_SHUFPS:
   20151          531 :     case IX86_BUILTIN_SHUFPS256:
   20152          531 :       arg0 = gimple_call_arg (stmt, 0);
   20153          531 :       elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
   20154              :       /* This is masked shuffle.  Only optimize if the mask is all ones.  */
   20155          531 :       if (n_args > 3
   20156          895 :           && !ix86_masked_all_ones (elems,
   20157          364 :                                     gimple_call_arg (stmt, n_args - 1)))
   20158              :         break;
   20159          203 :       arg2 = gimple_call_arg (stmt, 2);
   20160          203 :       if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (stmt))
   20161              :         {
   20162          146 :           unsigned HOST_WIDE_INT shuffle_mask = TREE_INT_CST_LOW (arg2);
   20163              :           /* Check valid imm, refer to gcc.target/i386/testimm-10.c.  */
   20164          146 :           if (shuffle_mask > 255)
   20165              :             return false;
   20166              : 
   20167          144 :           machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)));
   20168          144 :           loc = gimple_location (stmt);
   20169          144 :           tree itype = (imode == E_DFmode
   20170          144 :                         ? long_long_integer_type_node : integer_type_node);
   20171          144 :           tree vtype = build_vector_type (itype, elems);
   20172          144 :           tree_vector_builder elts (vtype, elems, 1);
   20173              : 
   20174              : 
   20175              :           /* Transform integer shuffle_mask to vector perm_mask which
   20176              :              is used by vec_perm_expr, refer to shuflp[sd]256/512 in sse.md.  */
   20177          840 :           for (unsigned i = 0; i != elems; i++)
   20178              :             {
   20179          696 :               unsigned sel_idx;
   20180              :               /* Imm[1:0](if VL > 128, then use Imm[3:2],Imm[5:4],Imm[7:6])
   20181              :                  provide 2 select constrols for each element of the
   20182              :                  destination.  */
   20183          696 :               if (imode == E_DFmode)
   20184          240 :                 sel_idx = (i & 1) * elems + (i & ~1)
   20185          240 :                           + ((shuffle_mask >> i) & 1);
   20186              :               else
   20187              :                 {
   20188              :                   /* Imm[7:0](if VL > 128, also use Imm[7:0]) provide 4 select
   20189              :                      controls for each element of the destination.  */
   20190          456 :                   unsigned j = i % 4;
   20191          456 :                   sel_idx = ((i >> 1) & 1) * elems + (i & ~3)
   20192          456 :                             + ((shuffle_mask >> 2 * j) & 3);
   20193              :                 }
   20194          696 :               elts.quick_push (build_int_cst (itype, sel_idx));
   20195              :             }
   20196              : 
   20197          144 :           tree perm_mask = elts.build ();
   20198          144 :           arg1 = gimple_call_arg (stmt, 1);
   20199          144 :           g = gimple_build_assign (gimple_call_lhs (stmt),
   20200              :                                    VEC_PERM_EXPR,
   20201              :                                    arg0, arg1, perm_mask);
   20202          144 :           gimple_set_location (g, loc);
   20203          144 :           gsi_replace (gsi, g, false);
   20204          144 :           return true;
   20205          144 :         }
   20206              :       // Do not error yet, the constant could be propagated later?
   20207              :       break;
   20208              : 
   20209           48 :     case IX86_BUILTIN_PABSB:
   20210           48 :     case IX86_BUILTIN_PABSW:
   20211           48 :     case IX86_BUILTIN_PABSD:
   20212              :       /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE.  */
   20213           48 :       if (!TARGET_MMX_WITH_SSE)
   20214              :         break;
   20215              :       /* FALLTHRU.  */
   20216         2190 :     case IX86_BUILTIN_PABSB128:
   20217         2190 :     case IX86_BUILTIN_PABSB256:
   20218         2190 :     case IX86_BUILTIN_PABSB512:
   20219         2190 :     case IX86_BUILTIN_PABSW128:
   20220         2190 :     case IX86_BUILTIN_PABSW256:
   20221         2190 :     case IX86_BUILTIN_PABSW512:
   20222         2190 :     case IX86_BUILTIN_PABSD128:
   20223         2190 :     case IX86_BUILTIN_PABSD256:
   20224         2190 :     case IX86_BUILTIN_PABSD512:
   20225         2190 :     case IX86_BUILTIN_PABSQ128:
   20226         2190 :     case IX86_BUILTIN_PABSQ256:
   20227         2190 :     case IX86_BUILTIN_PABSQ512:
   20228         2190 :     case IX86_BUILTIN_PABSB128_MASK:
   20229         2190 :     case IX86_BUILTIN_PABSB256_MASK:
   20230         2190 :     case IX86_BUILTIN_PABSW128_MASK:
   20231         2190 :     case IX86_BUILTIN_PABSW256_MASK:
   20232         2190 :     case IX86_BUILTIN_PABSD128_MASK:
   20233         2190 :     case IX86_BUILTIN_PABSD256_MASK:
   20234         2190 :       gcc_assert (n_args >= 1);
   20235         2190 :       if (!gimple_call_lhs (stmt))
   20236              :         {
   20237            1 :           gsi_replace (gsi, gimple_build_nop (), false);
   20238            1 :           return true;
   20239              :         }
   20240         2189 :       arg0 = gimple_call_arg (stmt, 0);
   20241         2189 :       elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
   20242              :       /* For masked ABS, only optimize if the mask is all ones.  */
   20243         2189 :       if (n_args > 1
   20244         2189 :           && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
   20245              :         break;
   20246          229 :       {
   20247          229 :         tree utype, ures, vce;
   20248          229 :         utype = unsigned_type_for (TREE_TYPE (arg0));
   20249              :         /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
   20250              :            instead of ABS_EXPR to handle overflow case(TYPE_MIN).  */
   20251          229 :         ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
   20252          229 :         gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
   20253          229 :         loc = gimple_location (stmt);
   20254          229 :         vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
   20255          229 :         g = gimple_build_assign (gimple_call_lhs (stmt),
   20256              :                                  VIEW_CONVERT_EXPR, vce);
   20257          229 :         gsi_replace (gsi, g, false);
   20258              :       }
   20259          229 :       return true;
   20260              : 
   20261         2225 :     case IX86_BUILTIN_MINPS:
   20262         2225 :     case IX86_BUILTIN_MINPD:
   20263         2225 :     case IX86_BUILTIN_MINPS256:
   20264         2225 :     case IX86_BUILTIN_MINPD256:
   20265         2225 :     case IX86_BUILTIN_MINPS512:
   20266         2225 :     case IX86_BUILTIN_MINPD512:
   20267         2225 :     case IX86_BUILTIN_MINPS128_MASK:
   20268         2225 :     case IX86_BUILTIN_MINPD128_MASK:
   20269         2225 :     case IX86_BUILTIN_MINPS256_MASK:
   20270         2225 :     case IX86_BUILTIN_MINPD256_MASK:
   20271         2225 :     case IX86_BUILTIN_MINPH128_MASK:
   20272         2225 :     case IX86_BUILTIN_MINPH256_MASK:
   20273         2225 :     case IX86_BUILTIN_MINPH512_MASK:
   20274         2225 :       tcode = LT_EXPR;
   20275         2225 :       goto do_minmax;
   20276              : 
   20277              :     case IX86_BUILTIN_MAXPS:
   20278              :     case IX86_BUILTIN_MAXPD:
   20279              :     case IX86_BUILTIN_MAXPS256:
   20280              :     case IX86_BUILTIN_MAXPD256:
   20281              :     case IX86_BUILTIN_MAXPS512:
   20282              :     case IX86_BUILTIN_MAXPD512:
   20283              :     case IX86_BUILTIN_MAXPS128_MASK:
   20284              :     case IX86_BUILTIN_MAXPD128_MASK:
   20285              :     case IX86_BUILTIN_MAXPS256_MASK:
   20286              :     case IX86_BUILTIN_MAXPD256_MASK:
   20287              :     case IX86_BUILTIN_MAXPH128_MASK:
   20288              :     case IX86_BUILTIN_MAXPH256_MASK:
   20289              :     case IX86_BUILTIN_MAXPH512_MASK:
   20290              :       tcode = GT_EXPR;
   20291         4435 :     do_minmax:
   20292         4435 :       gcc_assert (n_args >= 2);
   20293              :       /* Without SSE4.1 we often aren't able to pattern match it back to the
   20294              :          desired instruction.  */
   20295         4435 :       if (!gimple_call_lhs (stmt) || !optimize || !TARGET_SSE4_1)
   20296              :         break;
   20297         3865 :       arg0 = gimple_call_arg (stmt, 0);
   20298         3865 :       arg1 = gimple_call_arg (stmt, 1);
   20299         3865 :       elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
   20300              :       /* For masked minmax, only optimize if the mask is all ones.  */
   20301         3865 :       if (n_args > 2
   20302         3865 :           && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, 3)))
   20303              :         break;
   20304          647 :       if (n_args >= 5)
   20305              :         {
   20306          436 :           tree arg4 = gimple_call_arg (stmt, 4);
   20307          436 :           if (!tree_fits_uhwi_p (arg4))
   20308              :             break;
   20309          424 :           if (tree_to_uhwi (arg4) == 4)
   20310              :             /* Ok.  */;
   20311          416 :           else if (tree_to_uhwi (arg4) != 8)
   20312              :             /* Invalid round argument.  */
   20313              :             break;
   20314          416 :           else if (HONOR_NANS (arg0))
   20315              :             /* Lowering to comparison would raise exceptions which
   20316              :                shouldn't be raised.  */
   20317              :             break;
   20318              :         }
   20319          219 :       {
   20320          219 :         tree type = truth_type_for (TREE_TYPE (arg0));
   20321          219 :         tree cmpres = gimple_build (&stmts, tcode, type, arg0, arg1);
   20322          219 :         gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
   20323          219 :         g = gimple_build_assign (gimple_call_lhs (stmt),
   20324              :                                  VEC_COND_EXPR, cmpres, arg0, arg1);
   20325          219 :         gsi_replace (gsi, g, false);
   20326              :       }
   20327          219 :       return true;
   20328              : 
   20329              :     default:
   20330              :       break;
   20331              :     }
   20332              : 
   20333              :   return false;
   20334              : }
   20335              : 
   20336              : /* Handler for an SVML-style interface to
   20337              :    a library with vectorized intrinsics.  */
   20338              : 
   20339              : tree
   20340           10 : ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
   20341              : {
   20342           10 :   char name[20];
   20343           10 :   tree fntype, new_fndecl, args;
   20344           10 :   unsigned arity;
   20345           10 :   const char *bname;
   20346           10 :   machine_mode el_mode, in_mode;
   20347           10 :   int n, in_n;
   20348              : 
   20349              :   /* The SVML is suitable for unsafe math only.  */
   20350           10 :   if (!flag_unsafe_math_optimizations)
   20351              :     return NULL_TREE;
   20352              : 
   20353           10 :   el_mode = TYPE_MODE (TREE_TYPE (type_out));
   20354           10 :   n = TYPE_VECTOR_SUBPARTS (type_out);
   20355           10 :   in_mode = TYPE_MODE (TREE_TYPE (type_in));
   20356           10 :   in_n = TYPE_VECTOR_SUBPARTS (type_in);
   20357           10 :   if (el_mode != in_mode
   20358           10 :       || n != in_n)
   20359              :     return NULL_TREE;
   20360              : 
   20361           10 :   switch (fn)
   20362              :     {
   20363           10 :     CASE_CFN_EXP:
   20364           10 :     CASE_CFN_LOG:
   20365           10 :     CASE_CFN_LOG10:
   20366           10 :     CASE_CFN_POW:
   20367           10 :     CASE_CFN_TANH:
   20368           10 :     CASE_CFN_TAN:
   20369           10 :     CASE_CFN_ATAN:
   20370           10 :     CASE_CFN_ATAN2:
   20371           10 :     CASE_CFN_ATANH:
   20372           10 :     CASE_CFN_CBRT:
   20373           10 :     CASE_CFN_SINH:
   20374           10 :     CASE_CFN_SIN:
   20375           10 :     CASE_CFN_ASINH:
   20376           10 :     CASE_CFN_ASIN:
   20377           10 :     CASE_CFN_COSH:
   20378           10 :     CASE_CFN_COS:
   20379           10 :     CASE_CFN_ACOSH:
   20380           10 :     CASE_CFN_ACOS:
   20381           10 :       if ((el_mode != DFmode || n != 2)
   20382            8 :           && (el_mode != SFmode || n != 4))
   20383              :         return NULL_TREE;
   20384            6 :       break;
   20385              : 
   20386              :     default:
   20387              :       return NULL_TREE;
   20388              :     }
   20389              : 
   20390            6 :   tree fndecl = mathfn_built_in (el_mode == DFmode
   20391              :                                  ? double_type_node : float_type_node, fn);
   20392            6 :   bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
   20393              : 
   20394            6 :   if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
   20395            2 :     strcpy (name, "vmlsLn4");
   20396            4 :   else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
   20397            0 :     strcpy (name, "vmldLn2");
   20398            4 :   else if (n == 4)
   20399              :     {
   20400            2 :       sprintf (name, "vmls%s", bname+10);
   20401            2 :       name[strlen (name)-1] = '4';
   20402              :     }
   20403              :   else
   20404            2 :     sprintf (name, "vmld%s2", bname+10);
   20405              : 
   20406              :   /* Convert to uppercase. */
   20407            6 :   name[4] &= ~0x20;
   20408              : 
   20409            6 :   arity = 0;
   20410            6 :   for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
   20411            0 :     arity++;
   20412              : 
   20413            6 :   if (arity == 1)
   20414            0 :     fntype = build_function_type_list (type_out, type_in, NULL);
   20415              :   else
   20416            6 :     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
   20417              : 
   20418              :   /* Build a function declaration for the vectorized function.  */
   20419            6 :   new_fndecl = build_decl (BUILTINS_LOCATION,
   20420              :                            FUNCTION_DECL, get_identifier (name), fntype);
   20421            6 :   TREE_PUBLIC (new_fndecl) = 1;
   20422            6 :   DECL_EXTERNAL (new_fndecl) = 1;
   20423            6 :   DECL_IS_NOVOPS (new_fndecl) = 1;
   20424            6 :   TREE_READONLY (new_fndecl) = 1;
   20425              : 
   20426            6 :   return new_fndecl;
   20427              : }
   20428              : 
   20429              : /* Handler for an ACML-style interface to
   20430              :    a library with vectorized intrinsics.  */
   20431              : 
   20432              : tree
   20433            3 : ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
   20434              : {
   20435            3 :   char name[20] = "__vr.._";
   20436            3 :   tree fntype, new_fndecl, args;
   20437            3 :   unsigned arity;
   20438            3 :   const char *bname;
   20439            3 :   machine_mode el_mode, in_mode;
   20440            3 :   int n, in_n;
   20441              : 
   20442              :   /* The ACML is 64bits only and suitable for unsafe math only as
   20443              :      it does not correctly support parts of IEEE with the required
   20444              :      precision such as denormals.  */
   20445            3 :   if (!TARGET_64BIT
   20446            3 :       || !flag_unsafe_math_optimizations)
   20447              :     return NULL_TREE;
   20448              : 
   20449            3 :   el_mode = TYPE_MODE (TREE_TYPE (type_out));
   20450            3 :   n = TYPE_VECTOR_SUBPARTS (type_out);
   20451            3 :   in_mode = TYPE_MODE (TREE_TYPE (type_in));
   20452            3 :   in_n = TYPE_VECTOR_SUBPARTS (type_in);
   20453            3 :   if (el_mode != in_mode
   20454            3 :       || n != in_n)
   20455              :     return NULL_TREE;
   20456              : 
   20457            3 :   switch (fn)
   20458              :     {
   20459            3 :     CASE_CFN_SIN:
   20460            3 :     CASE_CFN_COS:
   20461            3 :     CASE_CFN_EXP:
   20462            3 :     CASE_CFN_LOG:
   20463            3 :     CASE_CFN_LOG2:
   20464            3 :     CASE_CFN_LOG10:
   20465            3 :       if (el_mode == DFmode && n == 2)
   20466              :         {
   20467            3 :           name[4] = 'd';
   20468            3 :           name[5] = '2';
   20469              :         }
   20470            0 :       else if (el_mode == SFmode && n == 4)
   20471              :         {
   20472            0 :           name[4] = 's';
   20473            0 :           name[5] = '4';
   20474              :         }
   20475              :       else
   20476              :         return NULL_TREE;
   20477            3 :       break;
   20478              : 
   20479              :     default:
   20480              :       return NULL_TREE;
   20481              :     }
   20482              : 
   20483            3 :   tree fndecl = mathfn_built_in (el_mode == DFmode
   20484              :                                  ? double_type_node : float_type_node, fn);
   20485            3 :   bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
   20486            3 :   sprintf (name + 7, "%s", bname+10);
   20487              : 
   20488            3 :   arity = 0;
   20489            3 :   for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
   20490            0 :     arity++;
   20491              : 
   20492            3 :   if (arity == 1)
   20493            0 :     fntype = build_function_type_list (type_out, type_in, NULL);
   20494              :   else
   20495            3 :     fntype = build_function_type_list (type_out, type_in, type_in, NULL);
   20496              : 
   20497              :   /* Build a function declaration for the vectorized function.  */
   20498            3 :   new_fndecl = build_decl (BUILTINS_LOCATION,
   20499              :                            FUNCTION_DECL, get_identifier (name), fntype);
   20500            3 :   TREE_PUBLIC (new_fndecl) = 1;
   20501            3 :   DECL_EXTERNAL (new_fndecl) = 1;
   20502            3 :   DECL_IS_NOVOPS (new_fndecl) = 1;
   20503            3 :   TREE_READONLY (new_fndecl) = 1;
   20504              : 
   20505            3 :   return new_fndecl;
   20506              : }
   20507              : 
   20508              : /* Handler for an AOCL-LibM-style interface to
   20509              :    a library with vectorized intrinsics.  */
   20510              : 
tree
ix86_veclibabi_aocl (combined_fn fn, tree type_out, tree type_in)
{
  /* Buffer for the vectorized function name built below (e.g.
     "amd_vrd2_sin"); NAME_LEN is the number of characters written so
     far (strlen ("amd_vr") == 6).  */
  char name[20] = "amd_vr";
  int name_len = 6;
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* AOCL-LibM is 64bits only.  It is also only suitable for unsafe math only
     as it trades off some accuracy for increased performance.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  /* The element mode and the vector width of the output must match
     those of the input.  */
  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  gcc_checking_assert (n > 0);

  /* Decide whether there exists a function for the combination of FN, the mode
     and the vector width.  Return early if it doesn't.  */

  /* Only single and double precision elements are handled.  */
  if (el_mode != DFmode && el_mode != SFmode)
    return NULL_TREE;

  /* Supported vector widths for given FN and single/double precision.  Zeros
     are used to fill out unused positions in the arrays.  */
  static const int supported_n[][2][3] = {
  /*   Single prec. ,  Double prec.  */
    { { 16,  0,  0 }, {  2,  4,  8 } }, /* TAN.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* EXP.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* EXP2.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* LOG.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* LOG2.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* COS.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* SIN.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* POW.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* ERF.  */
    { {  4,  8, 16 }, {  2,  8,  0 } }, /* ATAN.  */
    { {  4,  8, 16 }, {  2,  0,  0 } }, /* LOG10.  */
    { {  4,  0,  0 }, {  2,  0,  0 } }, /* EXP10.  */
    { {  4,  0,  0 }, {  2,  0,  0 } }, /* LOG1P.  */
    { {  4,  8, 16 }, {  8,  0,  0 } }, /* ASIN.  */
    { {  4, 16,  0 }, {  0,  0,  0 } }, /* ACOS.  */
    { {  4,  8, 16 }, {  0,  0,  0 } }, /* TANH.  */
    { {  4,  0,  0 }, {  0,  0,  0 } }, /* EXPM1.  */
    { {  4,  8,  0 }, {  0,  0,  0 } }, /* COSH.  */
  };

  /* We cannot simply index the supported_n array with FN since multiple FNs
     may correspond to a single operation (see the definitions of these
     CASE_CFN_* macros).  The value of I must stay in sync with the row
     order of SUPPORTED_N above.  */
  int i;
  switch (fn)
    {
    CASE_CFN_TAN   :  i = 0; break;
    CASE_CFN_EXP   :  i = 1; break;
    CASE_CFN_EXP2  :  i = 2; break;
    CASE_CFN_LOG   :  i = 3; break;
    CASE_CFN_LOG2  :  i = 4; break;
    CASE_CFN_COS   :  i = 5; break;
    CASE_CFN_SIN   :  i = 6; break;
    CASE_CFN_POW   :  i = 7; break;
    CASE_CFN_ERF   :  i = 8; break;
    CASE_CFN_ATAN  :  i = 9; break;
    CASE_CFN_LOG10 : i = 10; break;
    CASE_CFN_EXP10 : i = 11; break;
    CASE_CFN_LOG1P : i = 12; break;
    CASE_CFN_ASIN  : i = 13; break;
    CASE_CFN_ACOS  : i = 14; break;
    CASE_CFN_TANH  : i = 15; break;
    CASE_CFN_EXPM1 : i = 16; break;
    CASE_CFN_COSH  : i = 17; break;
    default: return NULL_TREE;
    }

  /* J selects the precision column: 0 for single, 1 for double.  */
  int j = el_mode == DFmode;
  bool n_is_supported = false;
  for (unsigned k = 0; k < 3; k++)
    if (supported_n[i][j][k] == n)
      {
	n_is_supported = true;
	break;
      }
  if (!n_is_supported)
    return NULL_TREE;

  /* Append the precision and the vector width to the function name we are
     constructing.  */
  name[name_len++] = el_mode == DFmode ? 'd' : 's';
  switch (n)
    {
      case 2:
      case 4:
      case 8:
	name[name_len++] = '0' + n;
	break;
      case 16:
	name[name_len++] = '1';
	name[name_len++] = '6';
	break;
      default:
	/* SUPPORTED_N only contains 0, 2, 4, 8 and 16, and 0 can never
	   match N which was asserted positive above.  */
	gcc_unreachable ();
    }
  name[name_len++] = '_';

  /* Append the operation name (steal it from the name of a builtin).
     BNAME + 10 skips the "__builtin_" prefix.  */
  tree fndecl = mathfn_built_in (el_mode == DFmode
				 ? double_type_node : float_type_node, fn);
  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  sprintf (name + name_len, "%s", bname + 10);

  arity = 0;
  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
   20649              : 
/* Returns a decl of a function that implements scatter store with
   register type VECTYPE and index type INDEX_TYPE and SCALE.
   Return NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_scatter (const_tree vectype,
                                const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;

  /* All scatter builtins require at least AVX512F.  */
  if (!TARGET_AVX512F)
    return NULL_TREE;

  /* Honor the per-element-count scatter tuning knobs: emitting
     scatters may be a pessimization on some microarchitectures.  */
  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
      ? !TARGET_USE_SCATTER_2PARTS
      : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
         ? !TARGET_USE_SCATTER_4PARTS
         : !TARGET_USE_SCATTER_8PARTS))
    return NULL_TREE;

  /* The index must be an integer or pointer whose mode is SImode or
     DImode; those are the only index widths the insns support.  */
  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
          && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  /* An index wider than a pointer cannot be represented.  */
  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*scatter* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  /* Scale can be 1, 2, 4 or 8.  */
  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  /* SI selects the 32-bit-index ("SIV"/"ALTSIV") variant of each
     builtin; otherwise the 64-bit-index ("DIV"/"ALTDIV") one.  */
  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (vectype))
    {
    case E_V8DFmode:
      code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
      break;
    case E_V8DImode:
      code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
      break;
    case E_V16SFmode:
      code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
      break;
    case E_V16SImode:
      code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
      break;
    /* The 256-bit and 128-bit forms below additionally require
       AVX512VL.  */
    case E_V4DFmode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
      else
        return NULL_TREE;
      break;
    case E_V4DImode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
      else
        return NULL_TREE;
      break;
    case E_V8SFmode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
      else
        return NULL_TREE;
      break;
    case E_V8SImode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
      else
        return NULL_TREE;
      break;
    case E_V2DFmode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
      else
        return NULL_TREE;
      break;
    case E_V2DImode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
      else
        return NULL_TREE;
      break;
    case E_V4SFmode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
      else
        return NULL_TREE;
      break;
    case E_V4SImode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
      else
        return NULL_TREE;
      break;
    default:
      return NULL_TREE;
    }

  return get_ix86_builtin (code);
}
   20760              : 
   20761              : /* Return true if it is safe to use the rsqrt optabs to optimize
   20762              :    1.0/sqrt.  */
   20763              : 
   20764              : static bool
   20765           84 : use_rsqrt_p (machine_mode mode)
   20766              : {
   20767           84 :   return ((mode == HFmode
   20768           36 :            || (TARGET_SSE && TARGET_SSE_MATH))
   20769           84 :           && flag_finite_math_only
   20770           83 :           && !flag_trapping_math
   20771          149 :           && flag_unsafe_math_optimizations);
   20772              : }
   20773              : 
/* Helper for avx_vpermilps256_operand et al.  This is also used by
   the expansion functions to turn the parallel back into a mask.
   The return value is 0 for no match and the imm8+1 for a match.  */

int
avx_vpermilp_parallel (rtx par, machine_mode mode)
{
  unsigned i, nelt = GET_MODE_NUNITS (mode);
  unsigned mask = 0;
  unsigned char ipar[16] = {};  /* Silence -Wuninitialized warning.  */

  /* The parallel must select exactly one source element per result
     element.  */
  if (XVECLEN (par, 0) != (int) nelt)
    return 0;

  /* Validate that all of the elements are constants, and not totally
     out of range.  Copy the data into an integral array to make the
     subsequent checks easier.  */
  for (i = 0; i < nelt; ++i)
    {
      rtx er = XVECEXP (par, 0, i);
      unsigned HOST_WIDE_INT ei;

      if (!CONST_INT_P (er))
        return 0;
      ei = INTVAL (er);
      if (ei >= nelt)
        return 0;
      ipar[i] = ei;
    }

  /* Now check the lane restrictions of each mode and accumulate the
     imm8 encoding into MASK as we go.  */
  switch (mode)
    {
    case E_V8DFmode:
    case E_V8DImode:
      /* In the 512-bit DFmode case, we can only move elements within
         a 128-bit lane.  First fill the second part of the mask,
         then fallthru.  */
      for (i = 4; i < 6; ++i)
        {
          if (!IN_RANGE (ipar[i], 4, 5))
            return 0;
          mask |= (ipar[i] - 4) << i;
        }
      for (i = 6; i < 8; ++i)
        {
          if (!IN_RANGE (ipar[i], 6, 7))
            return 0;
          mask |= (ipar[i] - 6) << i;
        }
      /* FALLTHRU */

    case E_V4DFmode:
    case E_V4DImode:
      /* In the 256-bit DFmode case, we can only move elements within
         a 128-bit lane.  */
      for (i = 0; i < 2; ++i)
        {
          if (!IN_RANGE (ipar[i], 0, 1))
            return 0;
          mask |= ipar[i] << i;
        }
      for (i = 2; i < 4; ++i)
        {
          if (!IN_RANGE (ipar[i], 2, 3))
            return 0;
          mask |= (ipar[i] - 2) << i;
        }
      break;

    case E_V16SFmode:
    case E_V16SImode:
      /* In 512 bit SFmode case, permutation in the upper 256 bits
         must mirror the permutation in the lower 256-bits.  */
      for (i = 0; i < 8; ++i)
        if (ipar[i] + 8 != ipar[i + 8])
          return 0;
      /* FALLTHRU */

    case E_V8SFmode:
    case E_V8SImode:
      /* In 256 bit SFmode case, we have full freedom of
         movement within the low 128-bit lane, but the high 128-bit
         lane must mirror the exact same pattern.  */
      for (i = 0; i < 4; ++i)
        if (ipar[i] + 4 != ipar[i + 4])
          return 0;
      nelt = 4;
      /* FALLTHRU */

    case E_V2DFmode:
    case E_V2DImode:
    case E_V4SFmode:
    case E_V4SImode:
      /* In the 128-bit case, we've full freedom in the placement of
         the elements from the source operand.  Each index occupies
         nelt/2 bits of the immediate (2 bits for 4 elements, 1 bit
         for 2), so the whole selector packs into the imm8.  */
      for (i = 0; i < nelt; ++i)
        mask |= ipar[i] << (i * (nelt / 2));
      break;

    default:
      gcc_unreachable ();
    }

  /* Make sure success has a non-zero value by adding one.  */
  return mask + 1;
}
   20880              : 
   20881              : /* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   20882              :    the expansion functions to turn the parallel back into a mask.
   20883              :    The return value is 0 for no match and the imm8+1 for a match.  */
   20884              : 
   20885              : int
   20886        50646 : avx_vperm2f128_parallel (rtx par, machine_mode mode)
   20887              : {
   20888        50646 :   unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
   20889        50646 :   unsigned mask = 0;
   20890        50646 :   unsigned char ipar[8] = {};  /* Silence -Wuninitialized warning.  */
   20891              : 
   20892        50646 :   if (XVECLEN (par, 0) != (int) nelt)
   20893              :     return 0;
   20894              : 
   20895              :   /* Validate that all of the elements are constants, and not totally
   20896              :      out of range.  Copy the data into an integral array to make the
   20897              :      subsequent checks easier.  */
   20898       404750 :   for (i = 0; i < nelt; ++i)
   20899              :     {
   20900       354104 :       rtx er = XVECEXP (par, 0, i);
   20901       354104 :       unsigned HOST_WIDE_INT ei;
   20902              : 
   20903       354104 :       if (!CONST_INT_P (er))
   20904              :         return 0;
   20905       354104 :       ei = INTVAL (er);
   20906       354104 :       if (ei >= 2 * nelt)
   20907              :         return 0;
   20908       354104 :       ipar[i] = ei;
   20909              :     }
   20910              : 
   20911              :   /* Validate that the halves of the permute are halves.  */
   20912        98909 :   for (i = 0; i < nelt2 - 1; ++i)
   20913        79326 :     if (ipar[i] + 1 != ipar[i + 1])
   20914              :       return 0;
   20915        57980 :   for (i = nelt2; i < nelt - 1; ++i)
   20916        39803 :     if (ipar[i] + 1 != ipar[i + 1])
   20917              :       return 0;
   20918              : 
   20919              :   /* Reconstruct the mask.  */
   20920        54443 :   for (i = 0; i < 2; ++i)
   20921              :     {
   20922        36312 :       unsigned e = ipar[i * nelt2];
   20923        36312 :       if (e % nelt2)
   20924              :         return 0;
   20925        36266 :       e /= nelt2;
   20926        36266 :       mask |= e << (i * 4);
   20927              :     }
   20928              : 
   20929              :   /* Make sure success has a non-zero value by adding one.  */
   20930        18131 :   return mask + 1;
   20931              : }
   20932              : 
   20933              : /* Return a mask of VPTERNLOG operands that do not affect output.  */
   20934              : 
   20935              : int
   20936         2441 : vpternlog_redundant_operand_mask (rtx pternlog_imm)
   20937              : {
   20938         2441 :   int mask = 0;
   20939         2441 :   int imm8 = INTVAL (pternlog_imm);
   20940              : 
   20941         2441 :   if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F))
   20942            6 :     mask |= 1;
   20943         2441 :   if (((imm8 >> 2) & 0x33) == (imm8 & 0x33))
   20944            6 :     mask |= 2;
   20945         2441 :   if (((imm8 >> 1) & 0x55) == (imm8 & 0x55))
   20946          163 :     mask |= 4;
   20947              : 
   20948         2441 :   return mask;
   20949              : }
   20950              : 
   20951              : /* Eliminate false dependencies on operands that do not affect output
   20952              :    by substituting other operands of a VPTERNLOG.  */
   20953              : 
   20954              : void
   20955           85 : substitute_vpternlog_operands (rtx *operands)
   20956              : {
   20957           85 :   int mask = vpternlog_redundant_operand_mask (operands[4]);
   20958              : 
   20959           85 :   if (mask & 1) /* The first operand is redundant.  */
   20960            2 :     operands[1] = operands[2];
   20961              : 
   20962           85 :   if (mask & 2) /* The second operand is redundant.  */
   20963            2 :     operands[2] = operands[1];
   20964              : 
   20965           85 :   if (mask & 4) /* The third operand is redundant.  */
   20966           81 :     operands[3] = operands[1];
   20967            4 :   else if (REG_P (operands[3]))
   20968              :     {
   20969            0 :       if (mask & 1)
   20970            0 :         operands[1] = operands[3];
   20971            0 :       if (mask & 2)
   20972            0 :         operands[2] = operands[3];
   20973              :     }
   20974           85 : }
   20975              : 
   20976              : /* Return a register priority for hard reg REGNO.  */
   20977              : static int
   20978     58017026 : ix86_register_priority (int hard_regno)
   20979              : {
   20980              :   /* ebp and r13 as the base always wants a displacement, r12 as the
   20981              :      base always wants an index.  So discourage their usage in an
   20982              :      address.  */
   20983     58017026 :   if (hard_regno == R12_REG || hard_regno == R13_REG)
   20984              :     return 0;
   20985     53596806 :   if (hard_regno == BP_REG)
   20986              :     return 1;
   20987              :   /* New x86-64 int registers result in bigger code size.  Discourage them.  */
   20988     51622791 :   if (REX_INT_REGNO_P (hard_regno))
   20989              :     return 2;
   20990     35231263 :   if (REX2_INT_REGNO_P (hard_regno))
   20991              :     return 2;
   20992              :   /* New x86-64 SSE registers result in bigger code size.  Discourage them.  */
   20993     35228823 :   if (REX_SSE_REGNO_P (hard_regno))
   20994              :     return 2;
   20995     29146013 :   if (EXT_REX_SSE_REGNO_P (hard_regno))
   20996              :     return 1;
   20997              :   /* Usage of AX register results in smaller code.  Prefer it.  */
   20998     28867813 :   if (hard_regno == AX_REG)
   20999      3776138 :     return 4;
   21000              :   return 3;
   21001              : }
   21002              : 
/* Implement TARGET_PREFERRED_RELOAD_CLASS.

   Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */

static reg_class_t
ix86_preferred_reload_class (rtx x, reg_class_t regclass)
{
  machine_mode mode = GET_MODE (x);

  /* We're only allowed to return a subclass of CLASS.  Many of the
     following checks fail for NO_REGS, so eliminate that early.  */
  if (regclass == NO_REGS)
    return NO_REGS;

  /* All classes can load zeros.  */
  if (x == CONST0_RTX (mode))
    return regclass;

  /* Force constants into memory if we are loading a (nonzero) constant into
     an MMX, SSE or MASK register.  This is because there are no MMX/SSE/MASK
     instructions to load from a constant.  */
  if (CONSTANT_P (x)
      && (MAYBE_MMX_CLASS_P (regclass)
          || MAYBE_SSE_CLASS_P (regclass)
          || MAYBE_MASK_CLASS_P (regclass)))
    return NO_REGS;

  /* Floating-point constants need more complex checks.  */
  if (CONST_DOUBLE_P (x))
    {
      /* General regs can load everything.  */
      if (INTEGER_CLASS_P (regclass))
        return regclass;

      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
         zero above.  We only want to wind up preferring 80387 registers if
         we plan on doing computation with them.  */
      if (IS_STACK_MODE (mode)
          && standard_80387_constant_p (x) > 0)
        {
          /* Limit class to FP regs.  */
          if (FLOAT_CLASS_P (regclass))
            return FLOAT_REGS;
        }

      /* Any other FP constant goes to memory.  */
      return NO_REGS;
    }

  /* Prefer SSE if we can use them for math.  Also allow integer regs
     when moves between register units are cheap.  */
  if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
    {
      if (TARGET_INTER_UNIT_MOVES_FROM_VEC
          && TARGET_INTER_UNIT_MOVES_TO_VEC
          && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
        return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
      else
        return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
    }

  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int).  Which can only be computed into general
     regs.  */
  if (GET_CODE (x) == PLUS)
    return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;

  /* QImode constants are easy to load, but non-constant QImode data
     must go into Q_REGS or ALL_MASK_REGS.  */
  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
    {
      if (Q_CLASS_P (regclass))
        return regclass;
      else if (reg_class_subset_p (Q_REGS, regclass))
        return Q_REGS;
      else if (MASK_CLASS_P (regclass))
        return regclass;
      else
        return NO_REGS;
    }

  return regclass;
}
   21088              : 
   21089              : /* Discourage putting floating-point values in SSE registers unless
   21090              :    SSE math is being used, and likewise for the 387 registers.  */
   21091              : static reg_class_t
   21092     74413340 : ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
   21093              : {
   21094              :   /* Restrict the output reload class to the register bank that we are doing
   21095              :      math on.  If we would like not to return a subset of CLASS, reject this
   21096              :      alternative: if reload cannot do this, it will still use its choice.  */
   21097     74413340 :   machine_mode mode = GET_MODE (x);
   21098     74413340 :   if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
   21099      7210118 :     return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
   21100              : 
   21101     67203222 :   if (IS_STACK_MODE (mode))
   21102       209534 :     return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
   21103              : 
   21104              :   return regclass;
   21105              : }
   21106              : 
/* Implement TARGET_SECONDARY_RELOAD.  Return the register class needed
   as an intermediate when reloading X (of mode MODE) in or out of a
   register of class RCLASS, or NO_REGS when none is needed; SRI may be
   filled in with a special reload insn code and extra cost.  */
static reg_class_t
ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
                       machine_mode mode, secondary_reload_info *sri)
{
  /* Double-word spills from general registers to non-offsettable memory
     references (zero-extended addresses) require special handling.  */
  if (TARGET_64BIT
      && MEM_P (x)
      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
      && INTEGER_CLASS_P (rclass)
      && !offsettable_memref_p (x))
    {
      /* Handle the spill through a special reload pattern instead of
	 an intermediate register class.  */
      sri->icode = (in_p
                    ? CODE_FOR_reload_noff_load
                    : CODE_FOR_reload_noff_store);
      /* Add the cost of moving address to a temporary.  */
      sri->extra_cost = 1;

      return NO_REGS;
    }

  /* QImode spills from non-QI registers require
     intermediate register on 32bit targets.  */
  if (mode == QImode
      && ((!TARGET_64BIT && !in_p
           && INTEGER_CLASS_P (rclass)
           && MAYBE_NON_Q_CLASS_P (rclass))
          || (!TARGET_AVX512DQ
              && MAYBE_MASK_CLASS_P (rclass))))
    {
      int regno = true_regnum (x);

      /* Return Q_REGS if the operand is in memory.  */
      if (regno == -1)
        return Q_REGS;

      return NO_REGS;
    }

  /* Require movement to gpr, and then store to memory.  */
  if ((mode == HFmode || mode == HImode || mode == V2QImode
       || mode == BFmode)
      && !TARGET_SSE4_1
      && SSE_CLASS_P (rclass)
      && !in_p && MEM_P (x))
    {
      sri->extra_cost = 1;
      return GENERAL_REGS;
    }

  /* This condition handles corner case where an expression involving
     pointers gets vectorized.  We're trying to use the address of a
     stack slot as a vector initializer.

     (set (reg:V2DI 74 [ vect_cst_.2 ])
          (vec_duplicate:V2DI (reg/f:DI 20 frame)))

     Eventually frame gets turned into sp+offset like this:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
                                       (const_int 392 [0x188]))))

     That later gets turned into:

     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))

     We'll have the following reload recorded:

     Reload 0: reload_in (DI) =
           (plus:DI (reg/f:DI 7 sp)
            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
     reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
     reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
     reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
     reload_reg_rtx: (reg:V2DI 22 xmm1)

     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into integer
     register and reload can handle subsequent reloads without problems.  */

  if (in_p && GET_CODE (x) == PLUS
      && SSE_CLASS_P (rclass)
      && SCALAR_INT_MODE_P (mode))
    return GENERAL_REGS;

  /* No secondary reload needed.  */
  return NO_REGS;
}
   21198              : 
   21199              : /* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */
   21200              : 
   21201              : static bool
   21202    716056063 : ix86_class_likely_spilled_p (reg_class_t rclass)
   21203              : {
   21204    705980310 :   switch (rclass)
   21205              :     {
   21206              :       case AREG:
   21207              :       case DREG:
   21208              :       case CREG:
   21209              :       case BREG:
   21210              :       case AD_REGS:
   21211              :       case SIREG:
   21212              :       case DIREG:
   21213              :       case SSE_FIRST_REG:
   21214              :       case FP_TOP_REG:
   21215              :       case FP_SECOND_REG:
   21216              :         return true;
   21217              : 
   21218    684500871 :       default:
   21219    684500871 :         break;
   21220              :     }
   21221              : 
   21222    684500871 :   return false;
   21223              : }
   21224              : 
   21225              : /* Implement TARGET_CALLEE_SAVE_COST.  */
   21226              : 
   21227              : static int
   21228     81614046 : ix86_callee_save_cost (spill_cost_type, unsigned int hard_regno, machine_mode,
   21229              :                        unsigned int, int mem_cost, const HARD_REG_SET &, bool)
   21230              : {
   21231              :   /* Account for the fact that push and pop are shorter and do their
   21232              :      own allocation and deallocation.  */
   21233     81614046 :   if (GENERAL_REGNO_P (hard_regno))
   21234              :     {
   21235              :       /* push is 1 byte while typical spill is 4-5 bytes.
   21236              :          ??? We probably should adjust size costs accordingly.
   21237              :          Costs are relative to reg-reg move that has 2 bytes for 32bit
   21238              :          and 3 bytes otherwise.  Be sure that no cost table sets cost
   21239              :          to 2, so we end up with 0.  */
   21240     81604828 :       if (mem_cost <= 2 || optimize_function_for_size_p (cfun))
   21241      3602502 :         return 1;
   21242     78002326 :       return mem_cost - 2;
   21243              :     }
   21244              :   return mem_cost;
   21245              : }
   21246              : 
   21247              : /* Return true if a set of DST by the expression SRC should be allowed.
   21248              :    This prevents complex sets of likely_spilled hard regs before split1.  */
   21249              : 
   21250              : bool
   21251    629510621 : ix86_hardreg_mov_ok (rtx dst, rtx src)
   21252              : {
   21253              :   /* Avoid complex sets of likely_spilled hard registers before reload.  */
   21254    512760979 :   if (REG_P (dst) && HARD_REGISTER_P (dst)
   21255    303162292 :       && !REG_P (src) && !MEM_P (src)
   21256     95250163 :       && !(VECTOR_MODE_P (GET_MODE (dst))
   21257     95250163 :            ? standard_sse_constant_p (src, GET_MODE (dst))
   21258     47389563 :            : x86_64_immediate_operand (src, GET_MODE (dst)))
   21259     10075753 :       && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
   21260    638361985 :       && ix86_pre_reload_split ())
   21261              :     return false;
   21262              :   return true;
   21263              : }
   21264              : 
/* If we are copying between registers from different register sets
   (e.g. FP and integer), we may need a memory location.

   The function can't work reliably when one of the CLASSES is a class
   containing registers from multiple sets.  We avoid this by never combining
   different sets in a single alternative in the machine description.
   Ensure that this constraint holds to avoid unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST,
   so do not enforce these sanity checks.

   To optimize register_move_cost performance, define inline variant.  */

static inline bool
inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
				reg_class_t class2, int strict)
{
  /* During LRA an empty class on either side means no real copy takes
     place, so no secondary memory can be needed.  */
  if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
    return false;

  /* A class where MAYBE_*_CLASS_P differs from *_CLASS_P mixes registers
     from several sets; per the comment above such classes must not appear
     in strict (reload) queries.  Conservatively request memory.  */
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
      || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
      || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
    {
      gcc_assert (!strict || lra_in_progress);
      return true;
    }

  /* x87 <-> non-x87 always goes through memory.  */
  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
    return true;

  /* ??? This is a lie.  We do have moves between mmx/general, and for
     mmx/sse2.  But by saying we need secondary memory we discourage the
     register allocator from using the mmx registers unless needed.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    return true;

  /* Between mask and general, we have moves no larger than word size.  */
  if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
    {
      if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
	  || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
	return true;
    }

  /* SSE <-> non-SSE: direct moves exist only to/from integer registers
     and only under certain ISA/tuning conditions.  */
  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    {
      /* SSE1 doesn't have any direct moves from other classes.  */
      if (!TARGET_SSE2)
	return true;

      if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
	return true;

      /* If the target says that inter-unit moves are more expensive
	 than moving through memory, then don't generate them.  */
      if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
	  || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
	return true;

      /* With SSE4.1, *mov{ti,di}_internal supports moves between
	 SSE_REGS and GENERAL_REGS using pinsr{q,d} or pextr{q,d}.  */
      if (TARGET_SSE4_1
	  && (TARGET_64BIT ? mode == TImode : mode == DImode))
	return false;

      int msize = GET_MODE_SIZE (mode);

      /* Between SSE and general, we have moves no larger than word size.  */
      if (msize > UNITS_PER_WORD)
	return true;

      /* In addition to SImode moves, HImode moves are supported for SSE2 and above,
	 Use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16.  */
      int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);

      if (msize < minsize)
	return true;
    }

  return false;
}
   21352              : 
/* Implement TARGET_SECONDARY_MEMORY_NEEDED.
   Thin wrapper that forwards to the inline variant with STRICT
   checking enabled (we are called from reload/LRA here, so the
   sanity checks on mixed register classes must be enforced).  */

static bool
ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
			      reg_class_t class2)
{
  return inline_secondary_memory_needed (mode, class1, class2, true);
}
   21361              : 
   21362              : /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
   21363              : 
   21364              :    get_secondary_mem widens integral modes to BITS_PER_WORD.
   21365              :    There is no need to emit full 64 bit move on 64 bit targets
   21366              :    for integral modes that can be moved using 32 bit move.  */
   21367              : 
   21368              : static machine_mode
   21369        13178 : ix86_secondary_memory_needed_mode (machine_mode mode)
   21370              : {
   21371        26356 :   if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
   21372           19 :     return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
   21373              :   return mode;
   21374              : }
   21375              : 
   21376              : /* Implement the TARGET_CLASS_MAX_NREGS hook.
   21377              : 
   21378              :    On the 80386, this is the size of MODE in words,
   21379              :    except in the FP regs, where a single reg is always enough.  */
   21380              : 
   21381              : static unsigned char
   21382   5942220194 : ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
   21383              : {
   21384   5942220194 :   if (MAYBE_INTEGER_CLASS_P (rclass))
   21385              :     {
   21386   3997427355 :       if (mode == XFmode)
   21387    145675691 :         return (TARGET_64BIT ? 2 : 3);
   21388   3851751664 :       else if (mode == XCmode)
   21389    145675322 :         return (TARGET_64BIT ? 4 : 6);
   21390              :       else
   21391   7518172516 :         return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
   21392              :     }
   21393              :   else
   21394              :     {
   21395   1944792839 :       if (COMPLEX_MODE_P (mode))
   21396              :         return 2;
   21397              :       else
   21398   1660986378 :         return 1;
   21399              :     }
   21400              : }
   21401              : 
   21402              : /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
   21403              : 
   21404              : static bool
   21405     39492967 : ix86_can_change_mode_class (machine_mode from, machine_mode to,
   21406              :                             reg_class_t regclass)
   21407              : {
   21408     39492967 :   if (from == to)
   21409              :     return true;
   21410              : 
   21411              :   /* x87 registers can't do subreg at all, as all values are reformatted
   21412              :      to extended precision.
   21413              : 
   21414              :      ??? middle-end queries mode changes for ALL_REGS and this makes
   21415              :      vec_series_lowpart_p to always return false.  We probably should
   21416              :      restrict this to modes supported by i387 and check if it is enabled.  */
   21417     38094437 :   if (MAYBE_FLOAT_CLASS_P (regclass))
   21418              :     return false;
   21419              : 
   21420     33425905 :   if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
   21421              :     {
   21422              :       /* Vector registers do not support QI or HImode loads.  If we don't
   21423              :          disallow a change to these modes, reload will assume it's ok to
   21424              :          drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
   21425              :          the vec_dupv4hi pattern.
   21426              :          NB: SSE2 can load 16bit data to sse register via pinsrw.  */
   21427     16252196 :       int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
   21428     16252196 :       if (GET_MODE_SIZE (from) < mov_size
   21429     32504112 :           || GET_MODE_SIZE (to) < mov_size)
   21430              :         return false;
   21431              :     }
   21432              : 
   21433              :   return true;
   21434              : }
   21435              : 
   21436              : /* Return index of MODE in the sse load/store tables.  */
   21437              : 
   21438              : static inline int
   21439    772271204 : sse_store_index (machine_mode mode)
   21440              : {
   21441              :   /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store
   21442              :      costs to processor_costs, which requires changes to all entries in
   21443              :      processor cost table.  */
   21444    772271204 :   if (mode == E_HFmode)
   21445    136922112 :     mode = E_SFmode;
   21446              : 
   21447   1544542408 :   switch (GET_MODE_SIZE (mode))
   21448              :     {
   21449              :     case 4:
   21450              :       return 0;
   21451              :     case 8:
   21452              :       return 1;
   21453              :     case 16:
   21454              :       return 2;
   21455              :     case 32:
   21456              :       return 3;
   21457              :     case 64:
   21458              :       return 4;
   21459              :     default:
   21460              :       return -1;
   21461              :     }
   21462              : }
   21463              : 
   21464              : /* Return the cost of moving data of mode M between a
   21465              :    register and memory.  A value of 2 is the default; this cost is
   21466              :    relative to those in `REGISTER_MOVE_COST'.
   21467              : 
   21468              :    This function is used extensively by register_move_cost that is used to
   21469              :    build tables at startup.  Make it inline in this case.
   21470              :    When IN is 2, return maximum of in and out move cost.
   21471              : 
   21472              :    If moving between registers and memory is more expensive than
   21473              :    between two registers, you should define this macro to express the
   21474              :    relative cost.
   21475              : 
   21476              :    Model also increased moving costs of QImode registers in non
   21477              :    Q_REGS classes.
   21478              :  */
   21479              : static inline int
   21480   6904884914 : inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
   21481              : {
   21482   6904884914 :   int cost;
   21483              : 
   21484   6904884914 :   if (FLOAT_CLASS_P (regclass))
   21485              :     {
   21486    352629704 :       int index;
   21487    352629704 :       switch (mode)
   21488              :         {
   21489              :           case E_SFmode:
   21490              :             index = 0;
   21491              :             break;
   21492              :           case E_DFmode:
   21493              :             index = 1;
   21494              :             break;
   21495              :           case E_XFmode:
   21496              :             index = 2;
   21497              :             break;
   21498              :           default:
   21499              :             return 100;
   21500              :         }
   21501    105406598 :       if (in == 2)
   21502    101473476 :         return MAX (ix86_cost->hard_register.fp_load [index],
   21503              :                     ix86_cost->hard_register.fp_store [index]);
   21504      3933122 :       return in ? ix86_cost->hard_register.fp_load [index]
   21505      3933122 :                 : ix86_cost->hard_register.fp_store [index];
   21506              :     }
   21507   6552255210 :   if (SSE_CLASS_P (regclass))
   21508              :     {
   21509    643880004 :       int index = sse_store_index (mode);
   21510    643880004 :       if (index == -1)
   21511              :         return 100;
   21512    559961169 :       if (in == 2)
   21513    396815180 :         return MAX (ix86_cost->hard_register.sse_load [index],
   21514              :                     ix86_cost->hard_register.sse_store [index]);
   21515    163145989 :       return in ? ix86_cost->hard_register.sse_load [index]
   21516    163145989 :                 : ix86_cost->hard_register.sse_store [index];
   21517              :     }
   21518   5908375206 :   if (MASK_CLASS_P (regclass))
   21519              :     {
   21520    108126550 :       int index;
   21521    216253100 :       switch (GET_MODE_SIZE (mode))
   21522              :         {
   21523              :         case 1:
   21524              :           index = 0;
   21525              :           break;
   21526      8921149 :         case 2:
   21527      8921149 :           index = 1;
   21528      8921149 :           break;
   21529              :         /* DImode loads and stores assumed to cost the same as SImode.  */
   21530     40127153 :         case 4:
   21531     40127153 :         case 8:
   21532     40127153 :           index = 2;
   21533     40127153 :           break;
   21534              :         default:
   21535              :           return 100;
   21536              :         }
   21537              : 
   21538     52620886 :       if (in == 2)
   21539       584275 :         return MAX (ix86_cost->hard_register.mask_load[index],
   21540              :                     ix86_cost->hard_register.mask_store[index]);
   21541     52036611 :       return in ? ix86_cost->hard_register.mask_load[2]
   21542     52036611 :                 : ix86_cost->hard_register.mask_store[2];
   21543              :     }
   21544   5800248656 :   if (MMX_CLASS_P (regclass))
   21545              :     {
   21546    172171813 :       int index;
   21547    344343626 :       switch (GET_MODE_SIZE (mode))
   21548              :         {
   21549              :           case 4:
   21550              :             index = 0;
   21551              :             break;
   21552    101046497 :           case 8:
   21553    101046497 :             index = 1;
   21554    101046497 :             break;
   21555              :           default:
   21556              :             return 100;
   21557              :         }
   21558    138348049 :       if (in == 2)
   21559    118400679 :         return MAX (ix86_cost->hard_register.mmx_load [index],
   21560              :                     ix86_cost->hard_register.mmx_store [index]);
   21561     19947370 :       return in ? ix86_cost->hard_register.mmx_load [index]
   21562     19947370 :                 : ix86_cost->hard_register.mmx_store [index];
   21563              :     }
   21564  11256153686 :   switch (GET_MODE_SIZE (mode))
   21565              :     {
   21566    124585275 :       case 1:
   21567    124585275 :         if (Q_CLASS_P (regclass) || TARGET_64BIT)
   21568              :           {
   21569    121954562 :             if (!in)
   21570     19563556 :               return ix86_cost->hard_register.int_store[0];
   21571    102391006 :             if (TARGET_PARTIAL_REG_DEPENDENCY
   21572    102391006 :                 && optimize_function_for_speed_p (cfun))
   21573     95490616 :               cost = ix86_cost->hard_register.movzbl_load;
   21574              :             else
   21575      6900390 :               cost = ix86_cost->hard_register.int_load[0];
   21576    102391006 :             if (in == 2)
   21577     82799870 :               return MAX (cost, ix86_cost->hard_register.int_store[0]);
   21578              :             return cost;
   21579              :           }
   21580              :         else
   21581              :           {
   21582      2630713 :            if (in == 2)
   21583      1863020 :              return MAX (ix86_cost->hard_register.movzbl_load,
   21584              :                          ix86_cost->hard_register.int_store[0] + 4);
   21585       767693 :            if (in)
   21586       383901 :              return ix86_cost->hard_register.movzbl_load;
   21587              :            else
   21588       383792 :              return ix86_cost->hard_register.int_store[0] + 4;
   21589              :           }
   21590    643094145 :         break;
   21591    643094145 :       case 2:
   21592    643094145 :         {
   21593    643094145 :           int cost;
   21594    643094145 :           if (in == 2)
   21595    543336563 :             cost = MAX (ix86_cost->hard_register.int_load[1],
   21596              :                         ix86_cost->hard_register.int_store[1]);
   21597              :           else
   21598     99757582 :             cost = in ? ix86_cost->hard_register.int_load[1]
   21599              :                       : ix86_cost->hard_register.int_store[1];
   21600              : 
   21601    643094145 :           if (mode == E_HFmode)
   21602              :             {
   21603              :               /* Prefer SSE over GPR for HFmode.  */
   21604    124616280 :               int sse_cost;
   21605    124616280 :               int index = sse_store_index (mode);
   21606    124616280 :               if (in == 2)
   21607    114642544 :                 sse_cost = MAX (ix86_cost->hard_register.sse_load[index],
   21608              :                                 ix86_cost->hard_register.sse_store[index]);
   21609              :               else
   21610     19947472 :                 sse_cost = (in
   21611      9973736 :                             ? ix86_cost->hard_register.sse_load [index]
   21612              :                             : ix86_cost->hard_register.sse_store [index]);
   21613    124616280 :               if (sse_cost >= cost)
   21614    124616280 :                 cost = sse_cost + 1;
   21615              :             }
   21616              :           return cost;
   21617              :         }
   21618   4860397423 :       default:
   21619   4860397423 :         if (in == 2)
   21620   3762908338 :           cost = MAX (ix86_cost->hard_register.int_load[2],
   21621              :                       ix86_cost->hard_register.int_store[2]);
   21622   1097489085 :         else if (in)
   21623    548937015 :           cost = ix86_cost->hard_register.int_load[2];
   21624              :         else
   21625    548552070 :           cost = ix86_cost->hard_register.int_store[2];
   21626              :         /* Multiply with the number of GPR moves needed.  */
   21627   9839842331 :         return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
   21628              :     }
   21629              : }
   21630              : 
/* Implement TARGET_MEMORY_MOVE_COST: one-directional wrapper around
   inline_memory_move_cost, mapping the bool IN to 1 (load) or 0
   (store) — never 2, which is reserved for the max-of-both query.  */
static int
ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
{
  return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
}
   21636              : 
   21637              : 
   21638              : /* Return the cost of moving data from a register in class CLASS1 to
   21639              :    one in class CLASS2.
   21640              : 
   21641              :    It is not required that the cost always equal 2 when FROM is the same as TO;
   21642              :    on some machines it is expensive to move between registers if they are not
   21643              :    general registers.  */
   21644              : 
   21645              : static int
   21646   5602334688 : ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
   21647              :                          reg_class_t class2_i)
   21648              : {
   21649   5602334688 :   enum reg_class class1 = (enum reg_class) class1_i;
   21650   5602334688 :   enum reg_class class2 = (enum reg_class) class2_i;
   21651              : 
   21652              :   /* In case we require secondary memory, compute cost of the store followed
   21653              :      by load.  In order to avoid bad register allocation choices, we need
   21654              :      for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
   21655              : 
   21656   5602334688 :   if (inline_secondary_memory_needed (mode, class1, class2, false))
   21657              :     {
   21658   2564892493 :       int cost = 1;
   21659              : 
   21660   2564892493 :       cost += inline_memory_move_cost (mode, class1, 2);
   21661   2564892493 :       cost += inline_memory_move_cost (mode, class2, 2);
   21662              : 
   21663              :       /* In case of copying from general_purpose_register we may emit multiple
   21664              :          stores followed by single load causing memory size mismatch stall.
   21665              :          Count this as arbitrarily high cost of 20.  */
   21666   5129784986 :       if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
   21667    767892416 :           && TARGET_MEMORY_MISMATCH_STALL
   21668   4100677325 :           && targetm.class_max_nregs (class1, mode)
   21669    767892416 :              > targetm.class_max_nregs (class2, mode))
   21670    146115602 :         cost += 20;
   21671              : 
   21672              :       /* In the case of FP/MMX moves, the registers actually overlap, and we
   21673              :          have to switch modes in order to treat them differently.  */
   21674     59200376 :       if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
   21675   2614769652 :           || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
   21676     18646434 :         cost += 20;
   21677              : 
   21678   2564892493 :       return cost;
   21679              :     }
   21680              : 
   21681              :   /* Moves between MMX and non-MMX units require secondary memory.  */
   21682   3037442195 :   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
   21683            0 :     gcc_unreachable ();
   21684              : 
   21685   3037442195 :   if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
   21686    581442737 :     return (SSE_CLASS_P (class1)
   21687    581442737 :             ? ix86_cost->hard_register.sse_to_integer
   21688    581442737 :             : ix86_cost->hard_register.integer_to_sse);
   21689              : 
   21690              :   /* Moves between mask register and GPR.  */
   21691   2455999458 :   if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
   21692              :     {
   21693      1055171 :       return (MASK_CLASS_P (class1)
   21694      1055171 :               ? ix86_cost->hard_register.mask_to_integer
   21695      1055171 :               : ix86_cost->hard_register.integer_to_mask);
   21696              :     }
   21697              :   /* Moving between mask registers.  */
   21698   2454944287 :   if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
   21699       101344 :     return ix86_cost->hard_register.mask_move;
   21700              : 
   21701   2454842943 :   if (MAYBE_FLOAT_CLASS_P (class1))
   21702     11783902 :     return ix86_cost->hard_register.fp_move;
   21703   2443059041 :   if (MAYBE_SSE_CLASS_P (class1))
   21704              :     {
   21705    229315214 :       if (GET_MODE_BITSIZE (mode) <= 128)
   21706    112184339 :         return ix86_cost->hard_register.xmm_move;
   21707      4946536 :       if (GET_MODE_BITSIZE (mode) <= 256)
   21708      1572175 :         return ix86_cost->hard_register.ymm_move;
   21709       901093 :       return ix86_cost->hard_register.zmm_move;
   21710              :     }
   21711   2328401434 :   if (MAYBE_MMX_CLASS_P (class1))
   21712      2168243 :     return ix86_cost->hard_register.mmx_move;
   21713              :   return 2;
   21714              : }
   21715              : 
   21716              : /* Implement TARGET_HARD_REGNO_NREGS.  This is ordinarily the length in
   21717              :    words of a value of mode MODE but can be less for certain modes in
   21718              :    special long registers.
   21719              : 
   21720              :    Actually there are no two word move instructions for consecutive
   21721              :    registers.  And only registers 0-3 may have mov byte instructions
   21722              :    applied to them.  */
   21723              : 
   21724              : static unsigned int
   21725   8884675888 : ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
   21726              : {
   21727   8884675888 :   if (GENERAL_REGNO_P (regno))
   21728              :     {
   21729   3090322048 :       if (mode == XFmode)
   21730     25400608 :         return TARGET_64BIT ? 2 : 3;
   21731   3065400096 :       if (mode == XCmode)
   21732     25400608 :         return TARGET_64BIT ? 4 : 6;
   21733   6139352320 :       return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
   21734              :     }
   21735   5794353840 :   if (COMPLEX_MODE_P (mode))
   21736              :     return 2;
   21737              :   /* Register pair for mask registers.  */
   21738   5046695280 :   if (mode == P2QImode || mode == P2HImode)
   21739     93457320 :     return 2;
   21740              : 
   21741              :   return 1;
   21742              : }
   21743              : 
   21744              : /* Implement REGMODE_NATURAL_SIZE(MODE).  */
   21745              : unsigned int
   21746    106615971 : ix86_regmode_natural_size (machine_mode mode)
   21747              : {
   21748    106615971 :   if (mode == P2HImode || mode == P2QImode)
   21749         2480 :     return GET_MODE_SIZE (mode) / 2;
   21750    106614731 :   return UNITS_PER_WORD;
   21751              : }
   21752              : 
/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM)
    return false;
  /* x87 stack registers hold only the FP modes they support.  */
  if (STACK_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (MASK_REGNO_P (regno))
    {
      /* Register pair only starts at even register number.  */
      if ((mode == P2QImode || mode == P2HImode))
	return MASK_PAIR_REGNO_P(regno);

      /* Basic mask modes need AVX512F; wider (32/64-bit) mask modes
	 additionally need AVX512BW.  */
      return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
	      || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
    }

  /* Partial-integer modes are handled only by the mask-pair case above.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;

  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
	 out of SSE registers, even when no operation instructions
	 are available.  */

      /* For AVX-512 we allow, regardless of regno:
	  - XI mode
	  - any of 512-bit wide vector mode
	  - any scalar mode.  */
      if (TARGET_AVX512F
	  && ((VALID_AVX512F_REG_OR_XI_MODE (mode))
	      || VALID_AVX512F_SCALAR_MODE (mode)))
	return true;

      /* TODO check for QI/HI scalars.  */
      /* AVX512VL allows sse regs16+ for 128/256 bit modes.  */
      if (TARGET_AVX512VL
	  && (VALID_AVX256_REG_OR_OI_MODE (mode)
	      || VALID_AVX512VL_128_REG_MODE (mode)))
	return true;

      /* xmm16-xmm31 are only available for AVX-512.  */
      if (EXT_REX_SSE_REGNO_P (regno))
	return false;

      /* OImode and AVX modes are available only when AVX is enabled.  */
      return ((TARGET_AVX
	       && VALID_AVX256_REG_OR_OI_MODE (mode))
	      || VALID_SSE_REG_MODE (mode)
	      || VALID_SSE2_REG_MODE (mode)
	      || VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
	 so if the register is available at all, then we can move data of
	 the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
	      || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
      if (ANY_QI_REGNO_P (regno))
	return true;
      if (!TARGET_PARTIAL_REG_STALL)
	return true;
      /* LRA checks if the hard register is OK for the given mode.
	 QImode values can live in non-QI regs, so we allow all
	 registers here.  */
      if (lra_in_progress)
       return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode)
	   || VALID_FP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}
   21850              : 
   21851              : /* Implement TARGET_INSN_CALLEE_ABI.  */
   21852              : 
   21853              : const predefined_function_abi &
   21854    247865737 : ix86_insn_callee_abi (const rtx_insn *insn)
   21855              : {
   21856    247865737 :   unsigned int abi_id = 0;
   21857    247865737 :   rtx pat = PATTERN (insn);
   21858    247865737 :   if (vzeroupper_pattern (pat, VOIDmode))
   21859       410334 :     abi_id = ABI_VZEROUPPER;
   21860              : 
   21861    247865737 :   return function_abis[abi_id];
   21862              : }
   21863              : 
   21864              : /* Initialize function_abis with corresponding abi_id,
   21865              :    currently only handle vzeroupper.  */
   21866              : void
   21867        22624 : ix86_initialize_callee_abi (unsigned int abi_id)
   21868              : {
   21869        22624 :   gcc_assert (abi_id == ABI_VZEROUPPER);
   21870        22624 :   predefined_function_abi &vzeroupper_abi = function_abis[abi_id];
   21871        22624 :   if (!vzeroupper_abi.initialized_p ())
   21872              :     {
   21873              :       HARD_REG_SET full_reg_clobbers;
   21874         4274 :       CLEAR_HARD_REG_SET (full_reg_clobbers);
   21875         4274 :       vzeroupper_abi.initialize (ABI_VZEROUPPER, full_reg_clobbers);
   21876              :     }
   21877        22624 : }
   21878              : 
/* Emit a vzeroupper instruction, modelled as a call using the special
   vzeroupper callee ABI so that only the upper halves of the SSE
   registers are considered clobbered.  */
void
ix86_expand_avx_vzeroupper (void)
{
  /* Initialize vzeroupper_abi here.  */
  ix86_initialize_callee_abi (ABI_VZEROUPPER);
  rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ());
  /* Return false for non-local goto in can_nonlocal_goto.  */
  make_reg_eh_region_note (insn, 0, INT_MIN);
  /* Flag used for call_insn indicates it's a fake call.  */
  RTX_FLAG (insn, used) = 1;
}
   21890              : 
   21891              : 
   21892              : /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  The only ABI that
   21893              :    saves SSE registers across calls is Win64 (thus no need to check the
   21894              :    current ABI here), and with AVX enabled Win64 only guarantees that
   21895              :    the low 16 bytes are saved.  */
   21896              : 
   21897              : static bool
   21898   2035087238 : ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno,
   21899              :                                      machine_mode mode)
   21900              : {
   21901              :   /* Special ABI for vzeroupper which only clobber higher part of sse regs.  */
   21902   2035087238 :   if (abi_id == ABI_VZEROUPPER)
   21903     30889952 :       return (GET_MODE_SIZE (mode) > 16
   21904     30889952 :               && ((TARGET_64BIT && REX_SSE_REGNO_P (regno))
   21905      4723004 :                   || LEGACY_SSE_REGNO_P (regno)));
   21906              : 
   21907   2637800286 :   return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
   21908              : }
   21909              : 
   21910              : /* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   21911              :    tieable integer mode.  */
   21912              : 
   21913              : static bool
   21914     52269727 : ix86_tieable_integer_mode_p (machine_mode mode)
   21915              : {
   21916     52269727 :   switch (mode)
   21917              :     {
   21918              :     case E_HImode:
   21919              :     case E_SImode:
   21920              :       return true;
   21921              : 
   21922      5260227 :     case E_QImode:
   21923      5260227 :       return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
   21924              : 
   21925     10229056 :     case E_DImode:
   21926     10229056 :       return TARGET_64BIT;
   21927              : 
   21928              :     default:
   21929              :       return false;
   21930              :     }
   21931              : }
   21932              : 
/* Implement TARGET_MODES_TIEABLE_P.

   Return true if MODE1 is accessible in a register that can hold MODE2
   without copying.  That is, all register classes that can hold MODE2
   can also hold MODE1.

   NOTE: the checks below are order-dependent; each test assumes the
   earlier, more specific ones have already failed.  */

static bool
ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  /* Identical modes trivially tie.  */
  if (mode1 == mode2)
    return true;

  /* Any two tieable integer modes can share a general register.  */
  if (ix86_tieable_integer_mode_p (mode1)
      && ix86_tieable_integer_mode_p (mode2))
    return true;

  /* MODE2 being XFmode implies fp stack or general regs, which means we
     can tie any smaller floating point modes to it.  Note that we do not
     tie this with TFmode.  */
  if (mode2 == XFmode)
    return mode1 == SFmode || mode1 == DFmode;

  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
     that we can tie it with SFmode.  */
  if (mode2 == DFmode)
    return mode1 == SFmode;

  /* If MODE2 is only appropriate for an SSE register, then tie with
     any vector modes or scalar floating point modes acceptable to SSE
     registers, excluding scalar integer modes with SUBREG:
        (subreg:QI (reg:TI 99) 0))
        (subreg:HI (reg:TI 99) 0))
        (subreg:SI (reg:TI 99) 0))
        (subreg:DI (reg:TI 99) 0))
     to avoid unnecessary move from SSE register to integer register.
   */
  if (GET_MODE_SIZE (mode2) >= 16
      && (GET_MODE_SIZE (mode1) == GET_MODE_SIZE (mode2)
          || ((VECTOR_MODE_P (mode1) || SCALAR_FLOAT_MODE_P (mode1))
              && GET_MODE_SIZE (mode1) <= GET_MODE_SIZE (mode2)))
      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
    return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);

  /* If MODE2 is appropriate for an MMX register, then tie
     with any other mode acceptable to MMX registers.  */
  if (GET_MODE_SIZE (mode2) == 8
      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
    return (GET_MODE_SIZE (mode1) == 8
            && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));

  /* SCmode and DImode can be tied.  */
  if ((mode1 == E_SCmode && mode2 == E_DImode)
      || (mode1 == E_DImode && mode2 == E_SCmode))
    return TARGET_64BIT;

  /* [SD]Cmode and V2[SD]Fmode modes can be tied.  */
  if ((mode1 == E_SCmode && mode2 == E_V2SFmode)
      || (mode1 == E_V2SFmode && mode2 == E_SCmode)
      || (mode1 == E_DCmode && mode2 == E_V2DFmode)
      || (mode1 == E_V2DFmode && mode2 == E_DCmode))
    return true;

  return false;
}
   21997              : 
   21998              : /* Return the cost of moving between two registers of mode MODE.  */
   21999              : 
   22000              : static int
   22001     29222860 : ix86_set_reg_reg_cost (machine_mode mode)
   22002              : {
   22003     29222860 :   unsigned int units = UNITS_PER_WORD;
   22004              : 
   22005     29222860 :   switch (GET_MODE_CLASS (mode))
   22006              :     {
   22007              :     default:
   22008              :       break;
   22009              : 
   22010              :     case MODE_CC:
   22011     29222860 :       units = GET_MODE_SIZE (CCmode);
   22012              :       break;
   22013              : 
   22014      1162795 :     case MODE_FLOAT:
   22015      1162795 :       if ((TARGET_SSE && mode == TFmode)
   22016       681542 :           || (TARGET_80387 && mode == XFmode)
   22017       209441 :           || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
   22018       141802 :           || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
   22019      2295732 :         units = GET_MODE_SIZE (mode);
   22020              :       break;
   22021              : 
   22022      1305680 :     case MODE_COMPLEX_FLOAT:
   22023      1305680 :       if ((TARGET_SSE && mode == TCmode)
   22024       875386 :           || (TARGET_80387 && mode == XCmode)
   22025       444972 :           || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
   22026        14530 :           || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
   22027      2604876 :         units = GET_MODE_SIZE (mode);
   22028              :       break;
   22029              : 
   22030     18683826 :     case MODE_VECTOR_INT:
   22031     18683826 :     case MODE_VECTOR_FLOAT:
   22032     18683826 :       if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
   22033     18587725 :           || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
   22034     18415523 :           || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
   22035     15791197 :           || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
   22036     14487537 :           || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
   22037     14442027 :               && VALID_MMX_REG_MODE (mode)))
   22038      8499570 :         units = GET_MODE_SIZE (mode);
   22039              :     }
   22040              : 
   22041              :   /* Return the cost of moving between two registers of mode MODE,
   22042              :      assuming that the move will be in pieces of at most UNITS bytes.  */
   22043     29222860 :   return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
   22044              : }
   22045              : 
   22046              : /* Return cost of vector operation in MODE given that scalar version has
   22047              :    COST.  */
   22048              : 
   22049              : static int
   22050   2827787303 : ix86_vec_cost (machine_mode mode, int cost)
   22051              : {
   22052   2827787303 :   if (!VECTOR_MODE_P (mode))
   22053              :     return cost;
   22054              : 
   22055   2827563605 :   if (GET_MODE_BITSIZE (mode) == 128
   22056   2827563605 :       && TARGET_SSE_SPLIT_REGS)
   22057      2862046 :     return cost * GET_MODE_BITSIZE (mode) / 64;
   22058   2826132582 :   else if (GET_MODE_BITSIZE (mode) > 128
   22059   2826132582 :       && TARGET_AVX256_SPLIT_REGS)
   22060      1676180 :     return cost * GET_MODE_BITSIZE (mode) / 128;
   22061   2825294492 :   else if (GET_MODE_BITSIZE (mode) > 256
   22062   2825294492 :       && TARGET_AVX512_SPLIT_REGS)
   22063       194508 :     return cost * GET_MODE_BITSIZE (mode) / 256;
   22064              :   return cost;
   22065              : }
   22066              : 
/* Return cost of vec_widen_<s>mult_hi/lo_<mode>,
   vec_widen_<s>mul_hi/lo_<mode> is only available for VI124_AVX2.
   MODE is the mode of the (widened) product; UNS_P selects the
   unsigned variant.  */
static int
ix86_widen_mult_cost (const struct processor_costs *cost,
                      enum machine_mode mode, bool uns_p)
{
  gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
  /* Cost of insns emitted beyond the basic mul/shuffle sequence.  */
  int extra_cost = 0;
  /* Cost of the base multiply + interleave sequence.  */
  int basic_cost = 0;
  switch (mode)
    {
    case V8HImode:
    case V16HImode:
      /* Signed (and 256-bit) variants need extra interleaving insns.  */
      if (!uns_p || mode == V16HImode)
        extra_cost = cost->sse_op * 2;
      basic_cost = cost->mulss * 2 + cost->sse_op * 4;
      break;
    case V4SImode:
    case V8SImode:
      /* pmulhw/pmullw can be used.  */
      basic_cost = cost->mulss * 2 + cost->sse_op * 2;
      break;
    case V2DImode:
      /* pmuludq under sse2, pmuldq under sse4.1, for sign_extend,
         require extra 4 mul, 4 add, 4 cmp and 2 shift.  */
      if (!TARGET_SSE4_1 && !uns_p)
        extra_cost = (cost->mulss + cost->sse_op + cost->sse_op) * 4
                      + cost->sse_op * 2;
      /* Fallthru.  */
    case V4DImode:
      basic_cost = cost->mulss * 2 + cost->sse_op * 4;
      break;
    default:
      /* Not implemented.  Arbitrarily high cost discourages
	 vectorization for unsupported modes.  */
      return 100;
    }
  return ix86_vec_cost (mode, basic_cost + extra_cost);
}
   22105              : 
/* Return cost of multiplication in MODE.  Scalar float, x87, vector
   float, vector integer (including the many emulated V*QImode and
   V*DImode sequences) and scalar integer cases are all handled.  */

static int
ix86_multiplication_cost (const struct processor_costs *cost,
                          enum machine_mode mode)
{
  /* For vectors, costs are keyed off the element mode.  */
  machine_mode inner_mode = mode;
  if (VECTOR_MODE_P (mode))
    inner_mode = GET_MODE_INNER (mode);

  if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
    return inner_mode == DFmode ? cost->mulsd : cost->mulss;
  else if (X87_FLOAT_MODE_P (mode))
    return cost->fmul;
  else if (FLOAT_MODE_P (mode))
    return  ix86_vec_cost (mode,
                           inner_mode == DFmode ? cost->mulsd : cost->mulss);
  else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    {
      /* Number of multiply and of other insns in the emulation.  */
      int nmults, nops;
      /* Cost of reading the memory.  */
      int extra;

      switch (mode)
        {
        case V4QImode:
        case V8QImode:
          /* Partial V*QImode is emulated with 4-6 insns.  */
          nmults = 1;
          nops = 3;
          extra = 0;

          if (TARGET_AVX512BW && TARGET_AVX512VL)
            ;
          else if (TARGET_AVX2)
            nops += 2;
          else if (TARGET_XOP)
            extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
          else
            {
              nops += 1;
              extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
            }
          goto do_qimode;

        case V16QImode:
          /* V*QImode is emulated with 4-11 insns.  */
          nmults = 1;
          nops = 3;
          extra = 0;

          if (TARGET_AVX2 && !TARGET_PREFER_AVX128)
            {
              if (!(TARGET_AVX512BW && TARGET_AVX512VL))
                nops += 3;
            }
          else if (TARGET_XOP)
            {
              nmults += 1;
              nops += 2;
              extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
            }
          else
            {
              nmults += 1;
              nops += 4;
              extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
            }
          goto do_qimode;

        case V32QImode:
          nmults = 1;
          nops = 3;
          extra = 0;

          if (!TARGET_AVX512BW || TARGET_PREFER_AVX256)
            {
              nmults += 1;
              nops += 4;
              /* 2 loads, so no division by 2.  */
              extra += COSTS_N_INSNS (cost->sse_load[3]);
            }
          goto do_qimode;

        case V64QImode:
          nmults = 2;
          nops = 9;
          /* 2 loads of each size, so no division by 2.  */
          extra = COSTS_N_INSNS (cost->sse_load[3] + cost->sse_load[4]);

        do_qimode:
          return ix86_vec_cost (mode, cost->mulss * nmults
                                + cost->sse_op * nops) + extra;

        case V4SImode:
          /* pmulld is used in this case. No emulation is needed.  */
          if (TARGET_SSE4_1)
            goto do_native;
          /* V4SImode is emulated with 7 insns.  */
          else
            return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);

        case V2DImode:
        case V4DImode:
          /* vpmullq is used in this case. No emulation is needed.  */
          if (TARGET_AVX512DQ && TARGET_AVX512VL)
            goto do_native;
          /* V*DImode is emulated with 6-8 insns.  */
          else if (TARGET_XOP && mode == V2DImode)
            return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 4);
          /* FALLTHRU */
        case V8DImode:
          /* vpmullq is used in this case. No emulation is needed.  */
          if (TARGET_AVX512DQ && mode == V8DImode)
            goto do_native;
          else
            return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);

        default:
        do_native:
          /* A single hardware multiply suffices.  */
          return ix86_vec_cost (mode, cost->mulss);
        }
    }
  else
    /* Scalar integer multiply: base cost plus a flat per-bit charge.  */
    return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
}
   22232              : 
   22233              : /* Return cost of multiplication in MODE.  */
   22234              : 
   22235              : static int
   22236     72506382 : ix86_division_cost (const struct processor_costs *cost,
   22237              :                           enum machine_mode mode)
   22238              : {
   22239     72506382 :   machine_mode inner_mode = mode;
   22240     72506382 :   if (VECTOR_MODE_P (mode))
   22241     53517129 :     inner_mode = GET_MODE_INNER (mode);
   22242              : 
   22243     72506382 :   if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   22244       247929 :     return inner_mode == DFmode ? cost->divsd : cost->divss;
   22245     72258453 :   else if (X87_FLOAT_MODE_P (mode))
   22246        44810 :     return cost->fdiv;
   22247     72213643 :   else if (FLOAT_MODE_P (mode))
   22248        17202 :     return ix86_vec_cost (mode,
   22249        17202 :                           inner_mode == DFmode ? cost->divsd : cost->divss);
   22250              :   else
   22251     80535944 :     return cost->divide[MODE_INDEX (mode)];
   22252              : }
   22253              : 
   22254              : /* Return cost of shift in MODE.
   22255              :    If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
   22256              :    AND_IN_OP1 specify in op1 is result of AND and SHIFT_AND_TRUNCATE
   22257              :    if op1 is a result of subreg.
   22258              : 
   22259              :    SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored.  */
   22260              : 
   22261              : static int
   22262    774693582 : ix86_shift_rotate_cost (const struct processor_costs *cost,
   22263              :                         enum rtx_code code,
   22264              :                         enum machine_mode mode, bool constant_op1,
   22265              :                         HOST_WIDE_INT op1_val,
   22266              :                         bool and_in_op1,
   22267              :                         bool shift_and_truncate,
   22268              :                         bool *skip_op0, bool *skip_op1)
   22269              : {
   22270    774693582 :   if (skip_op0)
   22271    774637694 :     *skip_op0 = *skip_op1 = false;
   22272              : 
   22273    774693582 :   if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   22274              :     {
   22275    398049381 :       int count;
   22276              :       /* Cost of reading the memory.  */
   22277    398049381 :       int extra;
   22278              : 
   22279    398049381 :       switch (mode)
   22280              :         {
   22281      6040788 :         case V4QImode:
   22282      6040788 :         case V8QImode:
   22283      6040788 :           if (TARGET_AVX2)
   22284              :             /* Use vpbroadcast.  */
   22285       195728 :             extra = cost->sse_op;
   22286              :           else
   22287      5845060 :             extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
   22288              : 
   22289      6040788 :           if (constant_op1)
   22290              :             {
   22291      6040760 :               if (code == ASHIFTRT)
   22292              :                 {
   22293           40 :                   count = 4;
   22294           40 :                   extra *= 2;
   22295              :                 }
   22296              :               else
   22297              :                 count = 2;
   22298              :             }
   22299           28 :           else if (TARGET_AVX512BW && TARGET_AVX512VL)
   22300           28 :             return ix86_vec_cost (mode, cost->sse_op * 4);
   22301            0 :           else if (TARGET_SSE4_1)
   22302              :             count = 5;
   22303            0 :           else if (code == ASHIFTRT)
   22304              :             count = 6;
   22305              :           else
   22306            0 :             count = 5;
   22307      6040760 :           return ix86_vec_cost (mode, cost->sse_op * count) + extra;
   22308              : 
   22309      3023189 :         case V16QImode:
   22310      3023189 :           if (TARGET_XOP)
   22311              :             {
   22312              :               /* For XOP we use vpshab, which requires a broadcast of the
   22313              :                  value to the variable shift insn.  For constants this
   22314              :                  means a V16Q const in mem; even when we can perform the
   22315              :                  shift with one insn set the cost to prefer paddb.  */
   22316         3489 :               if (constant_op1)
   22317              :                 {
   22318         2530 :                   extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
   22319         2530 :                   return ix86_vec_cost (mode, cost->sse_op) + extra;
   22320              :                 }
   22321              :               else
   22322              :                 {
   22323          959 :                   count = (code == ASHIFT) ? 3 : 4;
   22324          959 :                   return ix86_vec_cost (mode, cost->sse_op * count);
   22325              :                 }
   22326              :             }
   22327              :           /* FALLTHRU */
   22328      6040575 :         case V32QImode:
   22329      6040575 :           if (TARGET_GFNI && constant_op1)
   22330              :             {
   22331              :               /* Use vgf2p8affine.  One extra load for the mask, but in a loop
   22332              :                  with enough registers it will be moved out.  So for now don't
   22333              :                  account the constant mask load.  This is not quite right
   22334              :                  for non loop vectorization.  */
   22335        11355 :               extra = 0;
   22336        11355 :               return ix86_vec_cost (mode, cost->sse_op) + extra;
   22337              :             }
   22338      6029220 :           if (TARGET_AVX2)
   22339              :             /* Use vpbroadcast.  */
   22340       189234 :             extra = cost->sse_op;
   22341              :           else
   22342      5839986 :             extra = COSTS_N_INSNS (mode == V16QImode
   22343              :                                    ? cost->sse_load[2]
   22344      5839986 :                                    : cost->sse_load[3]) / 2;
   22345              : 
   22346      6029220 :           if (constant_op1)
   22347              :             {
   22348      6029033 :               if (code == ASHIFTRT)
   22349              :                 {
   22350          177 :                   count = 4;
   22351          177 :                   extra *= 2;
   22352              :                 }
   22353              :               else
   22354              :                 count = 2;
   22355              :             }
   22356          187 :           else if (TARGET_AVX512BW
   22357           75 :                    && ((mode == V32QImode && !TARGET_PREFER_AVX256)
   22358           37 :                        || (mode == V16QImode && TARGET_AVX512VL
   22359           37 :                            && !TARGET_PREFER_AVX128)))
   22360           75 :             return ix86_vec_cost (mode, cost->sse_op * 4);
   22361          112 :           else if (TARGET_AVX2
   22362            0 :                    && mode == V16QImode && !TARGET_PREFER_AVX128)
   22363              :             count = 6;
   22364          112 :           else if (TARGET_SSE4_1)
   22365              :             count = 9;
   22366          112 :           else if (code == ASHIFTRT)
   22367              :             count = 10;
   22368              :           else
   22369           76 :             count = 9;
   22370      6029145 :           return ix86_vec_cost (mode, cost->sse_op * count) + extra;
   22371              : 
   22372      3020858 :         case V64QImode:
   22373              :           /* Ignore the mask load for GF2P8AFFINEQB.  */
   22374      3020858 :           extra = 0;
   22375      3020858 :           return ix86_vec_cost (mode, cost->sse_op) + extra;
   22376              : 
   22377     54456190 :         case V2DImode:
   22378     54456190 :         case V4DImode:
   22379              :           /* V*DImode arithmetic right shift is emulated.  */
   22380     54456190 :           if (code == ASHIFTRT && !TARGET_AVX512VL)
   22381              :             {
   22382         1286 :               if (constant_op1)
   22383              :                 {
   22384          562 :                   if (op1_val == 63)
   22385          440 :                     count = TARGET_SSE4_2 ? 1 : 2;
   22386          421 :                   else if (TARGET_XOP)
   22387              :                     count = 2;
   22388          122 :                   else if (TARGET_SSE4_1)
   22389              :                     count = 3;
   22390              :                   else
   22391          131 :                     count = 4;
   22392              :                 }
   22393          724 :               else if (TARGET_XOP)
   22394              :                 count = 3;
   22395           43 :               else if (TARGET_SSE4_2)
   22396              :                 count = 4;
   22397              :               else
   22398         1286 :                 count = 5;
   22399              : 
   22400         1286 :               return ix86_vec_cost (mode, cost->sse_op * count);
   22401              :             }
   22402              :           /* FALLTHRU */
   22403    382942385 :         default:
   22404    382942385 :           return ix86_vec_cost (mode, cost->sse_op);
   22405              :         }
   22406              :     }
   22407              : 
   22408    761993694 :   if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22409              :     {
   22410    192836265 :       if (constant_op1)
   22411              :         {
   22412    192801795 :           if (op1_val > 32)
   22413    136980755 :             return cost->shift_const + COSTS_N_INSNS (2);
   22414              :           else
   22415     55821040 :             return cost->shift_const * 2;
   22416              :         }
   22417              :       else
   22418              :         {
   22419        34470 :           if (and_in_op1)
   22420           63 :             return cost->shift_var * 2;
   22421              :           else
   22422        34407 :             return cost->shift_var * 6 + COSTS_N_INSNS (2);
   22423              :         }
   22424              :     }
   22425              :   else
   22426              :     {
   22427    183807936 :       if (constant_op1)
   22428    183076487 :         return cost->shift_const;
   22429       731449 :       else if (shift_and_truncate)
   22430              :         {
   22431        22869 :           if (skip_op0)
   22432        22869 :             *skip_op0 = *skip_op1 = true;
   22433              :           /* Return the cost after shift-and truncation.  */
   22434        22869 :           return cost->shift_var;
   22435              :         }
   22436              :       else
   22437       708580 :         return cost->shift_var;
   22438              :     }
   22439              : }
   22440              : 
   22441              : static int
   22442    149056544 : ix86_insn_cost (rtx_insn *insn, bool speed)
   22443              : {
   22444    149056544 :   int insn_cost = 0;
   22445              :   /* Add extra cost to avoid post_reload late_combine revert
   22446              :      the optimization did in pass_rpad.  */
   22447    149056544 :   if (reload_completed
   22448      4659561 :       && ix86_rpad_gate ()
   22449       265597 :       && recog_memoized (insn) >= 0
   22450    149321887 :       && get_attr_avx_partial_xmm_update (insn)
   22451              :       == AVX_PARTIAL_XMM_UPDATE_TRUE)
   22452              :     insn_cost += COSTS_N_INSNS (3);
   22453              : 
   22454    149056544 :   return insn_cost + pattern_cost (PATTERN (insn), speed);
   22455              : }
   22456              : 
   22457              : /* Return cost of SSE/AVX FP->FP conversion (extensions and truncates).  */
   22458              : 
   22459              : static int
   22460       745758 : vec_fp_conversion_cost (const struct processor_costs *cost, int size)
   22461              : {
   22462       745758 :   if (size < 128)
   22463       742268 :     return cost->cvtss2sd;
   22464         3490 :   else if (size < 256)
   22465              :     {
   22466         1420 :       if (TARGET_SSE_SPLIT_REGS)
   22467            0 :         return cost->cvtss2sd * size / 64;
   22468         1420 :       return cost->cvtss2sd;
   22469              :     }
   22470         2070 :   if (size < 512)
   22471          768 :     return cost->vcvtps2pd256;
   22472              :   else
   22473         1302 :     return cost->vcvtps2pd512;
   22474              : }
   22475              : 
   22476              : /* Return true of X is UNSPEC with UNSPEC_PCMP or UNSPEC_UNSIGNED_PCMP.  */
   22477              : 
   22478              : static bool
   22479       272281 : unspec_pcmp_p (rtx x)
   22480              : {
   22481       272281 :   return GET_CODE (x) == UNSPEC
   22482       272281 :          && (XINT (x, 1) == UNSPEC_PCMP || XINT (x, 1) == UNSPEC_UNSIGNED_PCMP);
   22483              : }
   22484              : 
   22485              : /* Compute a (partial) cost for rtx X.  Return true if the complete
   22486              :    cost has been computed, and false if subexpressions should be
   22487              :    scanned.  In either case, *TOTAL contains the cost result.  */
   22488              : 
   22489              : static bool
   22490   7700754681 : ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
   22491              :                 int *total, bool speed)
   22492              : {
   22493   7700754681 :   rtx mask;
   22494   7700754681 :   enum rtx_code code = GET_CODE (x);
   22495   7700754681 :   enum rtx_code outer_code = (enum rtx_code) outer_code_i;
   22496   4122754523 :   const struct processor_costs *cost
   22497   7700754681 :     = speed ? ix86_tune_cost : &ix86_size_cost;
   22498   7700754681 :   int src_cost;
   22499              : 
   22500              :   /* Handling different vternlog variants.  */
   22501   7700754681 :   if ((GET_MODE_SIZE (mode) == 64
   22502   7700754681 :        ? TARGET_AVX512F
   22503   6517342257 :        : (TARGET_AVX512VL
   22504   6455757773 :           || (TARGET_AVX512F && !TARGET_PREFER_AVX256)))
   22505    178122495 :       && GET_MODE_SIZE (mode) >= 16
   22506    120959225 :       && outer_code_i == SET
   22507   7747495853 :       && ternlog_operand (x, mode))
   22508              :     {
   22509        33846 :       rtx args[3];
   22510              : 
   22511        33846 :       args[0] = NULL_RTX;
   22512        33846 :       args[1] = NULL_RTX;
   22513        33846 :       args[2] = NULL_RTX;
   22514        33846 :       int idx = ix86_ternlog_idx (x, args);
   22515        33846 :       gcc_assert (idx >= 0);
   22516              : 
   22517        33846 :       *total = cost->sse_op;
   22518       135384 :       for (int i = 0; i != 3; i++)
   22519       101538 :         if (args[i])
   22520        71400 :           *total += rtx_cost (args[i], GET_MODE (args[i]), UNSPEC, i, speed);
   22521        33846 :       return true;
   22522              :     }
   22523              : 
   22524              : 
   22525   7700720835 :   switch (code)
   22526              :     {
   22527     47590053 :     case SET:
   22528     47590053 :       if (register_operand (SET_DEST (x), VOIDmode)
   22529     47590053 :           && register_operand (SET_SRC (x), VOIDmode))
   22530              :         {
   22531     29222860 :           *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
   22532     29222860 :           return true;
   22533              :         }
   22534              : 
   22535     18367193 :       if (register_operand (SET_SRC (x), VOIDmode))
   22536              :         /* Avoid potentially incorrect high cost from rtx_costs
   22537              :            for non-tieable SUBREGs.  */
   22538              :         src_cost = 0;
   22539              :       else
   22540              :         {
   22541     15543083 :           src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
   22542              : 
   22543     15543083 :           if (CONSTANT_P (SET_SRC (x)))
   22544              :             /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
   22545              :                a small value, possibly zero for cheap constants.  */
   22546      6970554 :             src_cost += COSTS_N_INSNS (1);
   22547              :         }
   22548              : 
   22549     18367193 :       *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
   22550     18367193 :       return true;
   22551              : 
   22552   2836509020 :     case CONST_INT:
   22553   2836509020 :     case CONST:
   22554   2836509020 :     case LABEL_REF:
   22555   2836509020 :     case SYMBOL_REF:
   22556   2836509020 :       if (x86_64_immediate_operand (x, VOIDmode))
   22557   2231290181 :         *total = 0;
   22558    605218839 :       else if (TARGET_64BIT && x86_64_zext_immediate_operand (x, VOIDmode))
   22559              :         /* Consider the zext constants slightly more expensive, as they
   22560              :            can't appear in most instructions.  */
   22561     28006819 :         *total = 1;
   22562              :       else
   22563              :         /* movabsq is slightly more expensive than a simple instruction. */
   22564    577212020 :         *total = COSTS_N_INSNS (1) + 1;
   22565              :       return true;
   22566              : 
   22567      7486371 :     case CONST_DOUBLE:
   22568      7486371 :       if (IS_STACK_MODE (mode))
   22569      1298367 :         switch (standard_80387_constant_p (x))
   22570              :           {
   22571              :           case -1:
   22572              :           case 0:
   22573              :             break;
   22574       283374 :           case 1: /* 0.0 */
   22575       283374 :             *total = 1;
   22576       283374 :             return true;
   22577       484819 :           default: /* Other constants */
   22578       484819 :             *total = 2;
   22579       484819 :             return true;
   22580              :           }
   22581              :       /* FALLTHRU */
   22582              : 
   22583     14304460 :     case CONST_VECTOR:
   22584     14304460 :       switch (standard_sse_constant_p (x, mode))
   22585              :         {
   22586              :         case 0:
   22587              :           break;
   22588      4214332 :         case 1:  /* 0: xor eliminates false dependency */
   22589      4214332 :           *total = 0;
   22590      4214332 :           return true;
   22591       171790 :         default: /* -1: cmp contains false dependency */
   22592       171790 :           *total = 1;
   22593       171790 :           return true;
   22594              :         }
   22595              :       /* FALLTHRU */
   22596              : 
   22597     10902626 :     case CONST_WIDE_INT:
   22598              :       /* Fall back to (MEM (SYMBOL_REF)), since that's where
   22599              :          it'll probably end up.  Add a penalty for size.  */
   22600     21805252 :       *total = (COSTS_N_INSNS (1)
   22601     21575706 :                 + (!TARGET_64BIT && flag_pic)
   22602     21805252 :                 + (GET_MODE_SIZE (mode) <= 4
   22603     19036187 :                    ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
   22604     10902626 :       return true;
   22605              : 
   22606     22550999 :     case ZERO_EXTEND:
   22607              :       /* The zero extensions is often completely free on x86_64, so make
   22608              :          it as cheap as possible.  */
   22609     22550999 :       if (TARGET_64BIT && mode == DImode
   22610      4944739 :           && GET_MODE (XEXP (x, 0)) == SImode)
   22611      3062793 :         *total = 1;
   22612     19488206 :       else if (TARGET_ZERO_EXTEND_WITH_AND)
   22613            0 :         *total = cost->add;
   22614              :       else
   22615     19488206 :         *total = cost->movzx;
   22616              :       return false;
   22617              : 
   22618      2675645 :     case SIGN_EXTEND:
   22619      2675645 :       *total = cost->movsx;
   22620      2675645 :       return false;
   22621              : 
   22622    637419247 :     case ASHIFT:
   22623    637419247 :       if (SCALAR_INT_MODE_P (mode)
   22624    246334806 :           && GET_MODE_SIZE (mode) < UNITS_PER_WORD
   22625    680501306 :           && CONST_INT_P (XEXP (x, 1)))
   22626              :         {
   22627     42903124 :           HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
   22628     42903124 :           if (value == 1)
   22629              :             {
   22630      2477798 :               *total = cost->add;
   22631      2477798 :               return false;
   22632              :             }
   22633     40425326 :           if ((value == 2 || value == 3)
   22634      4542986 :               && cost->lea <= cost->shift_const)
   22635              :             {
   22636      2145991 :               *total = cost->lea;
   22637      2145991 :               return false;
   22638              :             }
   22639              :         }
   22640              :       /* FALLTHRU */
   22641              : 
   22642    774637694 :     case ROTATE:
   22643    774637694 :     case ASHIFTRT:
   22644    774637694 :     case LSHIFTRT:
   22645    774637694 :     case ROTATERT:
   22646    774637694 :       bool skip_op0, skip_op1;
   22647    774637694 :       *total = ix86_shift_rotate_cost (cost, code, mode,
   22648    774637694 :                                        CONSTANT_P (XEXP (x, 1)),
   22649              :                                        CONST_INT_P (XEXP (x, 1))
   22650              :                                          ? INTVAL (XEXP (x, 1)) : -1,
   22651              :                                        GET_CODE (XEXP (x, 1)) == AND,
   22652    774637694 :                                        SUBREG_P (XEXP (x, 1))
   22653    774637694 :                                        && GET_CODE (XEXP (XEXP (x, 1),
   22654              :                                                           0)) == AND,
   22655              :                                        &skip_op0, &skip_op1);
   22656    774637694 :       if (skip_op0 || skip_op1)
   22657              :         {
   22658        22869 :           if (!skip_op0)
   22659            0 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   22660        22869 :           if (!skip_op1)
   22661            0 :             *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
   22662        22869 :           return true;
   22663              :         }
   22664              :       return false;
   22665              : 
   22666       230516 :     case FMA:
   22667       230516 :       {
   22668       230516 :         rtx sub;
   22669              : 
   22670       230516 :         gcc_assert (FLOAT_MODE_P (mode));
   22671       230516 :         gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
   22672              : 
   22673       461032 :         *total = ix86_vec_cost (mode,
   22674       230516 :                                 GET_MODE_INNER (mode) == SFmode
   22675              :                                 ? cost->fmass : cost->fmasd);
   22676       230516 :         *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
   22677              : 
   22678              :         /* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
   22679       230516 :         sub = XEXP (x, 0);
   22680       230516 :         if (GET_CODE (sub) == NEG)
   22681        51068 :           sub = XEXP (sub, 0);
   22682       230516 :         *total += rtx_cost (sub, mode, FMA, 0, speed);
   22683              : 
   22684       230516 :         sub = XEXP (x, 2);
   22685       230516 :         if (GET_CODE (sub) == NEG)
   22686        40544 :           sub = XEXP (sub, 0);
   22687       230516 :         *total += rtx_cost (sub, mode, FMA, 2, speed);
   22688       230516 :         return true;
   22689              :       }
   22690              : 
   22691   1757982486 :     case MULT:
   22692   1757982486 :       if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
   22693              :         {
   22694    551219679 :           rtx op0 = XEXP (x, 0);
   22695    551219679 :           rtx op1 = XEXP (x, 1);
   22696    551219679 :           int nbits;
   22697    551219679 :           if (CONST_INT_P (XEXP (x, 1)))
   22698              :             {
   22699    533102640 :               unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
   22700   1081625588 :               for (nbits = 0; value != 0; value &= value - 1)
   22701    548522948 :                 nbits++;
   22702              :             }
   22703              :           else
   22704              :             /* This is arbitrary.  */
   22705              :             nbits = 7;
   22706              : 
   22707              :           /* Compute costs correctly for widening multiplication.  */
   22708    551219679 :           if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
   22709    556727295 :               && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
   22710      5507616 :                  == GET_MODE_SIZE (mode))
   22711              :             {
   22712      5503523 :               int is_mulwiden = 0;
   22713      5503523 :               machine_mode inner_mode = GET_MODE (op0);
   22714              : 
   22715      5503523 :               if (GET_CODE (op0) == GET_CODE (op1))
   22716      5419662 :                 is_mulwiden = 1, op1 = XEXP (op1, 0);
   22717        83861 :               else if (CONST_INT_P (op1))
   22718              :                 {
   22719        73929 :                   if (GET_CODE (op0) == SIGN_EXTEND)
   22720        22066 :                     is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
   22721        22066 :                                   == INTVAL (op1);
   22722              :                   else
   22723        51863 :                     is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
   22724              :                 }
   22725              : 
   22726      5493591 :               if (is_mulwiden)
   22727      5493591 :                 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
   22728              :             }
   22729              : 
   22730    551219679 :           int mult_init;
   22731              :           // Double word multiplication requires 3 mults and 2 adds.
   22732   1118114132 :           if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22733              :             {
   22734    331841506 :               mult_init = 3 * cost->mult_init[MODE_INDEX (word_mode)]
   22735    331841506 :                           + 2 * cost->add;
   22736    331841506 :               nbits *= 3;
   22737              :             }
   22738    378351386 :           else mult_init = cost->mult_init[MODE_INDEX (mode)];
   22739              : 
   22740   1102439358 :           *total = (mult_init
   22741    551219679 :                     + nbits * cost->mult_bit
   22742    551219679 :                     + rtx_cost (op0, mode, outer_code, opno, speed)
   22743    551219679 :                     + rtx_cost (op1, mode, outer_code, opno, speed));
   22744              : 
   22745    551219679 :           return true;
   22746              :         }
   22747   1206762807 :       *total = ix86_multiplication_cost (cost, mode);
   22748   1206762807 :       return false;
   22749              : 
   22750     72493989 :     case DIV:
   22751     72493989 :     case UDIV:
   22752     72493989 :     case MOD:
   22753     72493989 :     case UMOD:
   22754     72493989 :       *total = ix86_division_cost (cost, mode);
   22755     72493989 :       return false;
   22756              : 
   22757    689564503 :     case PLUS:
   22758    689564503 :       if (GET_MODE_CLASS (mode) == MODE_INT
   22759    946179397 :           && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
   22760              :         {
   22761    143034063 :           if (GET_CODE (XEXP (x, 0)) == PLUS
   22762      3942838 :               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
   22763       839606 :               && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
   22764       839581 :               && CONSTANT_P (XEXP (x, 1)))
   22765              :             {
   22766       839524 :               HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
   22767       839524 :               if (val == 2 || val == 4 || val == 8)
   22768              :                 {
   22769       839420 :                   *total = cost->lea;
   22770       839420 :                   *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
   22771              :                                       outer_code, opno, speed);
   22772       839420 :                   *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
   22773              :                                       outer_code, opno, speed);
   22774       839420 :                   *total += rtx_cost (XEXP (x, 1), mode,
   22775              :                                       outer_code, opno, speed);
   22776       839420 :                   return true;
   22777              :                 }
   22778              :             }
   22779    142194539 :           else if (GET_CODE (XEXP (x, 0)) == MULT
   22780     52359504 :                    && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
   22781              :             {
   22782     52300201 :               HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
   22783     52300201 :               if (val == 2 || val == 4 || val == 8)
   22784              :                 {
   22785      8007603 :                   *total = cost->lea;
   22786      8007603 :                   *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   22787              :                                       outer_code, opno, speed);
   22788      8007603 :                   *total += rtx_cost (XEXP (x, 1), mode,
   22789              :                                       outer_code, opno, speed);
   22790      8007603 :                   return true;
   22791              :                 }
   22792              :             }
   22793     89894338 :           else if (GET_CODE (XEXP (x, 0)) == PLUS)
   22794              :             {
   22795      3103314 :               rtx op = XEXP (XEXP (x, 0), 0);
   22796              : 
   22797              :               /* Add with carry, ignore the cost of adding a carry flag.  */
   22798      3103314 :               if (ix86_carry_flag_operator (op, mode)
   22799      3103314 :                   || ix86_carry_flag_unset_operator (op, mode))
   22800        69349 :                 *total = cost->add;
   22801              :               else
   22802              :                 {
   22803      3033965 :                   *total = cost->lea;
   22804      3033965 :                   *total += rtx_cost (op, mode,
   22805              :                                       outer_code, opno, speed);
   22806              :                 }
   22807              : 
   22808      3103314 :               *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
   22809              :                                   outer_code, opno, speed);
   22810      3103314 :               *total += rtx_cost (XEXP (x, 1), mode,
   22811              :                                   outer_code, opno, speed);
   22812      3103314 :               return true;
   22813              :             }
   22814              :         }
   22815              :       /* FALLTHRU */
   22816              : 
   22817   1835577872 :     case MINUS:
   22818              :       /* Subtract with borrow, ignore the cost of subtracting a carry flag.  */
   22819   1835577872 :       if (GET_MODE_CLASS (mode) == MODE_INT
   22820    519400917 :           && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
   22821    234873958 :           && GET_CODE (XEXP (x, 0)) == MINUS
   22822   1835616632 :           && (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode)
   22823        14630 :               || ix86_carry_flag_unset_operator (XEXP (XEXP (x, 0), 1), mode)))
   22824              :         {
   22825        24130 :           *total = cost->add;
   22826        24130 :           *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   22827              :                               outer_code, opno, speed);
   22828        24130 :           *total += rtx_cost (XEXP (x, 1), mode,
   22829              :                               outer_code, opno, speed);
   22830        24130 :           return true;
   22831              :         }
   22832              : 
   22833   1835553742 :       if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   22834      2385552 :         *total = cost->addss;
   22835   1833168190 :       else if (X87_FLOAT_MODE_P (mode))
   22836       217782 :         *total = cost->fadd;
   22837   1832950408 :       else if (FLOAT_MODE_P (mode))
   22838       441061 :         *total = ix86_vec_cost (mode, cost->addss);
   22839   1832509347 :       else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   22840   1206412961 :         *total = ix86_vec_cost (mode, cost->sse_op);
   22841   1291599275 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22842    330841598 :         *total = cost->add * 2;
   22843              :       else
   22844    295254788 :         *total = cost->add;
   22845              :       return false;
   22846              : 
   22847      3932216 :     case IOR:
   22848      3932216 :       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   22849      3683562 :           || SSE_FLOAT_MODE_P (mode))
   22850              :         {
   22851              :           /* (ior (not ...) ...) can be a single insn in AVX512.  */
   22852          482 :           if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
   22853       258248 :               && (GET_MODE_SIZE (mode) == 64
   22854            0 :                   || (TARGET_AVX512VL
   22855            0 :                       && (GET_MODE_SIZE (mode) == 32
   22856            0 :                           || GET_MODE_SIZE (mode) == 16))))
   22857              :             {
   22858            0 :               rtx right = GET_CODE (XEXP (x, 1)) != NOT
   22859            0 :                           ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0);
   22860              : 
   22861            0 :               *total = ix86_vec_cost (mode, cost->sse_op)
   22862            0 :                        + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   22863              :                                    outer_code, opno, speed)
   22864            0 :                        + rtx_cost (right, mode, outer_code, opno, speed);
   22865            0 :               return true;
   22866              :             }
   22867       258248 :           *total = ix86_vec_cost (mode, cost->sse_op);
   22868       258248 :         }
   22869      3673968 :       else if (TARGET_64BIT
   22870      3381591 :                && mode == TImode
   22871      1674681 :                && GET_CODE (XEXP (x, 0)) == ASHIFT
   22872       254017 :                && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
   22873       252025 :                && GET_MODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == DImode
   22874       252025 :                && CONST_INT_P (XEXP (XEXP (x, 0), 1))
   22875       252025 :                && INTVAL (XEXP (XEXP (x, 0), 1)) == 64
   22876       252025 :                && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
   22877       226700 :                && GET_MODE (XEXP (XEXP (x, 1), 0)) == DImode)
   22878              :         {
   22879              :           /* *concatditi3 is cheap.  */
   22880       226700 :           rtx op0 = XEXP (XEXP (XEXP (x, 0), 0), 0);
   22881       226700 :           rtx op1 = XEXP (XEXP (x, 1), 0);
   22882         1431 :           *total = (SUBREG_P (op0) && GET_MODE (SUBREG_REG (op0)) == DFmode)
   22883       226700 :                    ? COSTS_N_INSNS (1)    /* movq.  */
   22884       225269 :                    : set_src_cost (op0, DImode, speed);
   22885         2336 :           *total += (SUBREG_P (op1) && GET_MODE (SUBREG_REG (op1)) == DFmode)
   22886       226700 :                     ? COSTS_N_INSNS (1)    /* movq.  */
   22887       224377 :                     : set_src_cost (op1, DImode, speed);
   22888       226700 :           return true;
   22889              :         }
   22890      3447268 :       else if (TARGET_64BIT
   22891      3154891 :                && mode == TImode
   22892      1447981 :                && GET_CODE (XEXP (x, 0)) == AND
   22893      1385206 :                && REG_P (XEXP (XEXP (x, 0), 0))
   22894      1379979 :                && CONST_WIDE_INT_P (XEXP (XEXP (x, 0), 1))
   22895      1377302 :                && CONST_WIDE_INT_NUNITS (XEXP (XEXP (x, 0), 1)) == 2
   22896      1377302 :                && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 0) == -1
   22897       902854 :                && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 1) == 0
   22898       902854 :                && GET_CODE (XEXP (x, 1)) == ASHIFT
   22899       900734 :                && GET_CODE (XEXP (XEXP (x, 1), 0)) == ZERO_EXTEND
   22900       900734 :                && GET_MODE (XEXP (XEXP (XEXP (x, 1), 0), 0)) == DImode
   22901       900734 :                && CONST_INT_P (XEXP (XEXP (x, 1), 1))
   22902      4348002 :                && INTVAL (XEXP (XEXP (x, 1), 1)) == 64)
   22903              :         {
   22904              :           /* *insvti_highpart is cheap.  */
   22905       900734 :           rtx op = XEXP (XEXP (XEXP (x, 1), 0), 0);
   22906       900734 :           *total = COSTS_N_INSNS (1) + 1;
   22907         1404 :           *total += (SUBREG_P (op) && GET_MODE (SUBREG_REG (op)) == DFmode)
   22908       900734 :                     ? COSTS_N_INSNS (1)    /* movq.  */
   22909       899818 :                     : set_src_cost (op, DImode, speed);
   22910       900734 :           return true;
   22911              :         }
   22912      5385445 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22913       740963 :         *total = cost->add * 2;
   22914              :       else
   22915      1805571 :         *total = cost->add;
   22916              :       return false;
   22917              : 
   22918       560571 :     case XOR:
   22919       560571 :       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   22920       427592 :           || SSE_FLOAT_MODE_P (mode))
   22921       132979 :         *total = ix86_vec_cost (mode, cost->sse_op);
   22922       913937 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22923        16310 :         *total = cost->add * 2;
   22924              :       else
   22925       411282 :         *total = cost->add;
   22926              :       return false;
   22927              : 
   22928      7074871 :     case AND:
   22929      7074871 :       if (address_no_seg_operand (x, mode))
   22930              :         {
   22931        15707 :           *total = cost->lea;
   22932        15707 :           return true;
   22933              :         }
   22934      7059164 :       else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   22935      6649431 :                || SSE_FLOAT_MODE_P (mode))
   22936              :         {
   22937              :           /* pandn is a single instruction.  */
   22938       443280 :           if (GET_CODE (XEXP (x, 0)) == NOT)
   22939              :             {
   22940        57869 :               rtx right = XEXP (x, 1);
   22941              : 
   22942              :               /* (and (not ...) (not ...)) can be a single insn in AVX512.  */
   22943          418 :               if (GET_CODE (right) == NOT && TARGET_AVX512F
   22944        57869 :                   && (GET_MODE_SIZE (mode) == 64
   22945            0 :                       || (TARGET_AVX512VL
   22946            0 :                           && (GET_MODE_SIZE (mode) == 32
   22947            0 :                               || GET_MODE_SIZE (mode) == 16))))
   22948            0 :                 right = XEXP (right, 0);
   22949              : 
   22950        57869 :               *total = ix86_vec_cost (mode, cost->sse_op)
   22951        57869 :                        + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   22952              :                                    outer_code, opno, speed)
   22953        57869 :                        + rtx_cost (right, mode, outer_code, opno, speed);
   22954        57869 :               return true;
   22955              :             }
   22956       385411 :           else if (GET_CODE (XEXP (x, 1)) == NOT)
   22957              :             {
   22958          852 :               *total = ix86_vec_cost (mode, cost->sse_op)
   22959          852 :                        + rtx_cost (XEXP (x, 0), mode,
   22960              :                                    outer_code, opno, speed)
   22961          852 :                        + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
   22962              :                                    outer_code, opno, speed);
   22963          852 :               return true;
   22964              :             }
   22965       384559 :           *total = ix86_vec_cost (mode, cost->sse_op);
   22966       384559 :         }
   22967     13931296 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22968              :         {
   22969      1124489 :           if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
   22970              :             {
   22971         1670 :               *total = cost->add * 2
   22972          835 :                        + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   22973              :                                    outer_code, opno, speed)
   22974          835 :                        + rtx_cost (XEXP (x, 1), mode,
   22975              :                                    outer_code, opno, speed);
   22976          835 :               return true;
   22977              :             }
   22978      1123654 :           else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT)
   22979              :             {
   22980            0 :               *total = cost->add * 2
   22981            0 :                        + rtx_cost (XEXP (x, 0), mode,
   22982              :                                    outer_code, opno, speed)
   22983            0 :                        + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
   22984              :                                    outer_code, opno, speed);
   22985            0 :               return true;
   22986              :             }
   22987      1123654 :           *total = cost->add * 2;
   22988              :         }
   22989      5491395 :       else if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
   22990              :         {
   22991         7578 :           *total = cost->add
   22992         3789 :                    + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   22993              :                                outer_code, opno, speed)
   22994         3789 :                    + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
   22995         3789 :           return true;
   22996              :         }
   22997      5487606 :       else if (TARGET_BMI && GET_CODE (XEXP (x,1)) == NOT)
   22998              :         {
   22999          112 :           *total = cost->add
   23000           56 :                    + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
   23001           56 :                    + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
   23002              :                                outer_code, opno, speed);
   23003           56 :           return true;
   23004              :         }
   23005              :       else
   23006      5487550 :         *total = cost->add;
   23007              :       return false;
   23008              : 
   23009       507924 :     case NOT:
   23010       507924 :       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   23011              :         {
   23012              :           /* (not (xor ...)) can be a single insn in AVX512.  */
   23013            0 :           if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
   23014        10934 :               && (GET_MODE_SIZE (mode) == 64
   23015            0 :                   || (TARGET_AVX512VL
   23016            0 :                       && (GET_MODE_SIZE (mode) == 32
   23017            0 :                           || GET_MODE_SIZE (mode) == 16))))
   23018              :             {
   23019            0 :               *total = ix86_vec_cost (mode, cost->sse_op)
   23020            0 :                        + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   23021              :                                    outer_code, opno, speed)
   23022            0 :                        + rtx_cost (XEXP (XEXP (x, 0), 1), mode,
   23023              :                                    outer_code, opno, speed);
   23024            0 :               return true;
   23025              :             }
   23026              : 
   23027              :           // vnot is pxor -1.
   23028        10934 :           *total = ix86_vec_cost (mode, cost->sse_op) + 1;
   23029              :         }
   23030      1138955 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   23031        45873 :         *total = cost->add * 2;
   23032              :       else
   23033       451117 :         *total = cost->add;
   23034              :       return false;
   23035              : 
   23036     18164110 :     case NEG:
   23037     18164110 :       if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23038        51389 :         *total = cost->sse_op;
   23039     18112721 :       else if (X87_FLOAT_MODE_P (mode))
   23040        15127 :         *total = cost->fchs;
   23041     18097594 :       else if (FLOAT_MODE_P (mode))
   23042        27005 :         *total = ix86_vec_cost (mode, cost->sse_op);
   23043     18070589 :       else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   23044     13388430 :         *total = ix86_vec_cost (mode, cost->sse_op);
   23045      9513456 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   23046      1760651 :         *total = cost->add * 3;
   23047              :       else
   23048      2921508 :         *total = cost->add;
   23049              :       return false;
   23050              : 
   23051     54410718 :     case COMPARE:
   23052     54410718 :       rtx op0, op1;
   23053     54410718 :       op0 = XEXP (x, 0);
   23054     54410718 :       op1 = XEXP (x, 1);
   23055     54410718 :       if (GET_CODE (op0) == ZERO_EXTRACT
   23056       173566 :           && XEXP (op0, 1) == const1_rtx
   23057       156150 :           && CONST_INT_P (XEXP (op0, 2))
   23058       156114 :           && op1 == const0_rtx)
   23059              :         {
   23060              :           /* This kind of construct is implemented using test[bwl].
   23061              :              Treat it as if we had an AND.  */
   23062       156114 :           mode = GET_MODE (XEXP (op0, 0));
   23063       312228 :           *total = (cost->add
   23064       156114 :                     + rtx_cost (XEXP (op0, 0), mode, outer_code,
   23065              :                                 opno, speed)
   23066       156114 :                     + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
   23067       156114 :           return true;
   23068              :         }
   23069              : 
   23070     54254604 :       if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1))
   23071              :         {
   23072              :           /* This is an overflow detection, count it as a normal compare.  */
   23073       139165 :           *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed);
   23074       139165 :           return true;
   23075              :         }
   23076              : 
   23077     54115439 :       rtx geu;
   23078              :       /* Match x
   23079              :          (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
   23080              :                       (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))  */
   23081     54115439 :       if (mode == CCCmode
   23082       291507 :           && GET_CODE (op0) == NEG
   23083         8048 :           && GET_CODE (geu = XEXP (op0, 0)) == GEU
   23084         8045 :           && REG_P (XEXP (geu, 0))
   23085         8045 :           && (GET_MODE (XEXP (geu, 0)) == CCCmode
   23086          759 :               || GET_MODE (XEXP (geu, 0)) == CCmode)
   23087         8045 :           && REGNO (XEXP (geu, 0)) == FLAGS_REG
   23088         8045 :           && XEXP (geu, 1) == const0_rtx
   23089         8045 :           && GET_CODE (op1) == LTU
   23090         8045 :           && REG_P (XEXP (op1, 0))
   23091         8045 :           && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
   23092         8045 :           && REGNO (XEXP (op1, 0)) == FLAGS_REG
   23093     54123484 :           && XEXP (op1, 1) == const0_rtx)
   23094              :         {
   23095              :           /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop.  */
   23096         8045 :           *total = 0;
   23097         8045 :           return true;
   23098              :         }
   23099              :       /* Match x
   23100              :          (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
   23101              :                       (geu:QI (reg:CCC FLAGS_REG) (const_int 0)))  */
   23102     54107394 :       if (mode == CCCmode
   23103       283462 :           && GET_CODE (op0) == NEG
   23104            3 :           && GET_CODE (XEXP (op0, 0)) == LTU
   23105            3 :           && REG_P (XEXP (XEXP (op0, 0), 0))
   23106            3 :           && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
   23107            3 :           && REGNO (XEXP (XEXP (op0, 0), 0)) == FLAGS_REG
   23108            3 :           && XEXP (XEXP (op0, 0), 1) == const0_rtx
   23109            3 :           && GET_CODE (op1) == GEU
   23110            3 :           && REG_P (XEXP (op1, 0))
   23111            3 :           && GET_MODE (XEXP (op1, 0)) == CCCmode
   23112            3 :           && REGNO (XEXP (op1, 0)) == FLAGS_REG
   23113     54107397 :           && XEXP (op1, 1) == const0_rtx)
   23114              :         {
   23115              :           /* This is *x86_cmc.  */
   23116            3 :           if (!speed)
   23117            0 :             *total = COSTS_N_BYTES (1);
   23118            3 :           else if (TARGET_SLOW_STC)
   23119            0 :             *total = COSTS_N_INSNS (2);
   23120              :           else
   23121            3 :             *total = COSTS_N_INSNS (1);
   23122            3 :           return true;
   23123              :         }
   23124              : 
   23125     54107391 :       if (SCALAR_INT_MODE_P (GET_MODE (op0))
   23126    112677719 :           && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
   23127              :         {
   23128       750817 :           if (op1 == const0_rtx)
   23129       217322 :             *total = cost->add
   23130       108661 :                      + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed);
   23131              :           else
   23132      1284312 :             *total = 3*cost->add
   23133       642156 :                      + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed)
   23134       642156 :                      + rtx_cost (op1, GET_MODE (op0), outer_code, opno, speed);
   23135       750817 :           return true;
   23136              :         }
   23137              : 
   23138              :       /* The embedded comparison operand is completely free.  */
   23139     53356574 :       if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx)
   23140       379661 :         *total = 0;
   23141              : 
   23142              :       return false;
   23143              : 
   23144      1370880 :     case FLOAT_EXTEND:
   23145              :       /* x87 represents all values extended to 80bit.  */
   23146      1370880 :       if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23147       668736 :         *total = 0;
   23148              :       else
   23149      1404288 :         *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
   23150              :       return false;
   23151              : 
   23152        83563 :     case FLOAT_TRUNCATE:
   23153        83563 :       if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23154        58201 :         *total = cost->fadd;
   23155              :       else
   23156        50724 :         *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
   23157              :       return false;
   23158       682875 :     case FLOAT:
   23159       682875 :     case UNSIGNED_FLOAT:
   23160       682875 :       if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23161              :         /* TODO: We do not have cost tables for x87.  */
   23162        93122 :         *total = cost->fadd;
   23163       589753 :       else if (VECTOR_MODE_P (mode))
   23164            0 :         *total = ix86_vec_cost (mode, cost->cvtpi2ps);
   23165              :       else
   23166       589753 :         *total = cost->cvtsi2ss;
   23167              :       return false;
   23168              : 
   23169       283553 :     case FIX:
   23170       283553 :     case UNSIGNED_FIX:
   23171       283553 :       if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23172              :         /* TODO: We do not have cost tables for x87.  */
   23173       283553 :         *total = cost->fadd;
   23174            0 :       else if (VECTOR_MODE_P (mode))
   23175            0 :         *total = ix86_vec_cost (mode, cost->cvtps2pi);
   23176              :       else
   23177            0 :         *total = cost->cvtss2si;
   23178              :       return false;
   23179              : 
   23180       338752 :     case ABS:
   23181              :       /* SSE requires memory load for the constant operand. It may make
   23182              :          sense to account for this.  Of course the constant operand may or
   23183              :          may not be reused. */
   23184       338752 :       if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23185       241596 :         *total = cost->sse_op;
   23186        97156 :       else if (X87_FLOAT_MODE_P (mode))
   23187        31403 :         *total = cost->fabs;
   23188        65753 :       else if (FLOAT_MODE_P (mode))
   23189        25620 :         *total = ix86_vec_cost (mode, cost->sse_op);
   23190        40133 :       else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   23191         6610 :         *total = cost->sse_op;
   23192              :       return false;
   23193              : 
   23194        28633 :     case SQRT:
   23195        28633 :       if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23196        18295 :         *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
   23197        10338 :       else if (X87_FLOAT_MODE_P (mode))
   23198         4315 :         *total = cost->fsqrt;
   23199         6023 :       else if (FLOAT_MODE_P (mode))
   23200         6023 :         *total = ix86_vec_cost (mode,
   23201              :                                 mode == SFmode ? cost->sqrtss : cost->sqrtsd);
   23202              :       return false;
   23203              : 
   23204      3956463 :     case UNSPEC:
   23205      3956463 :       switch (XINT (x, 1))
   23206              :         {
   23207       125918 :         case UNSPEC_TP:
   23208       125918 :           *total = 0;
   23209       125918 :           break;
   23210              : 
   23211         5210 :         case UNSPEC_VTERNLOG:
   23212         5210 :           *total = cost->sse_op;
   23213         5210 :           if (!REG_P (XVECEXP (x, 0, 0)))
   23214          720 :             *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
   23215         5210 :           if (!REG_P (XVECEXP (x, 0, 1)))
   23216          694 :             *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
   23217         5210 :           if (!REG_P (XVECEXP (x, 0, 2)))
   23218          733 :             *total += rtx_cost (XVECEXP (x, 0, 2), mode, code, 2, speed);
   23219              :           return true;
   23220              : 
   23221        98550 :         case UNSPEC_PTEST:
   23222        98550 :           {
   23223        98550 :             *total = cost->sse_op;
   23224        98550 :             rtx test_op0 = XVECEXP (x, 0, 0);
   23225        98550 :             if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
   23226              :               return false;
   23227        97873 :             if (GET_CODE (test_op0) == AND)
   23228              :               {
   23229           23 :                 rtx and_op0 = XEXP (test_op0, 0);
   23230           23 :                 if (GET_CODE (and_op0) == NOT)
   23231            0 :                   and_op0 = XEXP (and_op0, 0);
   23232           23 :                 *total += rtx_cost (and_op0, GET_MODE (and_op0),
   23233              :                                     AND, 0, speed)
   23234           23 :                           + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
   23235              :                                       AND, 1, speed);
   23236              :              }
   23237              :             else
   23238        97850 :               *total = rtx_cost (test_op0, GET_MODE (test_op0),
   23239              :                                  UNSPEC, 0, speed);
   23240              :           }
   23241              :           return true;
   23242              : 
   23243        20414 :         case UNSPEC_BLENDV:
   23244        20414 :           *total = cost->sse_op;
   23245        20414 :           if (!REG_P (XVECEXP (x, 0, 0)))
   23246         8283 :             *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
   23247        20414 :           if (!REG_P (XVECEXP (x, 0, 1)))
   23248         9779 :             *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
   23249        20414 :           if (!REG_P (XVECEXP (x, 0, 2)))
   23250              :             {
   23251        12568 :               rtx cond = XVECEXP (x, 0, 2);
   23252        12568 :               if ((GET_CODE (cond) == LT || GET_CODE (cond) == GT)
   23253          773 :                   && CONST_VECTOR_P (XEXP (cond, 1)))
   23254              :                 {
   23255              :                   /* avx2_blendvpd256_gt and friends.  */
   23256          153 :                   if (!REG_P (XEXP (cond, 0)))
   23257           70 :                     *total += rtx_cost (XEXP (cond, 0), mode, code, 2, speed);
   23258              :                 }
   23259              :               else
   23260        12415 :                 *total += rtx_cost (cond, mode, code, 2, speed);
   23261              :             }
   23262              :           return true;
   23263              : 
   23264        27708 :         case UNSPEC_MOVMSK:
   23265        27708 :           *total = cost->sse_op;
   23266        27708 :           return true;
   23267              : 
   23268              :         default:
   23269              :           break;
   23270              :         }
   23271              :       return false;
   23272              : 
   23273      2016101 :     case VEC_CONCAT:
   23274              :       /* ??? Assume all of these vector manipulation patterns are
   23275              :          recognizable.  In which case they all pretty much have the
   23276              :          same cost.
   23277              :          ??? We should still recurse when computing cost.  */
   23278      2016101 :      *total = cost->sse_op;
   23279      2016101 :      return true;
   23280              : 
   23281      2463874 :     case VEC_SELECT:
   23282              :      /* Special case extracting lower part from the vector.
   23283              :         This by itself needs no code and most of SSE/AVX instructions have
   23284              :         packed and single forms where the single form may be represented
   23285              :         by such VEC_SELECT.
   23286              : 
   23287              :         Use cost 1 (despite the fact that functionally equivalent SUBREG has
   23288              :         cost 0).  Making VEC_SELECT completely free, for example instructs CSE
   23289              :         to forward propagate VEC_SELECT into
   23290              : 
   23291              :            (set (reg eax) (reg src))
   23292              : 
   23293              :         which then prevents fwprop and combining. See i.e.
   23294              :         gcc.target/i386/pr91103-1.c.
   23295              : 
   23296              :         ??? rtvec_series_p test should be, for valid patterns, equivalent to
   23297              :         vec_series_lowpart_p but is not, since the latter calls
   23298              :         can_change_mode_class on ALL_REGS and this returns false since x87 does
   23299              :         not support subregs at all.  */
   23300      2463874 :      if (rtvec_series_p (XVEC (XEXP (x, 1), 0), 0))
   23301       775660 :        *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
   23302       775660 :                           outer_code, opno, speed) + 1;
   23303              :      else
   23304              :        /* ??? We should still recurse when computing cost.  */
   23305      1688214 :        *total = cost->sse_op;
   23306              :      return true;
   23307              : 
   23308      1224597 :     case VEC_DUPLICATE:
   23309      2449194 :       *total = rtx_cost (XEXP (x, 0),
   23310      1224597 :                          GET_MODE (XEXP (x, 0)),
   23311              :                          VEC_DUPLICATE, 0, speed);
   23312              :       /* It's broadcast instruction, not embedded broadcasting.  */
   23313      1224597 :       if (outer_code == SET)
   23314      1176401 :         *total += cost->sse_op;
   23315              : 
   23316              :      return true;
   23317              : 
   23318       723701 :     case VEC_MERGE:
   23319       723701 :       mask = XEXP (x, 2);
   23320              :       /* Scalar versions of SSE instructions may be represented as:
   23321              : 
   23322              :          (vec_merge (vec_duplicate (operation ....))
   23323              :                      (register or memory)
   23324              :                      (const_int 1))
   23325              : 
   23326              :          In this case vec_merge and vec_duplicate is for free.
   23327              :          Just recurse into operation and second operand.  */
   23328       723701 :       if (mask == const1_rtx
   23329       211024 :           && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE)
   23330              :         {
   23331        74976 :           *total = rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   23332              :                              outer_code, opno, speed)
   23333        74976 :                    + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
   23334        74976 :           return true;
   23335              :         }
   23336              :       /* This is masked instruction, assume the same cost,
   23337              :          as nonmasked variant.  */
   23338       648725 :       else if (TARGET_AVX512F
   23339       648725 :                && (register_operand (mask, GET_MODE (mask))
   23340              :                    /* Redundant clean-up of high bits for kmask with VL=2/4
   23341              :                       i.e. (vec_merge op0, op1, (and op3 15)).  */
   23342       120908 :                    || (GET_CODE (mask) == AND
   23343          369 :                        && register_operand (XEXP (mask, 0), GET_MODE (mask))
   23344          369 :                        && CONST_INT_P (XEXP (mask, 1))
   23345          369 :                        && ((INTVAL (XEXP (mask, 1)) == 3
   23346          131 :                             && GET_MODE_NUNITS (mode) == 2)
   23347          238 :                            || (INTVAL (XEXP (mask, 1)) == 15
   23348          238 :                                && GET_MODE_NUNITS (mode) == 4)))))
   23349              :         {
   23350       374194 :           *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
   23351       374194 :                    + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
   23352       374194 :           return true;
   23353              :         }
   23354              :       /* Combination of the two above:
   23355              : 
   23356              :          (vec_merge (vec_merge (vec_duplicate (operation ...))
   23357              :                        (register or memory)
   23358              :                        (reg:QI mask))
   23359              :                     (register or memory)
   23360              :                     (const_int 1))
   23361              : 
   23362              :          i.e. avx512fp16_vcvtss2sh_mask.  */
   23363       274531 :       else if (TARGET_AVX512F
   23364       120539 :                && mask == const1_rtx
   23365        46497 :                && GET_CODE (XEXP (x, 0)) == VEC_MERGE
   23366        27158 :                && GET_CODE (XEXP (XEXP (x, 0), 0)) == VEC_DUPLICATE
   23367       276793 :                && register_operand (XEXP (XEXP (x, 0), 2),
   23368         2262 :                                     GET_MODE (XEXP (XEXP (x, 0), 2))))
   23369              :         {
   23370         2250 :           *total = rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
   23371              :                              mode, outer_code, opno, speed)
   23372         2250 :                    + rtx_cost (XEXP (XEXP (x, 0), 1),
   23373              :                                mode, outer_code, opno, speed)
   23374         2250 :                    + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
   23375         2250 :           return true;
   23376              :         }
   23377              :       /* vcmp.  */
   23378       272281 :       else if (unspec_pcmp_p (mask)
   23379       272281 :                || (GET_CODE (mask) == NOT
   23380            0 :                    && unspec_pcmp_p (XEXP (mask, 0))))
   23381              :         {
   23382         1950 :           rtx uns = GET_CODE (mask) == NOT ? XEXP (mask, 0) : mask;
   23383         1950 :           rtx unsop0 = XVECEXP (uns, 0, 0);
   23384              :           /* Make (subreg:V4SI (not:V16QI (reg:V16QI ..)) 0)
   23385              :              cost the same as register.
   23386              :              This is used by avx_cmp<mode>3_ltint_not.  */
   23387         1950 :           if (SUBREG_P (unsop0))
   23388          417 :             unsop0 = XEXP (unsop0, 0);
   23389         1950 :           if (GET_CODE (unsop0) == NOT)
   23390           18 :             unsop0 = XEXP (unsop0, 0);
   23391         1950 :           *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
   23392         1950 :                    + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
   23393         1950 :                    + rtx_cost (unsop0, mode, UNSPEC, opno, speed)
   23394         1950 :                    + rtx_cost (XVECEXP (uns, 0, 1), mode, UNSPEC, opno, speed)
   23395         1950 :                    + cost->sse_op;
   23396         1950 :           return true;
   23397              :         }
   23398              :       else
   23399       270331 :         *total = cost->sse_op;
   23400       270331 :       return false;
   23401              : 
   23402    107093139 :     case MEM:
   23403              :       /* CONST_VECTOR_DUPLICATE_P in constant_pool is just broadcast.
   23404              :          or variants in ix86_vector_duplicate_simode_const.  */
   23405              : 
   23406    107093139 :       if (GET_MODE_SIZE (mode) >= 16
   23407     18292188 :           && VECTOR_MODE_P (mode)
   23408     12260505 :           && SYMBOL_REF_P (XEXP (x, 0))
   23409      2215690 :           && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))
   23410    109080385 :           && ix86_broadcast_from_constant (mode, x))
   23411              :         {
   23412       494949 :           *total = COSTS_N_INSNS (2) + speed;
   23413       494949 :           return true;
   23414              :         }
   23415              : 
   23416              :       /* An insn that accesses memory is slightly more expensive
   23417              :          than one that does not.  */
   23418    106598190 :       if (speed)
   23419              :         {
   23420     95146968 :           *total += 1;
   23421     95146968 :           rtx addr = XEXP (x, 0);
   23422              :           /* For MEM, rtx_cost iterates each subrtx, and adds up the costs,
   23423              :              so for MEM (reg) and MEM (reg + 4), the former costs 5,
   23424              :              the latter costs 9, it is not accurate for x86. Ideally
   23425              :              address_cost should be used, but it reduce cost too much.
   23426              :              So current solution is make constant disp as cheap as possible.  */
   23427     95146968 :           if (GET_CODE (addr) == PLUS
   23428     77640348 :               && x86_64_immediate_operand (XEXP (addr, 1), Pmode)
   23429              :               /* Only handle (reg + disp) since other forms of addr are mostly LEA,
   23430              :                  there's no additional cost for the plus of disp.  */
   23431    167167265 :               && register_operand (XEXP (addr, 0), Pmode))
   23432              :             {
   23433     55906970 :               *total += 1;
   23434     68759013 :               *total += rtx_cost (XEXP (addr, 0), Pmode, PLUS, 0, speed);
   23435     55906970 :               return true;
   23436              :             }
   23437              :         }
   23438              : 
   23439              :       return false;
   23440              : 
   23441        53024 :     case ZERO_EXTRACT:
   23442        53024 :       if (XEXP (x, 1) == const1_rtx
   23443        11254 :           && GET_CODE (XEXP (x, 2)) == ZERO_EXTEND
   23444            0 :           && GET_MODE (XEXP (x, 2)) == SImode
   23445            0 :           && GET_MODE (XEXP (XEXP (x, 2), 0)) == QImode)
   23446              :         {
   23447              :           /* Ignore cost of zero extension and masking of last argument.  */
   23448            0 :           *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23449            0 :           *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23450            0 :           *total += rtx_cost (XEXP (XEXP (x, 2), 0), mode, code, 2, speed);
   23451            0 :           return true;
   23452              :         }
   23453              :       return false;
   23454              : 
   23455     29334302 :     case IF_THEN_ELSE:
   23456     29334302 :       if (TARGET_XOP
   23457        25487 :           && VECTOR_MODE_P (mode)
   23458     29339917 :           && (GET_MODE_SIZE (mode) == 16 || GET_MODE_SIZE (mode) == 32))
   23459              :         {
   23460              :           /* vpcmov.  */
   23461         5047 :           *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (6);
   23462         5047 :           if (!REG_P (XEXP (x, 0)))
   23463         4887 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23464         5047 :           if (!REG_P (XEXP (x, 1)))
   23465         4854 :             *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23466         5047 :           if (!REG_P (XEXP (x, 2)))
   23467         4856 :             *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
   23468         5047 :           return true;
   23469              :         }
   23470            0 :       else if (TARGET_CMOVE
   23471     29329255 :                && SCALAR_INT_MODE_P (mode)
   23472     31702577 :                && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
   23473              :         {
   23474              :           /* cmov.  */
   23475      2174355 :           *total = COSTS_N_INSNS (1);
   23476      2174355 :           if (!COMPARISON_P (XEXP (x, 0)) && !REG_P (XEXP (x, 0)))
   23477            0 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23478      2174355 :           if (!REG_P (XEXP (x, 1)))
   23479       115950 :             *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23480      2174355 :           if (!REG_P (XEXP (x, 2)))
   23481       708213 :             *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
   23482      2174355 :           return true;
   23483              :         }
   23484              :       return false;
   23485              : 
   23486     18395137 :     case EQ:
   23487     18395137 :     case GT:
   23488     18395137 :     case GTU:
   23489     18395137 :     case LT:
   23490     18395137 :     case LTU:
   23491     18395137 :       if (TARGET_SSE2
   23492     18391933 :           && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   23493     18738296 :           && GET_MODE_SIZE (mode) >= 8)
   23494              :         {
   23495              :           /* vpcmpeq */
   23496       338954 :           *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (4);
   23497       338954 :           if (!REG_P (XEXP (x, 0)))
   23498        64007 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23499       338954 :           if (!REG_P (XEXP (x, 1)))
   23500       130221 :             *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23501       338954 :           return true;
   23502              :         }
   23503     18056183 :       if (TARGET_XOP
   23504        12298 :           && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   23505     18056291 :           && GET_MODE_SIZE (mode) <= 16)
   23506              :         {
   23507              :           /* vpcomeq */
   23508          108 :           *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (6);
   23509          108 :           if (!REG_P (XEXP (x, 0)))
   23510            0 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23511          108 :           if (!REG_P (XEXP (x, 1)))
   23512            0 :             *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23513          108 :           return true;
   23514              :         }
   23515              :       return false;
   23516              : 
   23517     16062114 :     case NE:
   23518     16062114 :     case GE:
   23519     16062114 :     case GEU:
   23520     16062114 :       if (TARGET_XOP
   23521        22467 :           && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   23522     16069268 :           && GET_MODE_SIZE (mode) <= 16)
   23523              :         {
   23524              :           /* vpcomneq */
   23525         7154 :           *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (6);
   23526         7154 :           if (!REG_P (XEXP (x, 0)))
   23527         1449 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23528         7154 :           if (!REG_P (XEXP (x, 1)))
   23529         6134 :             *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23530         7154 :           return true;
   23531              :         }
   23532     16054960 :       if (TARGET_SSE2
   23533     16052853 :           && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   23534     16056260 :           && GET_MODE_SIZE (mode) >= 8)
   23535              :         {
   23536         1326 :           if (TARGET_AVX512F && GET_MODE_SIZE (mode) >= 16)
   23537              :             /* vpcmpeq + vpternlog */
   23538           28 :             *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (11);
   23539              :           else
   23540              :             /* vpcmpeq + pxor + vpcmpeq */
   23541         1272 :             *total = speed ? COSTS_N_INSNS (3) : COSTS_N_BYTES (12);
   23542         1284 :           if (!REG_P (XEXP (x, 0)))
   23543           28 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23544         1284 :           if (!REG_P (XEXP (x, 1)))
   23545           28 :             *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23546         1284 :           return true;
   23547              :         }
   23548              :       return false;
   23549              : 
   23550              :     default:
   23551              :       return false;
   23552              :     }
   23553              : }
   23554              : 
   23555              : #if TARGET_MACHO
   23556              : 
   23557              : static int current_machopic_label_num;
   23558              : 
/* Given a symbol name SYMB and its associated stub name STUB, write out
   the definition of the Mach-O lazy-binding stub to FILE.  Only used for
   32-bit Darwin; three stub flavors exist (AT&T-style, PIC, and
   non-PIC/dynamic-no-pic) selected by MACHOPIC_ATT_STUB / MACHOPIC_PURE.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  /* Each stub gets a fresh label number for its local labels.  */
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  /* Build the mangled binder and symbol names in stack buffers.  */
  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  /* Name of the lazy pointer slot this stub jumps through.  */
  sprintf (lazy_ptr_name, "L%d$lz", label);

  /* Pick the section matching the stub flavor.  */
  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      /* AT&T-style stub body is filled in by the linker; emit hlt padding.  */
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
    }
  else if (MACHOPIC_PURE)
    {
      /* PIC stub.  */
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);   /* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
               label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n");
    }
  else
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      /* PIC flavor: pass the lazy-pointer address in %ecx.  */
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);

  /* N.B. Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
    }
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);

  /* Emit the lazy pointer itself: initially the binder's address, to be
     patched by dyld on first use.  */
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, ASM_LONG "%s\n", binder_name);
}
   23646              : #endif /* TARGET_MACHO */
   23647              : 
   23648              : /* Order the registers for register allocator.  */
   23649              : 
   23650              : void
   23651       216819 : x86_order_regs_for_local_alloc (void)
   23652              : {
   23653       216819 :    int pos = 0;
   23654       216819 :    int i;
   23655              : 
   23656              :    /* First allocate the local general purpose registers.  */
   23657     20164167 :    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
   23658     26885556 :      if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
   23659      5646757 :         reg_alloc_order [pos++] = i;
   23660              : 
   23661              :    /* Global general purpose registers.  */
   23662     20164167 :    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
   23663     23155757 :      if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
   23664      1291451 :         reg_alloc_order [pos++] = i;
   23665              : 
   23666              :    /* x87 registers come first in case we are doing FP math
   23667              :       using them.  */
   23668       216819 :    if (!TARGET_SSE_MATH)
   23669        57582 :      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
   23670        51184 :        reg_alloc_order [pos++] = i;
   23671              : 
   23672              :    /* SSE registers.  */
   23673      1951371 :    for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
   23674      1734552 :      reg_alloc_order [pos++] = i;
   23675      1951371 :    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
   23676      1734552 :      reg_alloc_order [pos++] = i;
   23677              : 
   23678              :    /* Extended REX SSE registers.  */
   23679      3685923 :    for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
   23680      3469104 :      reg_alloc_order [pos++] = i;
   23681              : 
   23682              :    /* Mask register.  */
   23683      1951371 :    for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
   23684      1734552 :      reg_alloc_order [pos++] = i;
   23685              : 
   23686              :    /* x87 registers.  */
   23687       216819 :    if (TARGET_SSE_MATH)
   23688      1893789 :      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
   23689      1683368 :        reg_alloc_order [pos++] = i;
   23690              : 
   23691      1951371 :    for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
   23692      1734552 :      reg_alloc_order [pos++] = i;
   23693              : 
   23694              :    /* Initialize the rest of array as we do not allocate some registers
   23695              :       at all.  */
   23696      1084095 :    while (pos < FIRST_PSEUDO_REGISTER)
   23697       867276 :      reg_alloc_order [pos++] = 0;
   23698       216819 : }
   23699              : 
   23700              : static bool
   23701    246671595 : ix86_ms_bitfield_layout_p (const_tree record_type)
   23702              : {
   23703    246671595 :   return ((TARGET_MS_BITFIELD_LAYOUT
   23704          215 :            && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
   23705    246671595 :           || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
   23706              : }
   23707              : 
   23708              : /* Returns an expression indicating where the this parameter is
   23709              :    located on entry to the FUNCTION.  */
   23710              : 
   23711              : static rtx
   23712         1761 : x86_this_parameter (tree function)
   23713              : {
   23714         1761 :   tree type = TREE_TYPE (function);
   23715         1761 :   bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
   23716         1761 :   int nregs;
   23717              : 
   23718         1761 :   if (TARGET_64BIT)
   23719              :     {
   23720         1759 :       const int *parm_regs;
   23721              : 
   23722         1759 :       if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type)))
   23723              :         parm_regs = x86_64_preserve_none_int_parameter_registers;
   23724         1759 :       else if (ix86_function_type_abi (type) == MS_ABI)
   23725              :         parm_regs = x86_64_ms_abi_int_parameter_registers;
   23726              :       else
   23727         1759 :         parm_regs = x86_64_int_parameter_registers;
   23728         1759 :       return gen_rtx_REG (Pmode, parm_regs[aggr]);
   23729              :     }
   23730              : 
   23731            2 :   nregs = ix86_function_regparm (type, function);
   23732              : 
   23733            2 :   if (nregs > 0 && !stdarg_p (type))
   23734              :     {
   23735            0 :       int regno;
   23736            0 :       unsigned int ccvt = ix86_get_callcvt (type);
   23737              : 
   23738            0 :       if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
   23739            0 :         regno = aggr ? DX_REG : CX_REG;
   23740            0 :       else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
   23741              :         {
   23742            0 :           regno = CX_REG;
   23743            0 :           if (aggr)
   23744            0 :             return gen_rtx_MEM (SImode,
   23745            0 :                                 plus_constant (Pmode, stack_pointer_rtx, 4));
   23746              :         }
   23747              :       else
   23748              :         {
   23749            0 :           regno = AX_REG;
   23750            0 :           if (aggr)
   23751              :             {
   23752            0 :               regno = DX_REG;
   23753            0 :               if (nregs == 1)
   23754            0 :                 return gen_rtx_MEM (SImode,
   23755            0 :                                     plus_constant (Pmode,
   23756              :                                                    stack_pointer_rtx, 4));
   23757              :             }
   23758              :         }
   23759            0 :       return gen_rtx_REG (SImode, regno);
   23760              :     }
   23761              : 
   23762            4 :   return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
   23763            4 :                                              aggr ? 8 : 4));
   23764              : }
   23765              : 
   23766              : /* Determine whether x86_output_mi_thunk can succeed.  */
   23767              : 
   23768              : static bool
   23769         4908 : x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
   23770              :                          const_tree function)
   23771              : {
   23772              :   /* 64-bit can handle anything.  */
   23773         4908 :   if (TARGET_64BIT)
   23774              :     return true;
   23775              : 
   23776              :   /* For 32-bit, everything's fine if we have one free register.  */
   23777           76 :   if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
   23778              :     return true;
   23779              : 
   23780              :   /* Need a free register for vcall_offset.  */
   23781            0 :   if (vcall_offset)
   23782              :     return false;
   23783              : 
   23784              :   /* Need a free register for GOT references.  */
   23785            0 :   if (flag_pic && !targetm.binds_local_p (function))
   23786              :     return false;
   23787              : 
   23788              :   /* Otherwise ok.  */
   23789              :   return true;
   23790              : }
   23791              : 
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  The thunk ends
   with a sibling call (or indirect jump) to FUNCTION.  */

static void
x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
		     HOST_WIDE_INT vcall_offset, tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;
  unsigned int tmp_regno;
  rtx_insn *insn;
  /* flag_force_indirect_call may be cleared below; save it so it can be
     restored before returning.  */
  int saved_flag_force_indirect_call = flag_force_indirect_call;

  /* Pick a scratch register that is not used for parameter passing by
     the calling convention of FUNCTION.  */
  if (TARGET_64BIT)
    tmp_regno = R10_REG;
  else
    {
      unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
	tmp_regno = AX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
	tmp_regno = DX_REG;
      else
	tmp_regno = CX_REG;

      /* 32-bit PIC handles the call through the GOT below; don't let
	 -mforce-indirect-call interfere with that.  */
      if (flag_pic)
  flag_force_indirect_call = 0;
    }

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* If CET branch protection is enabled, the thunk is an indirect-branch
     target and must start with an ENDBR instruction.  */
  if ((flag_cf_protection & CF_BRANCH))
    emit_insn (gen_nop_endbr ());

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
	{
	  /* DELTA may not fit in an immediate; load it into the scratch
	     register first.  */
	  if (!x86_64_general_operand (delta_rtx, Pmode))
	    {
	      tmp = gen_rtx_REG (Pmode, tmp_regno);
	      emit_move_insn (tmp, delta_rtx);
	      delta_rtx = tmp;
	    }
	}

      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;

      tmp = gen_rtx_REG (Pmode, tmp_regno);

      /* Load the vtable pointer (*this) into the scratch register.  */
      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
      if (TARGET_64BIT
	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
	{
	  /* VCALL_OFFSET too large for a displacement; materialize it
	     in %r11 and use a register-register address.  */
	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
	}

      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
	emit_insn (gen_addsi_1_zext (this_reg,
				     gen_rtx_REG (ptr_mode,
						  REGNO (this_reg)),
				     vcall_mem));
      else
	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  /* Compute the address of FUNCTION, going through the GOT when it
     is not known to bind locally under PIC.  */
  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
	  || TARGET_PECOFF)
	;
      else
	{
	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
	  tmp = gen_rtx_CONST (Pmode, tmp);
	  fnaddr = gen_const_mem (Pmode, tmp);
	}
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
	;
#if TARGET_MACHO
      else if (TARGET_MACHO)
	{
	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
	  fnaddr = XEXP (fnaddr, 0);
	}
#endif /* TARGET_MACHO */
      else
	{
	  /* 32-bit ELF PIC: set up the GOT pointer in %ecx and form a
	     GOT-relative memory reference.  */
	  tmp = gen_rtx_REG (Pmode, CX_REG);
	  output_set_got (tmp, NULL_RTX);

	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
	  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
	  fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
	  fnaddr = gen_const_mem (Pmode, fnaddr);
	}
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    {
      if (sibcall_insn_operand (fnaddr, word_mode))
	{
	  fnaddr = XEXP (DECL_RTL (function), 0);
	  tmp = gen_rtx_MEM (QImode, fnaddr);
	  tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
	  tmp = emit_call_insn (tmp);
	  SIBLING_CALL_P (tmp) = 1;
	}
      else
	emit_jump_insn (gen_indirect_jump (fnaddr));
    }
  else
    {
      if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
	{
	  // CM_LARGE_PIC always uses pseudo PIC register which is
	  // uninitialized.  Since FUNCTION is local and calling it
	  // doesn't go through PLT, we use scratch register %r11 as
	  // PIC register and initialize it here.
	  pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
	  ix86_init_large_pic_reg (tmp_regno);
	  fnaddr = legitimize_pic_address (fnaddr,
					   gen_rtx_REG (Pmode, tmp_regno));
	}

      /* If the address is not directly usable as a sibcall operand,
	 move it into the scratch register first.  */
      if (!sibcall_insn_operand (fnaddr, word_mode))
	{
	  tmp = gen_rtx_REG (word_mode, tmp_regno);
	  if (GET_MODE (fnaddr) != word_mode)
	    fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
	  emit_move_insn (tmp, fnaddr);
	  fnaddr = tmp;
	}

      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }
  emit_barrier ();

  /* Emit just enough of rest_of_compilation to get the insns emitted.  */
  insn = get_insns ();
  shorten_branches (insn);
  assemble_start_function (thunk_fndecl, fnname);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();
  assemble_end_function (thunk_fndecl, fnname);

  flag_force_indirect_call = saved_flag_force_indirect_call;
}
   23993              : 
/* Implement TARGET_ASM_FILE_START: emit target-specific directives at
   the top of the assembly output file.  */

static void
x86_file_start (void)
{
  default_file_start ();
  /* -m16 code is assembled as 16-bit.  */
  if (TARGET_16BIT)
    fputs ("\t.code16gcc\n", asm_out_file);
#if TARGET_MACHO
  darwin_file_start ();
#endif
  if (X86_FILE_START_VERSION_DIRECTIVE)
    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
  if (X86_FILE_START_FLTUSED)
    fputs ("\t.global\t__fltused\n", asm_out_file);
  /* -masm=intel switches the whole file to Intel syntax.  */
  if (ix86_asm_dialect == ASM_INTEL)
    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
}
   24010              : 
   24011              : int
   24012    102261464 : x86_field_alignment (tree type, int computed)
   24013              : {
   24014    102261464 :   machine_mode mode;
   24015              : 
   24016    102261464 :   if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
   24017              :     return computed;
   24018      9102704 :   if (TARGET_IAMCU)
   24019            0 :     return iamcu_alignment (type, computed);
   24020      9102704 :   type = strip_array_types (type);
   24021      9102704 :   mode = TYPE_MODE (type);
   24022      9102704 :   if (mode == DFmode || mode == DCmode
   24023      8997618 :       || GET_MODE_CLASS (mode) == MODE_INT
   24024      3014821 :       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
   24025              :     {
   24026      6087883 :       if (TYPE_ATOMIC (type) && computed > 32)
   24027              :         {
   24028            0 :           static bool warned;
   24029              : 
   24030            0 :           if (!warned && warn_psabi)
   24031              :             {
   24032            0 :               const char *url
   24033              :                 = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic";
   24034              : 
   24035            0 :               warned = true;
   24036            0 :               inform (input_location, "the alignment of %<_Atomic %T%> "
   24037              :                                       "fields changed in %{GCC 11.1%}",
   24038            0 :                       TYPE_MAIN_VARIANT (type), url);
   24039              :             }
   24040              :         }
   24041              :       else
   24042      6087883 :       return MIN (32, computed);
   24043              :     }
   24044              :   return computed;
   24045              : }
   24046              : 
/* Print call to TARGET to FILE.  LABEL, if nonempty, is a local "1:"
   label emitted in front of the instruction so a later __mcount_loc
   entry can reference the call site.  With -mnop-mcount (or the
   literal target name "nop") emit a same-sized nop instead.  */

static void
x86_print_call_or_nop (FILE *file, const char *target,
		       const char *label)
{
  if (flag_nop_mcount || !strcmp (target, "nop"))
    {
      if (TARGET_16BIT)
	/* 3 byte no-op: lea 0(%si), %si */
	fprintf (file, "%s" ASM_BYTE "0x8d, 0x74, 0x00\n", label);
      else
	/* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
	fprintf (file, "%s" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n",
		 label);
    }
  else if (!TARGET_PECOFF && flag_pic)
    {
      /* -fno-plt PIC calls need a GOT-indirect sequence; callers are
	 expected to have handled that case before reaching here.  */
      gcc_assert (flag_plt);

      fprintf (file, "%s\tcall\t%s@PLT\n", label, target);
    }
  else
    fprintf (file, "%s\tcall\t%s\n", label, target);
}
   24072              : 
   24073              : static bool
   24074          328 : current_fentry_name (const char **name)
   24075              : {
   24076          328 :   tree attr = lookup_attribute ("fentry_name",
   24077          328 :                                 DECL_ATTRIBUTES (current_function_decl));
   24078          328 :   if (!attr)
   24079              :     return false;
   24080            2 :   *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
   24081            2 :   return true;
   24082              : }
   24083              : 
   24084              : static bool
   24085           16 : current_fentry_section (const char **name)
   24086              : {
   24087           16 :   tree attr = lookup_attribute ("fentry_section",
   24088           16 :                                 DECL_ATTRIBUTES (current_function_decl));
   24089           16 :   if (!attr)
   24090              :     return false;
   24091            2 :   *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
   24092            2 :   return true;
   24093              : }
   24094              : 
/* Return a caller-saved register which isn't live or a callee-saved
   register which has been saved on stack in the prologue at entry for
   profile.  R11_OK says whether %r11 may be chosen; it must be
   avoided when a profile counter load already uses it (the
   NO_PROFILE_COUNTERS case in the caller).  */

static int
x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
{
  /* Use %r10 if the profiler is emitted before the prologue or it isn't
     used by DRAP.  */
  if (ix86_profile_before_prologue ()
      || !crtl->drap_reg
      || REGNO (crtl->drap_reg) != R10_REG)
    return R10_REG;

  /* The profiler is emitted after the prologue.  If there is a
     caller-saved register which isn't live or a callee-saved
     register saved on stack in the prologue, use it.  */

  bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));

  int i;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i)
	&& i != R10_REG
#ifdef NO_PROFILE_COUNTERS
	&& (r11_ok || i != R11_REG)
#else
	&& i != R11_REG
#endif
	/* Skip registers the current ISA/ABI can't touch.  */
	&& TEST_HARD_REG_BIT (accessible_reg_set, i)
	&& (ix86_save_reg (i, true, true)
	    || (call_used_regs[i]
		&& !fixed_regs[i]
		&& !REGNO_REG_SET_P (reg_live, i))))
      return i;

  /* No usable scratch register; diagnose instead of emitting wrong
     code.  The R10_REG return below is a dummy after the sorry.  */
  sorry ("no register available for profiling %<-mcmodel=large%s%>",
	 ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");

  return R10_REG;
}
   24136              : 
/* Output assembler code to FILE to increment profiler label # LABELNO
   for profiling a function entry.  Emits any queued ENDBR/patchable
   area first, resolves the mcount symbol name, prints a call sequence
   appropriate for the code model and PIC-ness, and finally records the
   call site in the __mcount_loc (or user-chosen) section if asked.  */
void
x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  /* If the profiler call is emitted at the function entrance, the
     queued ENDBR marker and patchable area must be printed first so
     they stay at the very start of the function.  */
  if (cfun->machine->insn_queued_at_entrance)
    {
      if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
	fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
      unsigned int patch_area_size
	= crtl->patch_area_size - crtl->patch_area_entry;
      if (patch_area_size)
	ix86_output_patchable_area (patch_area_size,
				    crtl->patch_area_entry == 0);
    }

  const char *mcount_name = MCOUNT_NAME;

  /* -mrecord-mcount or a "fentry_section" attribute requests recording
     the call address; that needs a "1:" label before the call.  */
  bool fentry_section_p
    = (flag_record_mcount
       || lookup_attribute ("fentry_section",
			    DECL_ATTRIBUTES (current_function_decl)));

  const char *label = fentry_section_p ? "1:" : "";

  /* Symbol to call: "fentry_name" attribute beats -mfentry-name=,
     which beats the -mfentry default.  */
  if (current_fentry_name (&mcount_name))
    ;
  else if (fentry_name)
    mcount_name = fentry_name;
  else if (flag_fentry)
    mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;

  if (TARGET_64BIT)
    {
#ifndef NO_PROFILE_COUNTERS
      /* Load the address of the per-call-site counter into %r11.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL)
	fprintf (file, "\tlea\tr11, %sP%d[rip]\n", LPREFIX, labelno);
      else
	fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
#endif

      int scratch;
      const char *reg;
      char legacy_reg[4] = { 0 };

      if (!TARGET_PECOFF)
	{
	  switch (ix86_cmodel)
	    {
	    case CM_LARGE:
	      /* A direct call may not reach mcount in the large model;
		 load its full 64-bit address into a scratch register
		 and call indirectly.  */
	      scratch = x86_64_select_profile_regnum (true);
	      reg = hi_reg_name[scratch];
	      if (LEGACY_INT_REGNO_P (scratch))
		{
		  /* hi_reg_name spells legacy registers without the
		     'r' prefix; build e.g. "rax" from "ax".  */
		  legacy_reg[0] = 'r';
		  legacy_reg[1] = reg[0];
		  legacy_reg[2] = reg[1];
		  reg = legacy_reg;
		}
	      if (ASSEMBLER_DIALECT == ASM_INTEL)
		fprintf (file, "%s\tmovabs\t%s, OFFSET FLAT:%s\n"
			       "\tcall\t%s\n", label, reg, mcount_name,
			       reg);
	      else
		fprintf (file, "%s\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n",
			 label, mcount_name, reg, reg);
	      break;
	    case CM_LARGE_PIC:
#ifdef NO_PROFILE_COUNTERS
	      /* Large PIC model: compute the GOT base from the "1:"
		 anchor, add mcount's @PLTOFF, then call indirectly.
		 %r11 serves as the second scratch, hence r11_ok ==
		 false for the primary scratch selection.  */
	      scratch = x86_64_select_profile_regnum (false);
	      reg = hi_reg_name[scratch];
	      if (LEGACY_INT_REGNO_P (scratch))
		{
		  legacy_reg[0] = 'r';
		  legacy_reg[1] = reg[0];
		  legacy_reg[2] = reg[1];
		  reg = legacy_reg;
		}
	      if (ASSEMBLER_DIALECT == ASM_INTEL)
		{
		  fprintf (file, "1:movabs\tr11, "
				 "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
		  fprintf (file, "\tlea\t%s, 1b[rip]\n", reg);
		  fprintf (file, "\tadd\t%s, r11\n", reg);
		  fprintf (file, "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
			   mcount_name);
		  fprintf (file, "\tadd\t%s, r11\n", reg);
		  fprintf (file, "\tcall\t%s\n", reg);
		  break;
		}
	      fprintf (file,
		       "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
	      fprintf (file, "\tleaq\t1b(%%rip), %%%s\n", reg);
	      fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
	      fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
	      fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
	      fprintf (file, "\tcall\t*%%%s\n", reg);
#else
	      sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
#endif
	      break;
	    case CM_SMALL_PIC:
	    case CM_MEDIUM_PIC:
	      if (!flag_plt)
		{
		  /* -fno-plt: call mcount through its GOT slot.  */
		  if (ASSEMBLER_DIALECT == ASM_INTEL)
		    fprintf (file, "%s\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n",
			     label, mcount_name);
		  else
		    fprintf (file, "%s\tcall\t*%s@GOTPCREL(%%rip)\n",
			     label, mcount_name);
		  break;
		}
	      /* fall through */
	    default:
	      x86_print_call_or_nop (file, mcount_name, label);
	      break;
	    }
	}
      else
	x86_print_call_or_nop (file, mcount_name, label);
    }
  else if (flag_pic)
    {
#ifndef NO_PROFILE_COUNTERS
      /* 32-bit PIC: address the counter relative to the GOT in %ebx.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL)
	fprintf (file,
		 "\tlea\t" PROFILE_COUNT_REGISTER ", %sP%d@GOTOFF[ebx]\n",
		 LPREFIX, labelno);
      else
	fprintf (file,
		 "\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n",
		 LPREFIX, labelno);
#endif
      if (flag_plt)
	x86_print_call_or_nop (file, mcount_name, label);
      else if (ASSEMBLER_DIALECT == ASM_INTEL)
	fprintf (file, "%s\tcall\t[DWORD PTR %s@GOT[ebx]]\n",
		 label, mcount_name);
      else
	fprintf (file, "%s\tcall\t*%s@GOT(%%ebx)\n",
		 label, mcount_name);
    }
  else
    {
#ifndef NO_PROFILE_COUNTERS
      /* Non-PIC 32-bit: the counter address is a plain absolute.  */
      if (ASSEMBLER_DIALECT == ASM_INTEL)
	fprintf (file,
		 "\tmov\t" PROFILE_COUNT_REGISTER ", OFFSET FLAT:%sP%d\n",
		 LPREFIX, labelno);
      else
	fprintf (file, "\tmovl\t$%sP%d, %%" PROFILE_COUNT_REGISTER "\n",
		 LPREFIX, labelno);
#endif
      x86_print_call_or_nop (file, mcount_name, label);
    }

  /* Record the "1:" call-site label in the requested section.  */
  if (fentry_section_p)
    {
      const char *sname = "__mcount_loc";

      /* Section name: "fentry_section" attribute beats
	 -mfentry-section=, which beats the __mcount_loc default.  */
      if (current_fentry_section (&sname))
	;
      else if (fentry_section)
	sname = fentry_section;

      fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
      fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
      fprintf (file, "\t.previous\n");
    }
}
   24308              : 
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  Returns a conservative lower bound, in bytes, for
   INSN's encoded length (0 for insns emitting no code).  */

int
ix86_min_insn_size (rtx_insn *insn)
{
  int l = 0, len;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emit and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  len = get_attr_length (insn);
  if (len <= 1)
    return 1;

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      enum attr_type type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MULTI:
	  /* Inline asm can expand to anything; its minimum length is
	     estimated as 0.  */
	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (insn)) >= 0)
	    return 0;
	  break;
	case TYPE_OTHER:
	case TYPE_FCMP:
	  break;
	default:
	  /* Otherwise trust get_attr_length.  */
	  return len;
	}

      /* For the imprecise types above, bound the size by the address
	 bytes; a symbolic reference implies at least a 4-byte field.  */
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  /* One opcode byte plus address bytes, or a 2-byte minimum when the
     address size is unknown.  */
  if (l)
    return 1+l;
  else
    return 2;
}
   24367              : 
   24368              : #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
   24369              : 
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  Insert alignment padding so that at most 3 jumps/calls can
   share any 16 byte page of code.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx_insn *insn, *start = get_insns ();
  int nbytes = 0, njumps = 0;
  bool isjump = false;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).

     Don't consider asm goto as jump, while it can contain a jump, it doesn't
     have to, control transfer to label(s) can be performed through other
     means, and also we estimate minimum length of all asm stmts as 0.  */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  align_flags alignment = label_to_alignment (insn);
	  int align = alignment.levels[0].log;
	  int max_skip = alignment.levels[0].maxskip;

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      /* The label's alignment may consume up to MAX_SKIP bytes;
		 drop insns off the front of the window until the window
		 plus MAX_SKIP fits a 16 byte page.  */
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (start);
		  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
		      || CALL_P (start))
		    njumps--, isjump = true;
		  else
		    isjump = false;
		  nbytes -= ix86_min_insn_size (start);
		}
	    }
	  continue;
	}

      min_size = ix86_min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      /* Only jumps (excluding asm goto) and calls extend the count;
	 anything else just grows the window.  */
      if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      /* Shrink the window from the front until it holds at most 4
	 jumps; ISJUMP remembers whether the last dropped insn was a
	 jump/call (i.e. whether the interval is minimal).  */
      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
	      || CALL_P (start))
	    njumps--, isjump = true;
	  else
	    isjump = false;
	  nbytes -= ix86_min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  /* Four jumps could land in one 16 byte page; pad so INSN
	     starts in the next page.  */
	  int padsize = 15 - nbytes + ix86_min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_max_skip_align (GEN_INT (4), GEN_INT (padsize)), insn);
	}
    }
}
   24468              : #endif
   24469              : 
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      basic_block bb = e->src;
      rtx_insn *ret = BB_END (bb);
      rtx_insn *prev;
      bool replace = false;

      /* Skip non-return exits and blocks optimized for size.  */
      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      /* Walk back to the nearest active insn or label before RET.  */
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  /* RET is a jump target: pad it if any predecessor reaches it
	     by a taken branch rather than by falling through.  */
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      {
		replace = true;
		break;
	      }
	}
      if (!replace)
	{
	  prev = prev_active_insn (ret);
	  /* Pad when RET directly follows a conditional jump or call.  */
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  /* Swap the plain return for its longer-encoded variant.  */
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
   24525              : 
   24526              : /* Count the minimum number of instructions in BB.  Return 4 if the
   24527              :    number of instructions >= 4.  */
   24528              : 
   24529              : static int
   24530           42 : ix86_count_insn_bb (basic_block bb)
   24531              : {
   24532           42 :   rtx_insn *insn;
   24533           42 :   int insn_count = 0;
   24534              : 
   24535              :   /* Count number of instructions in this block.  Return 4 if the number
   24536              :      of instructions >= 4.  */
   24537          297 :   FOR_BB_INSNS (bb, insn)
   24538              :     {
   24539              :       /* Only happen in exit blocks.  */
   24540          291 :       if (JUMP_P (insn)
   24541          291 :           && ANY_RETURN_P (PATTERN (insn)))
   24542              :         break;
   24543              : 
   24544          267 :       if (NONDEBUG_INSN_P (insn)
   24545          102 :           && GET_CODE (PATTERN (insn)) != USE
   24546          351 :           && GET_CODE (PATTERN (insn)) != CLOBBER)
   24547              :         {
   24548           84 :           insn_count++;
   24549           84 :           if (insn_count >= 4)
   24550              :             return insn_count;
   24551              :         }
   24552              :     }
   24553              : 
   24554              :   return insn_count;
   24555              : }
   24556              : 
   24557              : 
/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      /* BB is reachable directly from entry: no predecessor
	 instructions precede it on that path.  */
      if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	{
	  min_prev_count = 0;
	  break;
	}
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
	{
	  if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	    {
	      int count = ix86_count_insn_bb (e->src);
	      if (count < min_prev_count)
		min_prev_count = count;
	      break;
	    }
	}
    }

  /* Add BB's own instructions unless the path count already saturated.  */
  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}
   24602              : 
/* Pad short function to 4 instructions.  NOPs are inserted before the
   epilogue so that every path through the function executes at least
   four instructions.  */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx_insn *insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      /* No epilogue note found; fall back to padding right
		 before the return itself.  */
	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}
   24639              : 
/* Fix up a Windows system unwinder issue.  If an EH region falls through into
   the epilogue, the Windows system unwinder will apply epilogue logic and
   produce incorrect offsets.  This can be avoided by adding a nop between
   the last insn that can throw and the first insn of the epilogue.  */

static void
ix86_seh_fixup_eh_fallthru (void)
{
  edge e;
  edge_iterator ei;

  /* Inspect every block that flows into the function exit.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *insn, *next;

      /* Find the beginning of the epilogue.  */
      for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
        if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
          break;
      /* No epilogue in this block: nothing to separate.  */
      if (insn == NULL)
        continue;

      /* We only care about preceding insns that can throw.  */
      insn = prev_active_insn (insn);
      if (insn == NULL || !can_throw_internal (insn))
        continue;

      /* Do not separate calls from their debug information.  Skip past
         any NOTE_INSN_VAR_LOCATION notes that immediately follow the
         throwing insn so the nop is emitted after them.  */
      for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
        if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
          insn = next;
        else
          break;

      /* A single nop is enough to stop the unwinder treating the
         throwing insn as part of the epilogue.  */
      emit_insn_after (gen_nops (const1_rtx), insn);
    }
}
/* Split vector load from parm_decl to elemental loads to avoid STLF
   stalls.  */
static void
ix86_split_stlf_stall_load ()
{
  rtx_insn* insn, *start = get_insns ();
  /* Count of real (non-debug) insns seen so far from function start.  */
  unsigned window = 0;

  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      if (!NONDEBUG_INSN_P (insn))
        continue;
      window++;
      /* Insert 64 vaddps %xmm18, %xmm19, %xmm20(no dependence between each
         other, just emulate for pipeline) before stalled load, stlf stall
         case is as fast as no stall cases on CLX.
         Since CFG is freed before machine_reorg, just do a rough
         calculation of the window according to the layout.  */
      if (window > (unsigned) x86_stlf_window_ninsns)
        return;

      /* Give up at the first control-flow change or call: the argument
         store that might forward into this load can no longer be close
         enough to stall.  */
      if (any_uncondjump_p (insn)
          || ANY_RETURN_P (PATTERN (insn))
          || CALL_P (insn))
        return;

      rtx set = single_set (insn);
      if (!set)
        continue;
      rtx src = SET_SRC (set);
      /* Only loads whose address is based on a PARM_DECL can alias the
         caller's argument stores and hit the store-to-load-forwarding
         penalty.  */
      if (!MEM_P (src)
          /* Only handle V2DFmode load since it doesn't need any scratch
             register.  */
          || GET_MODE (src) != E_V2DFmode
          || !MEM_EXPR (src)
          || TREE_CODE (get_base_address (MEM_EXPR (src))) != PARM_DECL)
        continue;

      /* Replace the single 16-byte load with two 8-byte element loads:
         movlpd for the low half (emitted before INSN) and movhpd for the
         high half (spliced into INSN's own pattern).  */
      rtx zero = CONST0_RTX (V2DFmode);
      rtx dest = SET_DEST (set);
      rtx m = adjust_address (src, DFmode, 0);
      rtx loadlpd = gen_sse2_loadlpd (dest, zero, m);
      emit_insn_before (loadlpd, insn);
      m = adjust_address (src, DFmode, 8);
      rtx loadhpd = gen_sse2_loadhpd (dest, dest, m);
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fputs ("Due to potential STLF stall, split instruction:\n",
                 dump_file);
          print_rtl_single (dump_file, insn);
          fputs ("To:\n", dump_file);
          print_rtl_single (dump_file, loadlpd);
          print_rtl_single (dump_file, loadhpd);
        }
      /* Reuse INSN for the high-half load and force re-recognition.  */
      PATTERN (insn) = loadhpd;
      INSN_CODE (insn) = -1;
      gcc_assert (recog_memoized (insn) != -1);
    }
}
   24736              : 
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  /* Windows SEH: keep throwing insns out of the epilogue (see
     ix86_seh_fixup_eh_fallthru above).  */
  if (TARGET_SEH && current_function_has_exception_handlers ())
    ix86_seh_fixup_eh_fallthru ();

  /* The remaining transformations are speed optimizations only.  */
  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_SSE2)
        ix86_split_stlf_stall_load ();
      /* Whole-function padding subsumes return padding, so the two
         tunings are mutually exclusive.  */
      if (TARGET_PAD_SHORT_FUNCTION)
        ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
        ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
      if (TARGET_FOUR_JUMP_LIMIT)
        ix86_avoid_jump_mispredicts ();
#endif
    }
}
   24763              : 
   24764              : /* Return nonzero when QImode register that must be represented via REX prefix
   24765              :    is used.  */
   24766              : bool
   24767      9086973 : x86_extended_QIreg_mentioned_p (rtx_insn *insn)
   24768              : {
   24769      9086973 :   int i;
   24770      9086973 :   extract_insn_cached (insn);
   24771     34425531 :   for (i = 0; i < recog_data.n_operands; i++)
   24772      4691861 :     if (GENERAL_REG_P (recog_data.operand[i])
   24773     22555565 :         && !QI_REGNO_P (REGNO (recog_data.operand[i])))
   24774              :        return true;
   24775              :   return false;
   24776              : }
   24777              : 
   24778              : /* Return true when INSN mentions register that must be encoded using REX
   24779              :    prefix.  */
   24780              : bool
   24781    196697858 : x86_extended_reg_mentioned_p (rtx insn)
   24782              : {
   24783    196697858 :   subrtx_iterator::array_type array;
   24784   1030771766 :   FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
   24785              :     {
   24786    882603811 :       const_rtx x = *iter;
   24787    882603811 :       if (REG_P (x)
   24788    882603811 :           && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))
   24789    253505051 :               || REX2_INT_REGNO_P (REGNO (x))))
   24790     48529903 :         return true;
   24791              :     }
   24792    148167955 :   return false;
   24793    196697858 : }
   24794              : 
   24795              : /* Return true when INSN mentions register that must be encoded using REX2
   24796              :    prefix.  */
   24797              : bool
   24798      2094808 : x86_extended_rex2reg_mentioned_p (rtx insn)
   24799              : {
   24800      2094808 :   subrtx_iterator::array_type array;
   24801      9751672 :   FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
   24802              :     {
   24803      7657535 :       const_rtx x = *iter;
   24804      7657535 :       if (REG_P (x) && REX2_INT_REGNO_P (REGNO (x)))
   24805          671 :         return true;
   24806              :     }
   24807      2094137 :   return false;
   24808      2094808 : }
   24809              : 
   24810              : /* Return true when rtx operands mentions register that must be encoded using
   24811              :    evex prefix.  */
   24812              : bool
   24813           10 : x86_evex_reg_mentioned_p (rtx operands[], int nops)
   24814              : {
   24815           10 :   int i;
   24816           28 :   for (i = 0; i < nops; i++)
   24817           22 :     if (EXT_REX_SSE_REG_P (operands[i])
   24818           40 :         || x86_extended_rex2reg_mentioned_p (operands[i]))
   24819            4 :       return true;
   24820              :   return false;
   24821              : }
   24822              : 
   24823              : /* If profitable, negate (without causing overflow) integer constant
   24824              :    of mode MODE at location LOC.  Return true in this case.  */
   24825              : bool
   24826      5876412 : x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
   24827              : {
   24828      5876412 :   HOST_WIDE_INT val;
   24829              : 
   24830      5876412 :   if (!CONST_INT_P (*loc))
   24831              :     return false;
   24832              : 
   24833      4965477 :   switch (mode)
   24834              :     {
   24835      2816527 :     case E_DImode:
   24836              :       /* DImode x86_64 constants must fit in 32 bits.  */
   24837      2816527 :       gcc_assert (x86_64_immediate_operand (*loc, mode));
   24838              : 
   24839              :       mode = SImode;
   24840              :       break;
   24841              : 
   24842              :     case E_SImode:
   24843              :     case E_HImode:
   24844              :     case E_QImode:
   24845              :       break;
   24846              : 
   24847            0 :     default:
   24848            0 :       gcc_unreachable ();
   24849              :     }
   24850              : 
   24851              :   /* Avoid overflows.  */
   24852      4965477 :   if (mode_signbit_p (mode, *loc))
   24853              :     return false;
   24854              : 
   24855      4964964 :   val = INTVAL (*loc);
   24856              : 
   24857              :   /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
   24858              :      Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
   24859      4964964 :   if ((val < 0 && val != -128)
   24860      3263763 :       || val == 128)
   24861              :     {
   24862      1712488 :       *loc = GEN_INT (-val);
   24863      1712488 :       return true;
   24864              :     }
   24865              : 
   24866              :   return false;
   24867              : }
   24868              : 
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx_code_label *neglab, *donelab;
  rtx i0, i1, f0, in, out;
  machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  /* If the value has the sign bit clear, a plain signed conversion is
     already correct; branch to the slow path only for "negative" input.  */
  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  /* Sign bit set: compute (in >> 1) | (in & 1), which halves the value
     while preserving rounding (the low bit is folded in so the final
     doubling rounds the same way), convert that, then double it.  */
  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
                            1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
                            1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  /* out = f0 + f0, i.e. undo the halving in floating point.  */
  emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
   24910              : 
   24911              : /* Return the diagnostic message string if conversion from FROMTYPE to
   24912              :    TOTYPE is not allowed, NULL otherwise.  */
   24913              : 
   24914              : static const char *
   24915   1082084384 : ix86_invalid_conversion (const_tree fromtype, const_tree totype)
   24916              : {
   24917   1082084384 :   machine_mode from_mode = element_mode (fromtype);
   24918   1082084384 :   machine_mode to_mode = element_mode (totype);
   24919              : 
   24920   1082084384 :   if (!TARGET_SSE2 && from_mode != to_mode)
   24921              :     {
   24922              :       /* Do no allow conversions to/from BFmode/HFmode scalar types
   24923              :          when TARGET_SSE2 is not available.  */
   24924       468008 :       if (from_mode == BFmode)
   24925              :         return N_("invalid conversion from type %<__bf16%> "
   24926              :                   "without option %<-msse2%>");
   24927       468007 :       if (from_mode == HFmode)
   24928              :         return N_("invalid conversion from type %<_Float16%> "
   24929              :                   "without option %<-msse2%>");
   24930       468007 :       if (to_mode == BFmode)
   24931              :         return N_("invalid conversion to type %<__bf16%> "
   24932              :                   "without option %<-msse2%>");
   24933       468007 :       if (to_mode == HFmode)
   24934              :         return N_("invalid conversion to type %<_Float16%> "
   24935              :                   "without option %<-msse2%>");
   24936              :     }
   24937              : 
   24938              :   /* Warn for silent implicit conversion between __bf16 and short,
   24939              :      since __bfloat16 is refined as real __bf16 instead of short
   24940              :      since GCC13.  */
   24941   1082084382 :   if (element_mode (fromtype) != element_mode (totype)
   24942   1082084382 :       && (TARGET_AVX512BF16 || TARGET_AVXNECONVERT))
   24943              :     {
   24944              :       /* Warn for silent implicit conversion where user may expect
   24945              :          a bitcast.  */
   24946      7767959 :       if ((TYPE_MODE (fromtype) == BFmode
   24947          279 :            && TYPE_MODE (totype) == HImode)
   24948      7768237 :           || (TYPE_MODE (totype) == BFmode
   24949          423 :               && TYPE_MODE (fromtype) == HImode))
   24950            1 :         warning (0, "%<__bfloat16%> is redefined from typedef %<short%> "
   24951              :                 "to real %<__bf16%> since GCC 13.1, be careful of "
   24952              :                  "implicit conversion between %<__bf16%> and %<short%>; "
   24953              :                  "an explicit bitcast may be needed here");
   24954              :     }
   24955              : 
   24956              :   /* Conversion allowed.  */
   24957              :   return NULL;
   24958              : }
   24959              : 
   24960              : /* Return the diagnostic message string if the unary operation OP is
   24961              :    not permitted on TYPE, NULL otherwise.  */
   24962              : 
   24963              : static const char *
   24964     91040445 : ix86_invalid_unary_op (int op, const_tree type)
   24965              : {
   24966     91040445 :   machine_mode mmode = element_mode (type);
   24967              :   /* Reject all single-operand operations on BFmode/HFmode except for &
   24968              :      when TARGET_SSE2 is not available.  */
   24969     91040445 :   if (!TARGET_SSE2 && op != ADDR_EXPR)
   24970              :     {
   24971       111098 :       if (mmode == BFmode)
   24972              :         return N_("operation not permitted on type %<__bf16%> "
   24973              :                   "without option %<-msse2%>");
   24974       111098 :       if (mmode == HFmode)
   24975            0 :         return N_("operation not permitted on type %<_Float16%> "
   24976              :                   "without option %<-msse2%>");
   24977              :     }
   24978              : 
   24979              :   /* Operation allowed.  */
   24980              :   return NULL;
   24981              : }
   24982              : 
   24983              : /* Return the diagnostic message string if the binary operation OP is
   24984              :    not permitted on TYPE1 and TYPE2, NULL otherwise.  */
   24985              : 
   24986              : static const char *
   24987    160400145 : ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
   24988              :                         const_tree type2)
   24989              : {
   24990    160400145 :   machine_mode type1_mode = element_mode (type1);
   24991    160400145 :   machine_mode type2_mode = element_mode (type2);
   24992              :   /* Reject all 2-operand operations on BFmode or HFmode
   24993              :      when TARGET_SSE2 is not available.  */
   24994    160400145 :   if (!TARGET_SSE2)
   24995              :     {
   24996      1008823 :       if (type1_mode == BFmode || type2_mode == BFmode)
   24997              :         return N_("operation not permitted on type %<__bf16%> "
   24998              :                   "without option %<-msse2%>");
   24999              : 
   25000      1008823 :       if (type1_mode == HFmode || type2_mode == HFmode)
   25001            0 :         return N_("operation not permitted on type %<_Float16%> "
   25002              :                   "without option %<-msse2%>");
   25003              :     }
   25004              : 
   25005              :   /* Operation allowed.  */
   25006              :   return NULL;
   25007              : }
   25008              : 
   25009              : 
   25010              : /* Target hook for scalar_mode_supported_p.  */
   25011              : static bool
   25012      4521866 : ix86_scalar_mode_supported_p (scalar_mode mode)
   25013              : {
   25014      4521866 :   if (DECIMAL_FLOAT_MODE_P (mode))
   25015       630517 :     return default_decimal_float_supported_p ();
   25016      3891349 :   else if (mode == TFmode)
   25017              :     return true;
   25018      3568988 :   else if (mode == HFmode || mode == BFmode)
   25019              :     return true;
   25020              :   else
   25021      2926249 :     return default_scalar_mode_supported_p (mode);
   25022              : }
   25023              : 
   25024              : /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
   25025              :    if MODE is HFmode, and punt to the generic implementation otherwise.  */
   25026              : 
   25027              : static bool
   25028      2213833 : ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
   25029              : {
   25030              :   /* NB: Always return TRUE for HFmode so that the _Float16 type will
   25031              :      be defined by the C front-end for AVX512FP16 intrinsics.  We will
   25032              :      issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
   25033              :      enabled.  */
   25034      1892939 :   return ((mode == HFmode || mode == BFmode)
   25035      3785878 :           ? true
   25036      1572045 :           : default_libgcc_floating_mode_supported_p (mode));
   25037              : }
   25038              : 
   25039              : /* Implements target hook vector_mode_supported_p.  */
   25040              : static bool
   25041   1318079707 : ix86_vector_mode_supported_p (machine_mode mode)
   25042              : {
   25043              :   /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be
   25044              :      either.  */
   25045   1454115481 :   if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode)
   25046              :     return false;
   25047   1318079257 :   if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
   25048              :     return true;
   25049   1109366475 :   if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
   25050              :     return true;
   25051    494890408 :   if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
   25052              :     return true;
   25053    355515471 :   if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
   25054              :     return true;
   25055    221300836 :   if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
   25056    221244149 :       && VALID_MMX_REG_MODE (mode))
   25057              :     return true;
   25058     31902055 :   if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
   25059     31266068 :       && VALID_MMX_REG_MODE_3DNOW (mode))
   25060              :     return true;
   25061     22345104 :   if (mode == V2QImode)
   25062        24814 :     return true;
   25063              :   return false;
   25064              : }
   25065              : 
   25066              : /* Target hook for c_mode_for_suffix.  */
   25067              : static machine_mode
   25068       196611 : ix86_c_mode_for_suffix (char suffix)
   25069              : {
   25070       196611 :   if (suffix == 'q')
   25071              :     return TFmode;
   25072           37 :   if (suffix == 'w')
   25073              :     return XFmode;
   25074              : 
   25075            0 :   return VOIDmode;
   25076              : }
   25077              : 
/* Helper function to map common constraints to non-EGPR ones.
   All related constraints have h prefix, and h plus Upper letter
   means the constraint is strictly EGPR enabled, while h plus
   lower letter indicates the constraint is strictly gpr16 only.

   Specially for "g" constraint, split it to rmi as there is
   no corresponding general constraint define for backend.

   Here is the full list to map constraints that may involve
   gpr to h prefixed.

   "g" -> "jrjmi"
   "r" -> "jr"
   "m" -> "jm"
   "<" -> "j<"
   ">" -> "j>"
   "o" -> "jo"
   "V" -> "jV"
   "p" -> "jp"
   "Bm" -> "ja"
*/

static void map_egpr_constraints (vec<const char *> &constraints)
{
  for (size_t i = 0; i < constraints.length(); i++)
    {
      const char *cur = constraints[i];

      /* Flag-output constraints never involve GPRs; leave untouched.  */
      if (startswith (cur, "=@cc"))
        continue;

      int len = strlen (cur);
      auto_vec<char> buf;

      /* Rewrite the constraint string character by character into BUF,
         applying the mapping table from the comment above.  */
      for (int j = 0; j < len; j++)
        {
          switch (cur[j])
            {
            case 'g':
              /* "g" has no j-prefixed equivalent; expand it to the
                 explicit "jrjmi" (register / memory / immediate).  */
              buf.safe_push ('j');
              buf.safe_push ('r');
              buf.safe_push ('j');
              buf.safe_push ('m');
              buf.safe_push ('i');
              break;
            case 'r':
            case 'm':
            case '<':
            case '>':
            case 'o':
            case 'V':
            case 'p':
              /* Single-character constraints that get a plain 'j'
                 prefix.  */
              buf.safe_push ('j');
              buf.safe_push (cur[j]);
              break;
            case 'B':
              /* "Bm" maps to "ja"; any other two-character B
                 constraint is copied verbatim.  Either way the second
                 character is consumed.  */
              if (cur[j + 1] == 'm')
                {
                  buf.safe_push ('j');
                  buf.safe_push ('a');
                  j++;
                }
              else
                {
                  buf.safe_push (cur[j]);
                  buf.safe_push (cur[j + 1]);
                  j++;
                }
              break;
            case 'T':
            case 'Y':
            case 'W':
            case 'j':
              /* Two-character constraint classes (including already
                 j-prefixed ones): copy both characters unchanged.  */
              buf.safe_push (cur[j]);
              buf.safe_push (cur[j + 1]);
              j++;
              break;
            case '{':
              /* Explicit register names "{...}" are copied through to
                 the closing brace.  */
              do
                {
                  buf.safe_push (cur[j]);
                } while (cur[j++] != '}');
              break;
            default:
              buf.safe_push (cur[j]);
              break;
            }
        }
      buf.safe_push ('\0');
      /* Replace the constraint with a permanent copy of the rewrite.  */
      constraints[i] = xstrdup (buf.address ());
    }
}
   25170              : 
   25171              : /* Worker function for TARGET_MD_ASM_ADJUST.
   25172              : 
   25173              :    We implement asm flag outputs, and maintain source compatibility
   25174              :    with the old cc0-based compiler.  */
   25175              : 
   25176              : static rtx_insn *
   25177       108263 : ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
   25178              :                     vec<machine_mode> & /*input_modes*/,
   25179              :                     vec<const char *> &constraints, vec<rtx> &/*uses*/,
   25180              :                     vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
   25181              :                     location_t loc)
   25182              : {
   25183       108263 :   bool saw_asm_flag = false;
   25184              : 
   25185       108263 :   start_sequence ();
   25186              : 
   25187       108263 :   if (TARGET_APX_EGPR && !ix86_apx_inline_asm_use_gpr32)
   25188           50 :     map_egpr_constraints (constraints);
   25189              : 
   25190       292550 :   for (unsigned i = 0, n = outputs.length (); i < n; ++i)
   25191              :     {
   25192        76858 :       const char *con = constraints[i];
   25193        76858 :       if (!startswith (con, "=@cc"))
   25194        76770 :         continue;
   25195           88 :       con += 4;
   25196           88 :       if (strchr (con, ',') != NULL)
   25197              :         {
   25198            1 :           error_at (loc, "alternatives not allowed in %<asm%> flag output");
   25199            1 :           continue;
   25200              :         }
   25201              : 
   25202           87 :       bool invert = false;
   25203           87 :       if (con[0] == 'n')
   25204           19 :         invert = true, con++;
   25205              : 
   25206           87 :       machine_mode mode = CCmode;
   25207           87 :       rtx_code code = UNKNOWN;
   25208              : 
   25209           87 :       switch (con[0])
   25210              :         {
   25211           15 :         case 'a':
   25212           15 :           if (con[1] == 0)
   25213              :             mode = CCAmode, code = EQ;
   25214            4 :           else if (con[1] == 'e' && con[2] == 0)
   25215              :             mode = CCCmode, code = NE;
   25216              :           break;
   25217           11 :         case 'b':
   25218           11 :           if (con[1] == 0)
   25219              :             mode = CCCmode, code = EQ;
   25220            6 :           else if (con[1] == 'e' && con[2] == 0)
   25221              :             mode = CCAmode, code = NE;
   25222              :           break;
   25223           14 :         case 'c':
   25224           14 :           if (con[1] == 0)
   25225              :             mode = CCCmode, code = EQ;
   25226              :           break;
   25227            8 :         case 'e':
   25228            8 :           if (con[1] == 0)
   25229              :             mode = CCZmode, code = EQ;
   25230              :           break;
   25231           11 :         case 'g':
   25232           11 :           if (con[1] == 0)
   25233              :             mode = CCGCmode, code = GT;
   25234            5 :           else if (con[1] == 'e' && con[2] == 0)
   25235              :             mode = CCGCmode, code = GE;
   25236              :           break;
   25237           10 :         case 'l':
   25238           10 :           if (con[1] == 0)
   25239              :             mode = CCGCmode, code = LT;
   25240            5 :           else if (con[1] == 'e' && con[2] == 0)
   25241              :             mode = CCGCmode, code = LE;
   25242              :           break;
   25243            4 :         case 'o':
   25244            4 :           if (con[1] == 0)
   25245              :             mode = CCOmode, code = EQ;
   25246              :           break;
   25247            4 :         case 'p':
   25248            4 :           if (con[1] == 0)
   25249              :             mode = CCPmode, code = EQ;
   25250              :           break;
   25251            4 :         case 's':
   25252            4 :           if (con[1] == 0)
   25253              :             mode = CCSmode, code = EQ;
   25254              :           break;
   25255            6 :         case 'z':
   25256            6 :           if (con[1] == 0)
   25257              :             mode = CCZmode, code = EQ;
   25258              :           break;
   25259              :         }
   25260            1 :       if (code == UNKNOWN)
   25261              :         {
   25262            1 :           error_at (loc, "unknown %<asm%> flag output %qs", constraints[i]);
   25263            1 :           continue;
   25264              :         }
   25265           86 :       if (invert)
   25266           19 :         code = reverse_condition (code);
   25267              : 
   25268           86 :       rtx dest = outputs[i];
   25269           86 :       if (!saw_asm_flag)
   25270              :         {
   25271              :           /* This is the first asm flag output.  Here we put the flags
   25272              :              register in as the real output and adjust the condition to
   25273              :              allow it.  */
   25274           75 :           constraints[i] = "=Bf";
   25275           75 :           outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
   25276           75 :           saw_asm_flag = true;
   25277              :         }
   25278              :       else
   25279              :         {
   25280              :           /* We don't need the flags register as output twice.  */
   25281           11 :           constraints[i] = "=X";
   25282           11 :           outputs[i] = gen_rtx_SCRATCH (SImode);
   25283              :         }
   25284              : 
   25285           86 :       rtx x = gen_rtx_REG (mode, FLAGS_REG);
   25286           86 :       x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);
   25287              : 
   25288           86 :       machine_mode dest_mode = GET_MODE (dest);
   25289           86 :       if (!SCALAR_INT_MODE_P (dest_mode))
   25290              :         {
   25291            3 :           error_at (loc, "invalid type for %<asm%> flag output");
   25292            3 :           continue;
   25293              :         }
   25294              : 
   25295           83 :       if (dest_mode == QImode)
   25296           73 :         emit_insn (gen_rtx_SET (dest, x));
   25297              :       else
   25298              :         {
   25299           10 :           rtx reg = gen_reg_rtx (QImode);
   25300           10 :           emit_insn (gen_rtx_SET (reg, x));
   25301              : 
   25302           10 :           reg = convert_to_mode (dest_mode, reg, 1);
   25303           10 :           emit_move_insn (dest, reg);
   25304              :         }
   25305              :     }
   25306              : 
   25307       108263 :   rtx_insn *seq = end_sequence ();
   25308              : 
   25309       108263 :   if (saw_asm_flag)
   25310              :     return seq;
   25311              :   else
   25312              :     {
   25313              :       /* If we had no asm flag outputs, clobber the flags.  */
   25314       108188 :       clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
   25315       108188 :       SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
   25316       108188 :       return NULL;
   25317              :     }
   25318              : }
   25319              : 
   25320              : /* Implements target vector targetm.asm.encode_section_info.  */
   25321              : 
   25322              : static void ATTRIBUTE_UNUSED
   25323      9909570 : ix86_encode_section_info (tree decl, rtx rtl, int first)
   25324              : {
   25325      9909570 :   default_encode_section_info (decl, rtl, first);
   25326              : 
   25327      9909570 :   if (ix86_in_large_data_p (decl))
   25328           32 :     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
   25329      9909570 : }
   25330              : 
   25331              : /* Worker function for REVERSE_CONDITION.  */
   25332              : 
   25333              : enum rtx_code
   25334     31754999 : ix86_reverse_condition (enum rtx_code code, machine_mode mode)
   25335              : {
   25336     31754999 :   return (mode == CCFPmode
   25337     31754999 :           ? reverse_condition_maybe_unordered (code)
   25338     27412115 :           : reverse_condition (code));
   25339              : }
   25340              : 
   25341              : /* Output code to perform an x87 FP register move, from OPERANDS[1]
   25342              :    to OPERANDS[0].  */
   25343              : 
   25344              : const char *
   25345       651077 : output_387_reg_move (rtx_insn *insn, rtx *operands)
   25346              : {
   25347       651077 :   if (REG_P (operands[0]))
   25348              :     {
   25349       544119 :       if (REG_P (operands[1])
   25350       544119 :           && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
   25351              :         {
   25352       296547 :           if (REGNO (operands[0]) == FIRST_STACK_REG)
   25353       275937 :             return output_387_ffreep (operands, 0);
   25354              :           return "fstp\t%y0";
   25355              :         }
   25356       247572 :       if (STACK_TOP_P (operands[0]))
   25357       247572 :         return "fld%Z1\t%y1";
   25358              :       return "fst\t%y0";
   25359              :     }
   25360       106958 :   else if (MEM_P (operands[0]))
   25361              :     {
   25362       106958 :       gcc_assert (REG_P (operands[1]));
   25363       106958 :       if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
   25364              :         return "fstp%Z0\t%y0";
   25365              :       else
   25366              :         {
   25367              :           /* There is no non-popping store to memory for XFmode.
   25368              :              So if we need one, follow the store with a load.  */
   25369         8390 :           if (GET_MODE (operands[0]) == XFmode)
   25370              :             return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
   25371              :           else
   25372         1882 :             return "fst%Z0\t%y0";
   25373              :         }
   25374              :     }
   25375              :   else
   25376            0 :     gcc_unreachable();
   25377              : }
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.
   NAME is the section name, FLAGS its SECTION_* bits, and DECL the
   declaration that caused the section switch (may be NULL).  Handles
   the quirks of the Solaris assembler before falling back to the
   generic ELF output.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
               flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#if HAVE_SOLARIS_AS
  /* The native assembler has its own COMDAT-group syntax; let the
     Solaris-specific helper emit it for link-once sections.  */
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
     SPARC assembler.  One cannot mix single-letter flags and #exclude, so
     only emit the latter here.  */
  if (flags & SECTION_EXCLUDE)
    {
      fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
      return;
    }
#endif

  /* Everything else uses the standard ELF .section directive.  */
  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
   25416              : 
   25417              : /* Return the mangling of TYPE if it is an extended fundamental type.  */
   25418              : 
   25419              : static const char *
   25420   1044701921 : ix86_mangle_type (const_tree type)
   25421              : {
   25422   1044701921 :   type = TYPE_MAIN_VARIANT (type);
   25423              : 
   25424   1044701921 :   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
   25425              :       && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
   25426              :     return NULL;
   25427              : 
   25428    564811083 :   if (type == float128_type_node || type == float64x_type_node)
   25429              :     return NULL;
   25430              : 
   25431    564127518 :   switch (TYPE_MODE (type))
   25432              :     {
   25433              :     case E_BFmode:
   25434              :       return "DF16b";
   25435       295241 :     case E_HFmode:
   25436              :       /* _Float16 is "DF16_".
   25437              :          Align with clang's decision in https://reviews.llvm.org/D33719. */
   25438       295241 :       return "DF16_";
   25439       637962 :     case E_TFmode:
   25440              :       /* __float128 is "g".  */
   25441       637962 :       return "g";
   25442      7848200 :     case E_XFmode:
   25443              :       /* "long double" or __float80 is "e".  */
   25444      7848200 :       return "e";
   25445              :     default:
   25446              :       return NULL;
   25447              :     }
   25448              : }
   25449              : 
   25450              : /* Create C++ tinfo symbols for only conditionally available fundamental
   25451              :    types.  */
   25452              : 
   25453              : static void
   25454            5 : ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
   25455              : {
   25456            5 :   extern tree ix86_float16_type_node;
   25457            5 :   extern tree ix86_bf16_type_node;
   25458              : 
   25459            5 :   if (!TARGET_SSE2)
   25460              :     {
   25461            0 :       if (!float16_type_node)
   25462            0 :         float16_type_node = ix86_float16_type_node;
   25463            0 :       if (!bfloat16_type_node)
   25464            0 :         bfloat16_type_node = ix86_bf16_type_node;
   25465            0 :       callback (float16_type_node);
   25466            0 :       callback (bfloat16_type_node);
   25467            0 :       float16_type_node = NULL_TREE;
   25468            0 :       bfloat16_type_node = NULL_TREE;
   25469              :     }
   25470            5 : }
   25471              : 
/* Cached VAR_DECL for the user-specified stack guard symbol
   (-mstack-protector-guard-symbol=), GC-rooted so it survives.  */
static GTY(()) tree ix86_tls_stack_chk_guard_decl;

/* Implement TARGET_STACK_PROTECT_GUARD: return a tree expression that
   reads the stack canary.  With the TLS-based guard this is either a
   named global symbol or a dereference at a fixed offset in the
   %fs/%gs segment; otherwise defer to the generic __stack_chk_guard.  */

static tree
ix86_stack_protect_guard (void)
{
  if (TARGET_SSP_TLS_GUARD)
    {
      /* Pointer-sized unsigned type, qualified with the segment
	 address space selected by -mstack-protector-guard-reg=.  */
      tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
      int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
      tree type = build_qualified_type (type_node, qual);
      tree t;

      if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str))
        {
          /* Guard lives in a named symbol; build (and cache) an
             external, volatile VAR_DECL for it.  */
          t = ix86_tls_stack_chk_guard_decl;

          if (t == NULL)
            {
              rtx x;

              t = build_decl
                (UNKNOWN_LOCATION, VAR_DECL,
                 get_identifier (ix86_stack_protector_guard_symbol_str),
                 type);
              TREE_STATIC (t) = 1;
              TREE_PUBLIC (t) = 1;
              DECL_EXTERNAL (t) = 1;
              TREE_USED (t) = 1;
              /* Volatile so reads of the canary are never CSEd away.  */
              TREE_THIS_VOLATILE (t) = 1;
              DECL_ARTIFICIAL (t) = 1;
              DECL_IGNORED_P (t) = 1;

              /* Do not share RTL as the declaration is visible outside of
                 current function.  */
              x = DECL_RTL (t);
              RTX_FLAG (x, used) = 1;

              ix86_tls_stack_chk_guard_decl = t;
            }
        }
      else
        {
          /* Guard lives at a fixed offset in the segment: build a
             volatile MEM_REF of *(type __seg *)offset.  */
          tree asptrtype = build_pointer_type (type);

          t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
          t = build2 (MEM_REF, asptrtype, t,
                      build_int_cst (asptrtype, 0));
          TREE_THIS_VOLATILE (t) = 1;
        }

      return t;
    }

  return default_stack_protect_guard ();
}
   25527              : 
   25528              : static bool
   25529          795 : ix86_stack_protect_runtime_enabled_p (void)
   25530              : {
   25531              :   /* Naked functions should not enable stack protector.  */
   25532          795 :   return !ix86_function_naked (current_function_decl);
   25533              : }
   25534              : 
   25535              : /* For 32-bit code we can save PIC register setup by using
   25536              :    __stack_chk_fail_local hidden function instead of calling
   25537              :    __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   25538              :    register, so it is better to call __stack_chk_fail directly.  */
   25539              : 
   25540              : static tree ATTRIBUTE_UNUSED
   25541          286 : ix86_stack_protect_fail (void)
   25542              : {
   25543          286 :   return TARGET_64BIT
   25544          286 :          ? default_external_stack_protect_fail ()
   25545            1 :          : default_hidden_stack_protect_fail ();
   25546              : }
   25547              : 
   25548              : /* Select a format to encode pointers in exception handling data.  CODE
   25549              :    is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   25550              :    true if the symbol may be affected by dynamic relocations.
   25551              : 
   25552              :    ??? All x86 object file formats are capable of representing this.
   25553              :    After all, the relocation needed is the same as for the call insn.
   25554              :    Whether or not a particular assembler allows us to enter such, I
   25555              :    guess we'll have to see.  */
   25556              : 
   25557              : int
   25558       787892 : asm_preferred_eh_data_format (int code, int global)
   25559              : {
   25560              :   /* PE-COFF is effectively always -fPIC because of the .reloc section.  */
   25561       787892 :   if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access)
   25562              :     {
   25563        39070 :       int type = DW_EH_PE_sdata8;
   25564        39070 :       if (ptr_mode == SImode
   25565        25098 :           || ix86_cmodel == CM_SMALL_PIC
   25566        39156 :           || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
   25567              :         type = DW_EH_PE_sdata4;
   25568        54682 :       return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
   25569              :     }
   25570              : 
   25571       748822 :   if (ix86_cmodel == CM_SMALL
   25572        18678 :       || (ix86_cmodel == CM_MEDIUM && code))
   25573       730157 :     return DW_EH_PE_udata4;
   25574              : 
   25575              :   return DW_EH_PE_absptr;
   25576              : }
   25577              : 
/* Worker for ix86_builtin_vectorization_cost and the fallback calls
   from ix86_vector_costs::add_stmt_cost.  Map the vectorizer cost kind
   TYPE_OF_COST for an operation in MODE onto the active cost table
   (ix86_cost).  Results are in COSTS_N_INSNS units.  */
static int
ix86_default_vector_cost (enum vect_cost_for_stmt type_of_cost,
                          machine_mode mode)
{
  bool fp = FLOAT_MODE_P (mode);
  int index;
  switch (type_of_cost)
    {
      case scalar_stmt:
        return fp ? ix86_cost->addss : COSTS_N_INSNS (1);

      case scalar_load:
        /* load/store costs are relative to register move which is 2. Recompute
           it to COSTS_N_INSNS so everything have same base.  */
        return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
                              : ix86_cost->int_load [2]) / 2;

      case scalar_store:
        /* Same rescaling as scalar_load above.  */
        return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
                              : ix86_cost->int_store [2]) / 2;

      case vector_stmt:
        return ix86_vec_cost (mode,
                              fp ? ix86_cost->addss : ix86_cost->sse_op);

      case vector_load:
        index = sse_store_index (mode);
        /* See PR82713 - we may end up being called on non-vector type.  */
        if (index < 0)
          index = 2;
        return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;

      case vector_store:
        index = sse_store_index (mode);
        /* See PR82713 - we may end up being called on non-vector type.  */
        if (index < 0)
          index = 2;
        return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;

      case vec_to_scalar:
      case scalar_to_vec:
        /* Cross-domain moves are charged as a generic SSE operation.  */
        return ix86_vec_cost (mode, ix86_cost->sse_op);

      /* We should have separate costs for unaligned loads and gather/scatter.
         Do that incrementally.  */
      case unaligned_load:
        index = sse_store_index (mode);
        /* See PR82713 - we may end up being called on non-vector type.  */
        if (index < 0)
          index = 2;
        return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;

      case unaligned_store:
        index = sse_store_index (mode);
        /* See PR82713 - we may end up being called on non-vector type.  */
        if (index < 0)
          index = 2;
        return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;

      case vector_gather_load:
        /* Gather cost scales with the element count.  */
        return ix86_vec_cost (mode,
                              COSTS_N_INSNS
                                 (ix86_cost->gather_static
                                  + ix86_cost->gather_per_elt
                                    * GET_MODE_NUNITS (mode)) / 2);

      case vector_scatter_store:
        return ix86_vec_cost (mode,
                              COSTS_N_INSNS
                                 (ix86_cost->scatter_static
                                  + ix86_cost->scatter_per_elt
                                    * GET_MODE_NUNITS (mode)) / 2);

      case cond_branch_taken:
        return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
        return ix86_cost->cond_not_taken_branch_cost;

      case vec_perm:
        return ix86_vec_cost (mode, ix86_cost->sse_op);

      case vec_promote_demote:
        if (fp)
          return vec_fp_conversion_cost (ix86_tune_cost, mode);
        return ix86_vec_cost (mode, ix86_cost->sse_op);

      case vec_construct:
        {
          int n = GET_MODE_NUNITS (mode);
          /* N - 1 element inserts into an SSE vector, the possible
             GPR -> XMM move is accounted for in add_stmt_cost.  */
          if (GET_MODE_BITSIZE (mode) <= 128)
            return (n - 1) * ix86_cost->sse_op;
          /* One vinserti128 for combining two SSE vectors for AVX256.  */
          else if (GET_MODE_BITSIZE (mode) == 256)
            return ((n - 2) * ix86_cost->sse_op
                    + ix86_vec_cost (mode, ix86_cost->sse_op));
          /* One vinserti64x4 and two vinserti128 for combining SSE
             and AVX256 vectors to AVX512.  */
          else if (GET_MODE_BITSIZE (mode) == 512)
            {
              machine_mode half_mode
                = mode_for_vector (GET_MODE_INNER (mode),
                                   GET_MODE_NUNITS (mode) / 2).require ();
              return ((n - 4) * ix86_cost->sse_op
                      + 2 * ix86_vec_cost (half_mode, ix86_cost->sse_op)
                      + ix86_vec_cost (mode, ix86_cost->sse_op));
            }
          gcc_unreachable ();
        }

      default:
        gcc_unreachable ();
    }
}
   25696              : 
   25697              : /* Implement targetm.vectorize.builtin_vectorization_cost.  */
   25698              : static int
   25699      9295332 : ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
   25700              :                                  tree vectype, int)
   25701              : {
   25702      9295332 :   machine_mode mode = TImode;
   25703      9295332 :   if (vectype != NULL)
   25704      7671208 :     mode = TYPE_MODE (vectype);
   25705      9295332 :   return ix86_default_vector_cost (type_of_cost, mode);
   25706              : }
   25707              : 
   25708              : 
   25709              : /* This function returns the calling abi specific va_list type node.
   25710              :    It returns  the FNDECL specific va_list type.  */
   25711              : 
   25712              : static tree
   25713        47586 : ix86_fn_abi_va_list (tree fndecl)
   25714              : {
   25715        47586 :   if (!TARGET_64BIT)
   25716          726 :     return va_list_type_node;
   25717        46860 :   gcc_assert (fndecl != NULL_TREE);
   25718              : 
   25719        46860 :   if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
   25720        12868 :     return ms_va_list_type_node;
   25721              :   else
   25722        33992 :     return sysv_va_list_type_node;
   25723              : }
   25724              : 
   25725              : /* Returns the canonical va_list type specified by TYPE. If there
   25726              :    is no valid TYPE provided, it return NULL_TREE.  */
   25727              : 
   25728              : static tree
   25729       246492 : ix86_canonical_va_list_type (tree type)
   25730              : {
   25731       246492 :   if (TARGET_64BIT)
   25732              :     {
   25733       245990 :       if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
   25734         5944 :         return ms_va_list_type_node;
   25735              : 
   25736       240046 :       if ((TREE_CODE (type) == ARRAY_TYPE
   25737        49923 :            && integer_zerop (array_type_nelts_minus_one (type)))
   25738       240046 :           || POINTER_TYPE_P (type))
   25739              :         {
   25740       188201 :           tree elem_type = TREE_TYPE (type);
   25741       188201 :           if (TREE_CODE (elem_type) == RECORD_TYPE
   25742       339588 :               && lookup_attribute ("sysv_abi va_list",
   25743       151387 :                                    TYPE_ATTRIBUTES (elem_type)))
   25744       151387 :             return sysv_va_list_type_node;
   25745              :         }
   25746              : 
   25747        88659 :       return NULL_TREE;
   25748              :     }
   25749              : 
   25750          502 :   return std_canonical_va_list_type (type);
   25751              : }
   25752              : 
   25753              : /* Iterate through the target-specific builtin types for va_list.
   25754              :    IDX denotes the iterator, *PTREE is set to the result type of
   25755              :    the va_list builtin, and *PNAME to its internal type.
   25756              :    Returns zero if there is no element for this index, otherwise
   25757              :    IDX should be increased upon the next call.
   25758              :    Note, do not iterate a base builtin's name like __builtin_va_list.
   25759              :    Used from c_common_nodes_and_builtins.  */
   25760              : 
   25761              : static int
   25762       618685 : ix86_enum_va_list (int idx, const char **pname, tree *ptree)
   25763              : {
   25764       618685 :   if (TARGET_64BIT)
   25765              :     {
   25766       613305 :       switch (idx)
   25767              :         {
   25768              :         default:
   25769              :           break;
   25770              : 
   25771       204435 :         case 0:
   25772       204435 :           *ptree = ms_va_list_type_node;
   25773       204435 :           *pname = "__builtin_ms_va_list";
   25774       204435 :           return 1;
   25775              : 
   25776       204435 :         case 1:
   25777       204435 :           *ptree = sysv_va_list_type_node;
   25778       204435 :           *pname = "__builtin_sysv_va_list";
   25779       204435 :           return 1;
   25780              :         }
   25781              :     }
   25782              : 
   25783              :   return 0;
   25784              : }
   25785              : 
   25786              : #undef TARGET_SCHED_DISPATCH
   25787              : #define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
   25788              : #undef TARGET_SCHED_DISPATCH_DO
   25789              : #define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
   25790              : #undef TARGET_SCHED_REASSOCIATION_WIDTH
   25791              : #define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
   25792              : #undef TARGET_SCHED_REORDER
   25793              : #define TARGET_SCHED_REORDER ix86_atom_sched_reorder
   25794              : #undef TARGET_SCHED_ADJUST_PRIORITY
   25795              : #define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
   25796              : #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
   25797              : #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
   25798              :   ix86_dependencies_evaluation_hook
   25799              : 
   25800              : 
   25801              : /* Implementation of reassociation_width target hook used by
   25802              :    reassoc phase to identify parallelism level in reassociated
   25803              :    tree.  Statements tree_code is passed in OPC.  Arguments type
   25804              :    is passed in MODE.  */
   25805              : 
   25806              : static int
   25807        28383 : ix86_reassociation_width (unsigned int op, machine_mode mode)
   25808              : {
   25809        28383 :   int width = 1;
   25810              :   /* Vector part.  */
   25811        28383 :   if (VECTOR_MODE_P (mode))
   25812              :     {
   25813         8443 :       int div = 1;
   25814         8443 :       if (INTEGRAL_MODE_P (mode))
   25815         2672 :         width = ix86_cost->reassoc_vec_int;
   25816         5771 :       else if (FLOAT_MODE_P (mode))
   25817         5771 :         width = ix86_cost->reassoc_vec_fp;
   25818              : 
   25819         8443 :       if (width == 1)
   25820              :         return 1;
   25821              : 
   25822              :       /* Znver1-4 Integer vector instructions execute in FP unit
   25823              :          and can execute 3 additions and one multiplication per cycle.  */
   25824         8438 :       if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
   25825         8438 :            || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4)
   25826            0 :           && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
   25827              :         return 1;
   25828              :       /* Znver5 can do 2 integer multiplications per cycle with latency
   25829              :          of 3.  */
   25830         8438 :       if ((ix86_tune == PROCESSOR_ZNVER5 || ix86_tune == PROCESSOR_ZNVER6)
   25831            0 :           && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
   25832         8438 :         width = 6;
   25833              : 
   25834              :       /* Account for targets that splits wide vectors into multiple parts.  */
   25835         8438 :       if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256)
   25836            0 :         div = GET_MODE_BITSIZE (mode) / 256;
   25837         8438 :       else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
   25838            0 :         div = GET_MODE_BITSIZE (mode) / 128;
   25839         8438 :       else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
   25840            0 :         div = GET_MODE_BITSIZE (mode) / 64;
   25841         8438 :       width = (width + div - 1) / div;
   25842         8438 :     }
   25843              :   /* Scalar part.  */
   25844              :   else if (INTEGRAL_MODE_P (mode))
   25845        13972 :     width = ix86_cost->reassoc_int;
   25846              :   else if (FLOAT_MODE_P (mode))
   25847         5968 :     width = ix86_cost->reassoc_fp;
   25848              : 
   25849              :   /* Avoid using too many registers in 32bit mode.  */
   25850        28378 :   if (!TARGET_64BIT && width > 2)
   25851        28383 :     width = 2;
   25852              :   return width;
   25853              : }
   25854              : 
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

/* Pick the vector mode the autovectorizer should prefer for scalar
   MODE: the widest mode the enabled ISA supports for that element
   type, narrowed when TARGET_PREFER_AVX256 / TARGET_PREFER_AVX128
   ask for smaller vectors.  Falls back to word_mode for element
   types that cannot be vectorized with the current ISA.  */

static machine_mode
ix86_preferred_simd_mode (scalar_mode mode)
{
  /* Without SSE there are no vector registers to use at all.  */
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case E_QImode:
      /* 512-bit byte vectors additionally require AVX512BW.  */
      if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
        return V64QImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V32QImode;
      else
        return V16QImode;

    case E_HImode:
      /* Likewise 512-bit HImode vectors require AVX512BW.  */
      if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
        return V32HImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V16HImode;
      else
        return V8HImode;

    case E_SImode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
        return V16SImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V8SImode;
      else
        return V4SImode;

    case E_DImode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
        return V8DImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V4DImode;
      else
        return V2DImode;

    case E_HFmode:
      /* _Float16 vectors need AVX512FP16; the narrower 128/256-bit
         modes are only selectable when AVX512VL is also enabled.  */
      if (TARGET_AVX512FP16)
        {
          if (TARGET_AVX512VL)
            {
              if (TARGET_PREFER_AVX128)
                return V8HFmode;
              else if (TARGET_PREFER_AVX256)
                return V16HFmode;
            }
          return V32HFmode;
        }
      return word_mode;

    case E_BFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
        return V32BFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V16BFmode;
      else
        return V8BFmode;

    case E_SFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
        return V16SFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V8SFmode;
      else
        return V4SFmode;

    case E_DFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
        return V8DFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V4DFmode;
      else if (TARGET_SSE2)
        return V2DFmode;
      /* DFmode without SSE2 is not vectorizable; FALLTHRU */

    default:
      return word_mode;
    }
}
   25941              : 
   25942              : /* If AVX is enabled then try vectorizing with both 256bit and 128bit
   25943              :    vectors.  If AVX512F is enabled then try vectorizing with 512bit,
   25944              :    256bit and 128bit vectors.  */
   25945              : 
   25946              : static unsigned int
   25947      2198645 : ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
   25948              : {
   25949      2198645 :   if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
   25950              :     {
   25951        74836 :       modes->safe_push (V64QImode);
   25952        74836 :       modes->safe_push (V32QImode);
   25953        74836 :       modes->safe_push (V16QImode);
   25954              :     }
   25955      2123809 :   else if (TARGET_AVX512F && all)
   25956              :     {
   25957          558 :       modes->safe_push (V32QImode);
   25958          558 :       modes->safe_push (V16QImode);
   25959          558 :       modes->safe_push (V64QImode);
   25960              :     }
   25961      2123251 :   else if (TARGET_AVX && !TARGET_PREFER_AVX128)
   25962              :     {
   25963        28741 :       modes->safe_push (V32QImode);
   25964        28741 :       modes->safe_push (V16QImode);
   25965              :     }
   25966      2094510 :   else if (TARGET_AVX && all)
   25967              :     {
   25968           24 :       modes->safe_push (V16QImode);
   25969           24 :       modes->safe_push (V32QImode);
   25970              :     }
   25971      2094486 :   else if (TARGET_SSE2)
   25972      2092221 :     modes->safe_push (V16QImode);
   25973              : 
   25974      2198645 :   if (TARGET_MMX_WITH_SSE)
   25975      1802679 :     modes->safe_push (V8QImode);
   25976              : 
   25977      2198645 :   if (TARGET_SSE2)
   25978      2196380 :     modes->safe_push (V4QImode);
   25979              : 
   25980      2198645 :   return ix86_vect_compare_costs ? VECT_COMPARE_COSTS : 0;
   25981              : }
   25982              : 
   25983              : /* Implemenation of targetm.vectorize.get_mask_mode.  */
   25984              : 
   25985              : static opt_machine_mode
   25986      3057281 : ix86_get_mask_mode (machine_mode data_mode)
   25987              : {
   25988      3057281 :   unsigned vector_size = GET_MODE_SIZE (data_mode);
   25989      3057281 :   unsigned nunits = GET_MODE_NUNITS (data_mode);
   25990      3057281 :   unsigned elem_size = vector_size / nunits;
   25991              : 
   25992              :   /* Scalar mask case.  */
   25993       317519 :   if ((TARGET_AVX512F && vector_size == 64)
   25994      2942376 :       || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))
   25995              :       /* AVX512FP16 only supports vector comparison
   25996              :          to kmask for _Float16.  */
   25997      2828542 :       || (TARGET_AVX512VL && TARGET_AVX512FP16
   25998         3213 :           && GET_MODE_INNER (data_mode) == E_HFmode)
   25999      5887429 :       || (TARGET_AVX10_2 && GET_MODE_INNER (data_mode) == E_BFmode))
   26000              :     {
   26001       229364 :       if (elem_size == 4
   26002       229364 :           || elem_size == 8
   26003       103265 :           || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
   26004       201753 :         return smallest_int_mode_for_size (nunits).require ();
   26005              :     }
   26006              : 
   26007      2855528 :   scalar_int_mode elem_mode
   26008      2855528 :     = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT).require ();
   26009              : 
   26010      2855528 :   gcc_assert (elem_size * nunits == vector_size);
   26011              : 
   26012      2855528 :   return mode_for_vector (elem_mode, nunits);
   26013              : }
   26014              : 
   26015              : 
   26016              : 
   26017              : /* Return class of registers which could be used for pseudo of MODE
   26018              :    and of class RCLASS for spilling instead of memory.  Return NO_REGS
   26019              :    if it is not possible or non-profitable.  */
   26020              : 
   26021              : /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */
   26022              : 
   26023              : static reg_class_t
   26024   6240600449 : ix86_spill_class (reg_class_t rclass, machine_mode mode)
   26025              : {
   26026   6240600449 :   if (0 && TARGET_GENERAL_REGS_SSE_SPILL
   26027              :       && TARGET_SSE2
   26028              :       && TARGET_INTER_UNIT_MOVES_TO_VEC
   26029              :       && TARGET_INTER_UNIT_MOVES_FROM_VEC
   26030              :       && (mode == SImode || (TARGET_64BIT && mode == DImode))
   26031              :       && INTEGER_CLASS_P (rclass))
   26032              :     return ALL_SSE_REGS;
   26033   6240600449 :   return NO_REGS;
   26034              : }
   26035              : 
   26036              : /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST.  Like the default implementation,
   26037              :    but returns a lower bound.  */
   26038              : 
   26039              : static unsigned int
   26040      1867170 : ix86_max_noce_ifcvt_seq_cost (edge e)
   26041              : {
   26042      1867170 :   bool predictable_p = predictable_edge_p (e);
   26043      1867170 :   if (predictable_p)
   26044              :     {
   26045       145354 :       if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
   26046            8 :         return param_max_rtl_if_conversion_predictable_cost;
   26047              :     }
   26048              :   else
   26049              :     {
   26050      1721816 :       if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
   26051           73 :         return param_max_rtl_if_conversion_unpredictable_cost;
   26052              :     }
   26053              : 
   26054              :   /* For modern machines with deeper pipeline, the penalty for branch
   26055              :      misprediction could be higher than before to reset the pipeline
   26056              :      slots. Add parameter br_mispredict_scale as a factor to describe
   26057              :      the impact of reseting the pipeline.  */
   26058              : 
   26059      1867089 :   return BRANCH_COST (true, predictable_p)
   26060      1867089 :          * ix86_tune_cost->br_mispredict_scale;
   26061              : }
   26062              : 
/* Return true if SEQ is a good candidate as a replacement for the
   if-convertible sequence described in IF_INFO.  Applies two x86
   specific filters on top of the generic profitability test:
   a CMOV-count limit for TARGET_ONE_IF_CONV_INSN tunings and a cost
   bonus for floating-point conditional moves without SSE4.1.  */

static bool
ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
{
  if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
    {
      int cmov_cnt = 0;
      /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
	 Maybe we should allow even more conditional moves as long as they
	 are used far enough not to stall the CPU, or also consider
	 IF_INFO->TEST_BB succ edge probabilities.  */
      for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
	{
	  rtx set = single_set (insn);
	  if (!set)
	    continue;
	  /* Only single-set IF_THEN_ELSE sources can be cmov-like.  */
	  if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
	    continue;
	  rtx src = SET_SRC (set);
	  machine_mode mode = GET_MODE (src);
	  if (GET_MODE_CLASS (mode) != MODE_INT
	      && GET_MODE_CLASS (mode) != MODE_FLOAT)
	    continue;
	  /* Both arms must be register or memory operands, matching the
	     operand constraints of cmov/fcmov.  */
	  if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
	      || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
	    continue;
	  /* insn is CMOV or FCMOV.  */
	  if (++cmov_cnt > 1)
	    return false;
	}
    }

  /* W/o TARGET_SSE4_1, it takes 3 instructions (pand, pandn and por)
     for movdfcc/movsfcc, and could possibly fail cost comparison.
     Increase branch cost will hurt performance for other modes, so
     specially add some preference for floating point ifcvt.  */
  if (!TARGET_SSE4_1 && if_info->x
      && GET_MODE_CLASS (GET_MODE (if_info->x)) == MODE_FLOAT
      && if_info->speed_p)
    {
      unsigned cost = seq_cost (seq, true);

      if (cost <= if_info->original_cost)
	return true;

      /* Allow up to two extra instructions of slack for the pand/
	 pandn/por expansion.  */
      return cost <= (if_info->max_seq_cost + COSTS_N_INSNS (2));
    }

  return default_noce_conversion_profitable_p (seq, if_info);
}
   26115              : 
/* x86-specific vector costs.  Accumulates per-statement costs during
   vectorizer costing and derives register-pressure and unrolling
   decisions in finish_cost.  */
class ix86_vector_costs : public vector_costs
{
public:
  ix86_vector_costs (vec_info *, bool);

  /* Record COUNT copies of a statement of kind KIND; see the
     out-of-line definition for the per-operation cost derivation.  */
  unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
			      stmt_vec_info stmt_info, slp_tree node,
			      tree vectype, int misalign,
			      vect_cost_model_location where) override;
  void finish_cost (const vector_costs *) override;

private:

  /* Estimate register pressure of the vectorized code.  */
  void ix86_vect_estimate_reg_pressure ();
  /* Number of GENERAL_REGS/SSE_REGS used in the vectorizer, it's used for
     estimation of register pressure.
     ??? Currently it's only used by vec_construct/scalar_to_vec
     where we know it's not loaded from memory.
     Indexed by vect_cost_model_location (prologue/body/epilogue).  */
  unsigned m_num_gpr_needed[3];
  unsigned m_num_sse_needed[3];
  /* Number of 256-bit vector permutation.  */
  unsigned m_num_avx256_vec_perm[3];
  /* Number of reductions for FMA/DOT_PROD_EXPR/SAD_EXPR  */
  unsigned m_num_reduc[X86_REDUC_LAST];
  /* Don't do unroll if m_prefer_unroll is false, default is true.  */
  bool m_prefer_unroll;
};
   26145              : 
/* Construct the cost accumulator for VINFO.  All counter arrays start
   zeroed and unrolling stays enabled until costing vetoes it.  */
ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool costing_for_scalar)
  : vector_costs (vinfo, costing_for_scalar),
    m_num_gpr_needed (),
    m_num_sse_needed (),
    m_num_avx256_vec_perm (),
    m_num_reduc (),
    m_prefer_unroll (true)
{}
   26154              : 
   26155              : /* Implement targetm.vectorize.create_costs.  */
   26156              : 
   26157              : static vector_costs *
   26158      1968094 : ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
   26159              : {
   26160      1968094 :   return new ix86_vector_costs (vinfo, costing_for_scalar);
   26161              : }
   26162              : 
   26163              : unsigned
   26164      6670767 : ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
   26165              :                                   stmt_vec_info stmt_info, slp_tree node,
   26166              :                                   tree vectype, int,
   26167              :                                   vect_cost_model_location where)
   26168              : {
   26169      6670767 :   unsigned retval = 0;
   26170      6670767 :   bool scalar_p
   26171              :     = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
   26172      6670767 :   int stmt_cost = - 1;
   26173              : 
   26174      6670767 :   bool fp = false;
   26175      6670767 :   machine_mode mode = scalar_p ? SImode : TImode;
   26176              : 
   26177      6670767 :   if (vectype != NULL)
   26178              :     {
   26179      2977211 :       fp = FLOAT_TYPE_P (vectype);
   26180      2977211 :       mode = TYPE_MODE (vectype);
   26181      2977211 :       if (scalar_p)
   26182       238042 :         mode = TYPE_MODE (TREE_TYPE (vectype));
   26183              :     }
   26184              :   /* When we are costing a scalar stmt use the scalar stmt to get at the
   26185              :      type of the operation.  */
   26186      3693556 :   else if (scalar_p && stmt_info)
   26187      3633093 :     if (tree lhs = gimple_get_lhs (stmt_info->stmt))
   26188              :       {
   26189      3461676 :         fp = FLOAT_TYPE_P (TREE_TYPE (lhs));
   26190      3461676 :         mode = TYPE_MODE (TREE_TYPE (lhs));
   26191              :       }
   26192              : 
   26193      6670767 :   if ((kind == vector_stmt || kind == scalar_stmt)
   26194      1612474 :       && stmt_info
   26195      8277275 :       && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
   26196              :     {
   26197      1248633 :       tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
   26198              :       /*machine_mode inner_mode = mode;
   26199              :       if (VECTOR_MODE_P (mode))
   26200              :         inner_mode = GET_MODE_INNER (mode);*/
   26201              : 
   26202      1248633 :       switch (subcode)
   26203              :         {
   26204       500004 :         case PLUS_EXPR:
   26205       500004 :         case POINTER_PLUS_EXPR:
   26206       500004 :         case MINUS_EXPR:
   26207       500004 :           if (kind == scalar_stmt)
   26208              :             {
   26209       324427 :               if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26210        69143 :                 stmt_cost = ix86_cost->addss;
   26211       255284 :               else if (X87_FLOAT_MODE_P (mode))
   26212          128 :                 stmt_cost = ix86_cost->fadd;
   26213              :               else
   26214       255156 :                 stmt_cost = ix86_cost->add;
   26215              :             }
   26216              :           else
   26217       175577 :             stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
   26218              :                                        : ix86_cost->sse_op);
   26219              :           break;
   26220              : 
   26221       178764 :         case MULT_EXPR:
   26222              :           /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw,
   26223              :              take it as MULT_EXPR.  */
   26224       178764 :         case MULT_HIGHPART_EXPR:
   26225       178764 :           stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
   26226       178764 :           break;
   26227              :           /* There's no direct instruction for WIDEN_MULT_EXPR,
   26228              :              take emulation into account.  */
   26229         1018 :         case WIDEN_MULT_EXPR:
   26230         2036 :           stmt_cost = ix86_widen_mult_cost (ix86_cost, mode,
   26231         1018 :                                             TYPE_UNSIGNED (vectype));
   26232         1018 :           break;
   26233              : 
   26234         6214 :         case NEGATE_EXPR:
   26235         6214 :           if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26236         1700 :             stmt_cost = ix86_cost->sse_op;
   26237         4514 :           else if (X87_FLOAT_MODE_P (mode))
   26238            0 :             stmt_cost = ix86_cost->fchs;
   26239         4514 :           else if (VECTOR_MODE_P (mode))
   26240         1888 :             stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26241              :           else
   26242         2626 :             stmt_cost = ix86_cost->add;
   26243              :           break;
   26244        12393 :         case TRUNC_DIV_EXPR:
   26245        12393 :         case CEIL_DIV_EXPR:
   26246        12393 :         case FLOOR_DIV_EXPR:
   26247        12393 :         case ROUND_DIV_EXPR:
   26248        12393 :         case TRUNC_MOD_EXPR:
   26249        12393 :         case CEIL_MOD_EXPR:
   26250        12393 :         case FLOOR_MOD_EXPR:
   26251        12393 :         case RDIV_EXPR:
   26252        12393 :         case ROUND_MOD_EXPR:
   26253        12393 :         case EXACT_DIV_EXPR:
   26254        12393 :           stmt_cost = ix86_division_cost (ix86_cost, mode);
   26255        12393 :           break;
   26256              : 
   26257        55888 :         case RSHIFT_EXPR:
   26258        55888 :         case LSHIFT_EXPR:
   26259        55888 :         case LROTATE_EXPR:
   26260        55888 :         case RROTATE_EXPR:
   26261        55888 :           {
   26262        55888 :             tree op1 = gimple_assign_rhs1 (stmt_info->stmt);
   26263        55888 :             tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
   26264        55888 :             stmt_cost = ix86_shift_rotate_cost
   26265        55888 :                            (ix86_cost,
   26266              :                             (subcode == RSHIFT_EXPR
   26267        32521 :                              && !TYPE_UNSIGNED (TREE_TYPE (op1)))
   26268              :                             ? ASHIFTRT : LSHIFTRT, mode,
   26269        55888 :                             TREE_CODE (op2) == INTEGER_CST,
   26270        55888 :                             cst_and_fits_in_hwi (op2)
   26271        33261 :                             ? int_cst_value (op2) : -1,
   26272              :                             false, false, NULL, NULL);
   26273              :           }
   26274        55888 :           break;
   26275        83149 :         case NOP_EXPR:
   26276              :           /* Only sign-conversions are free.  */
   26277        83149 :           if (tree_nop_conversion_p
   26278        83149 :                 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
   26279        83149 :                  TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
   26280              :             stmt_cost = 0;
   26281        83149 :           else if (fp)
   26282         6894 :             stmt_cost = vec_fp_conversion_cost
   26283         6894 :                           (ix86_tune_cost, GET_MODE_BITSIZE (mode));
   26284              :           break;
   26285              : 
   26286        13442 :         case FLOAT_EXPR:
   26287        13442 :             if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26288        10344 :               stmt_cost = ix86_cost->cvtsi2ss;
   26289         3098 :             else if (X87_FLOAT_MODE_P (mode))
   26290              :               /* TODO: We do not have cost tables for x87.  */
   26291           50 :               stmt_cost = ix86_cost->fadd;
   26292              :             else
   26293         3048 :               stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
   26294              :             break;
   26295              : 
   26296         1706 :         case FIX_TRUNC_EXPR:
   26297         1706 :             if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26298            0 :               stmt_cost = ix86_cost->cvtss2si;
   26299         1706 :             else if (X87_FLOAT_MODE_P (mode))
   26300              :               /* TODO: We do not have cost tables for x87.  */
   26301            0 :               stmt_cost = ix86_cost->fadd;
   26302              :             else
   26303         1706 :               stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
   26304              :             break;
   26305              : 
   26306        38539 :         case COND_EXPR:
   26307        38539 :           {
   26308              :             /* SSE2 conditinal move sequence is:
   26309              :                  pcmpgtd %xmm5, %xmm0 (accounted separately)
   26310              :                  pand    %xmm0, %xmm2
   26311              :                  pandn   %xmm1, %xmm0
   26312              :                  por     %xmm2, %xmm0
   26313              :                while SSE4 uses cmp + blend
   26314              :                and AVX512 masked moves.
   26315              : 
   26316              :                The condition is accounted separately since we usually have
   26317              :                  p = a < b
   26318              :                  c = p ? x : y
   26319              :                and we will account first statement as setcc.  Exception is when
   26320              :                p is loaded from memory as bool and then we will not acocunt
   26321              :                the compare, but there is no way to check for this.  */
   26322              : 
   26323        38539 :             int ninsns = TARGET_SSE4_1 ? 1 : 3;
   26324              : 
   26325              :             /* If one of parameters is 0 or -1 the sequence will be simplified:
   26326              :                (if_true & mask) | (if_false & ~mask) -> if_true & mask  */
   26327        19927 :             if (ninsns > 1
   26328        19927 :                 && (zerop (gimple_assign_rhs2 (stmt_info->stmt))
   26329        19601 :                     || zerop (gimple_assign_rhs3 (stmt_info->stmt))
   26330        11531 :                     || integer_minus_onep
   26331        11531 :                         (gimple_assign_rhs2 (stmt_info->stmt))
   26332        11105 :                     || integer_minus_onep
   26333        11105 :                         (gimple_assign_rhs3 (stmt_info->stmt))))
   26334              :               ninsns = 1;
   26335              : 
   26336        38539 :             if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26337         2776 :               stmt_cost = ninsns * ix86_cost->sse_op;
   26338        35763 :             else if (X87_FLOAT_MODE_P (mode))
   26339              :               /* x87 requires conditional branch.  We don't have cost for
   26340              :                  that.  */
   26341              :               ;
   26342        35754 :             else if (VECTOR_MODE_P (mode))
   26343        14729 :               stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op);
   26344              :             else
   26345              :               /* compare (accounted separately) + cmov.  */
   26346        21025 :               stmt_cost = ix86_cost->add;
   26347              :           }
   26348              :           break;
   26349              : 
   26350        22127 :         case MIN_EXPR:
   26351        22127 :         case MAX_EXPR:
   26352        22127 :           if (fp)
   26353              :             {
   26354         1008 :               if (X87_FLOAT_MODE_P (mode)
   26355          384 :                   && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26356              :                 /* x87 requires conditional branch.  We don't have cost for
   26357              :                    that.  */
   26358              :                 ;
   26359              :               else
   26360              :                 /* minss  */
   26361         1008 :                 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26362              :             }
   26363              :           else
   26364              :             {
   26365        21119 :               if (VECTOR_MODE_P (mode))
   26366              :                 {
   26367         4069 :                   stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26368              :                   /* vpmin was introduced in SSE3.
   26369              :                      SSE2 needs pcmpgtd + pand + pandn + pxor.
   26370              :                      If one of parameters is 0 or -1 the sequence is simplified
   26371              :                      to pcmpgtd + pand.  */
   26372         4069 :                   if (!TARGET_SSSE3)
   26373              :                     {
   26374         3100 :                       if (zerop (gimple_assign_rhs2 (stmt_info->stmt))
   26375         4434 :                           || integer_minus_onep
   26376         1334 :                                 (gimple_assign_rhs2 (stmt_info->stmt)))
   26377         1766 :                         stmt_cost *= 2;
   26378              :                       else
   26379         1334 :                         stmt_cost *= 4;
   26380              :                     }
   26381              :                 }
   26382              :               else
   26383              :                 /* cmp + cmov.  */
   26384        17050 :                 stmt_cost = ix86_cost->add * 2;
   26385              :             }
   26386              :           break;
   26387              : 
   26388          904 :         case ABS_EXPR:
   26389          904 :         case ABSU_EXPR:
   26390          904 :           if (fp)
   26391              :             {
   26392          338 :               if (X87_FLOAT_MODE_P (mode)
   26393          126 :                   && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26394              :                 /* fabs.  */
   26395            0 :                 stmt_cost = ix86_cost->fabs;
   26396              :               else
   26397              :                 /* andss of sign bit.  */
   26398          338 :                 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26399              :             }
   26400              :           else
   26401              :             {
   26402          566 :               if (VECTOR_MODE_P (mode))
   26403              :                 {
   26404           99 :                   stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26405              :                   /* vabs was introduced in SSSE3.
   26406              :                      SSE2 uses psrai + pxor + psub.  */
   26407           99 :                   if (!TARGET_SSSE3)
   26408           75 :                     stmt_cost *= 3;
   26409              :                 }
   26410              :               else
   26411              :                 /* neg + cmov.  */
   26412          467 :                 stmt_cost = ix86_cost->add * 2;
   26413              :             }
   26414              :           break;
   26415              : 
   26416       106851 :         case BIT_IOR_EXPR:
   26417       106851 :         case BIT_XOR_EXPR:
   26418       106851 :         case BIT_AND_EXPR:
   26419       106851 :         case BIT_NOT_EXPR:
   26420       106851 :           gcc_assert (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)
   26421              :                       && !X87_FLOAT_MODE_P (mode));
   26422       106851 :           if (VECTOR_MODE_P (mode))
   26423        35234 :             stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26424              :           else
   26425        71617 :             stmt_cost = ix86_cost->add;
   26426              :           break;
   26427              : 
   26428       227634 :         default:
   26429       227634 :           if (truth_value_p (subcode))
   26430              :             {
   26431        73403 :               if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26432              :                 /* CMPccS? insructions are cheap, so use sse_op.  While they
   26433              :                    produce a mask which may need to be turned to 0/1 by and,
   26434              :                    expect that this will be optimized away in a common case.  */
   26435            0 :                 stmt_cost = ix86_cost->sse_op;
   26436        73403 :               else if (X87_FLOAT_MODE_P (mode))
   26437              :                 /* fcmp + setcc.  */
   26438            0 :                 stmt_cost = ix86_cost->fadd + ix86_cost->add;
   26439        73403 :               else if (VECTOR_MODE_P (mode))
   26440        14743 :                 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26441              :               else
   26442              :                 /* setcc.  */
   26443        58660 :                 stmt_cost = ix86_cost->add;
   26444              :               break;
   26445              :             }
   26446              :           break;
   26447              :         }
   26448              :     }
   26449              : 
   26450              :   /* Record number of load/store/gather/scatter in vectorized body.  */
   26451      6670767 :   if (where == vect_body && !m_costing_for_scalar)
   26452              :     {
   26453      1698989 :       int scale = 1;
   26454      1698989 :       if (vectype
   26455      3389394 :           && ((GET_MODE_SIZE (TYPE_MODE (vectype)) == 64
   26456        59575 :               && TARGET_AVX512_SPLIT_REGS)
   26457      3380686 :               || (GET_MODE_SIZE (TYPE_MODE (vectype)) == 32
   26458        92429 :                   && TARGET_AVX256_SPLIT_REGS)))
   26459              :         scale = 2;
   26460              : 
   26461      1698989 :       switch (kind)
   26462              :         {
   26463              :           /* Emulated gather/scatter or any scalarization.  */
   26464       109037 :         case scalar_load:
   26465       109037 :         case scalar_stmt:
   26466       109037 :         case scalar_store:
   26467       109037 :         case vector_gather_load:
   26468       109037 :         case vector_scatter_store:
   26469       109037 :           m_prefer_unroll = false;
   26470       109037 :           break;
   26471              : 
   26472       473332 :         case vector_stmt:
   26473       473332 :         case vec_to_scalar:
   26474              :           /* Count number of reduction FMA and "real" DOT_PROD_EXPR,
   26475              :              unroll in the vectorizer will enable partial sum.  */
   26476       473332 :           if (stmt_info
   26477       473310 :               && vect_is_reduction (stmt_info)
   26478       520833 :               && stmt_info->stmt)
   26479              :             {
   26480              :               /* Handle __builtin_fma.  */
   26481        47501 :               if (gimple_call_combined_fn (stmt_info->stmt) == CFN_FMA)
   26482              :                 {
   26483            6 :                   m_num_reduc[X86_REDUC_FMA] += count * scale;
   26484            6 :                   break;
   26485              :                 }
   26486              : 
   26487        47495 :               if (!is_gimple_assign (stmt_info->stmt))
   26488              :                 break;
   26489              : 
   26490        45117 :               tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
   26491        45117 :               machine_mode inner_mode = GET_MODE_INNER (mode);
   26492        45117 :               tree rhs1, rhs2;
   26493        45117 :               bool native_vnni_p = true;
   26494        45117 :               gimple* def;
   26495        45117 :               machine_mode mode_rhs;
   26496        45117 :               switch (subcode)
   26497              :                 {
   26498        35235 :                 case PLUS_EXPR:
   26499        35235 :                 case MINUS_EXPR:
   26500        35235 :                   if (!fp || !flag_associative_math
   26501        15940 :                       || flag_fp_contract_mode != FP_CONTRACT_FAST)
   26502              :                     break;
   26503              : 
   26504              :                   /* FMA condition for different modes.  */
   26505        15940 :                   if (((inner_mode == DFmode || inner_mode == SFmode)
   26506        15928 :                        && !TARGET_FMA && !TARGET_AVX512VL)
   26507         5776 :                       || (inner_mode == HFmode && !TARGET_AVX512FP16)
   26508         5776 :                       || (inner_mode == BFmode && !TARGET_AVX10_2))
   26509              :                     break;
   26510              : 
   26511              :                   /* MULT_EXPR + PLUS_EXPR/MINUS_EXPR is transformed
   26512              :                      to FMA/FNMA after vectorization.  */
   26513         5776 :                   rhs1 = gimple_assign_rhs1 (stmt_info->stmt);
   26514         5776 :                   rhs2 = gimple_assign_rhs2 (stmt_info->stmt);
   26515         5776 :                   if (subcode == PLUS_EXPR
   26516         4538 :                       && TREE_CODE (rhs1) == SSA_NAME
   26517         4538 :                       && (def = SSA_NAME_DEF_STMT (rhs1), true)
   26518         4538 :                       && is_gimple_assign (def)
   26519         8106 :                       && gimple_assign_rhs_code (def) == MULT_EXPR)
   26520         1402 :                     m_num_reduc[X86_REDUC_FMA] += count * scale;
   26521         4374 :                   else if (TREE_CODE (rhs2) == SSA_NAME
   26522         4374 :                            && (def = SSA_NAME_DEF_STMT (rhs2), true)
   26523         4374 :                            && is_gimple_assign (def)
   26524         8716 :                            && gimple_assign_rhs_code (def) == MULT_EXPR)
   26525         4338 :                     m_num_reduc[X86_REDUC_FMA] += count * scale;
   26526              :                   break;
   26527              : 
   26528              :                   /* Vectorizer lane_reducing_op_p supports DOT_PROD_EXPR,
   26529              :                      WIDEN_SUM_EXPR and SAD_EXPR, x86 backend only supports
   26530              :                      SAD_EXPR (usad{v16qi,v32qi,v64qi}) and DOT_PROD_EXPR.  */
   26531          374 :                 case DOT_PROD_EXPR:
   26532          374 :                   rhs1 = gimple_assign_rhs1 (stmt_info->stmt);
   26533          374 :                   mode_rhs = TYPE_MODE (TREE_TYPE (rhs1));
   26534          374 :                   if (mode_rhs == QImode)
   26535              :                     {
   26536          211 :                       rhs2 = gimple_assign_rhs2 (stmt_info->stmt);
   26537          211 :                       signop signop1_p = TYPE_SIGN (TREE_TYPE (rhs1));
   26538          211 :                       signop signop2_p = TYPE_SIGN (TREE_TYPE (rhs2));
   26539              : 
   26540              :                       /* vpdpbusd.  */
   26541          211 :                       if (signop1_p != signop2_p)
   26542           53 :                         native_vnni_p
   26543           53 :                           = (GET_MODE_SIZE (mode) == 64
   26544           53 :                              ? TARGET_AVX512VNNI
   26545           10 :                              : ((TARGET_AVX512VNNI && TARGET_AVX512VL)
   26546           53 :                                 || TARGET_AVXVNNI));
   26547              :                       else
   26548              :                         /* vpdpbssd.  */
   26549          158 :                         native_vnni_p
   26550          174 :                           = (GET_MODE_SIZE (mode) == 64
   26551          158 :                              ? TARGET_AVX10_2
   26552          142 :                              : (TARGET_AVXVNNIINT8 || TARGET_AVX10_2));
   26553              :                     }
   26554          374 :                   m_num_reduc[X86_REDUC_DOT_PROD] += count * scale;
   26555              : 
   26556              :                   /* Dislike to do unroll and partial sum for
   26557              :                      emulated DOT_PROD_EXPR.  */
   26558          374 :                   if (!native_vnni_p)
   26559          128 :                     m_num_reduc[X86_REDUC_DOT_PROD] += 3 * count;
   26560              :                   break;
   26561              : 
   26562           80 :                 case SAD_EXPR:
   26563           80 :                   m_num_reduc[X86_REDUC_SAD] += count * scale;
   26564           80 :                   break;
   26565              : 
   26566              :                 default:
   26567              :                   break;
   26568              :                 }
   26569              :             }
   26570              : 
   26571              :         default:
   26572              :           break;
   26573              :         }
   26574              :     }
   26575              : 
   26576              : 
   26577      6670767 :   combined_fn cfn;
   26578      6670767 :   if ((kind == vector_stmt || kind == scalar_stmt)
   26579      1612474 :       && stmt_info
   26580      1606508 :       && stmt_info->stmt
   26581      8277275 :       && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
   26582        17518 :     switch (cfn)
   26583              :       {
   26584           63 :       case CFN_FMA:
   26585           63 :         stmt_cost = ix86_vec_cost (mode,
   26586           63 :                                    mode == SFmode ? ix86_cost->fmass
   26587              :                                    : ix86_cost->fmasd);
   26588           63 :         break;
   26589           24 :       case CFN_MULH:
   26590           24 :         stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
   26591           24 :         break;
   26592              :       default:
   26593              :         break;
   26594              :       }
   26595              : 
   26596      6670767 :   if (kind == vec_promote_demote)
   26597              :     {
   26598        44963 :       int outer_size
   26599              :         = tree_to_uhwi
   26600        44963 :             (TYPE_SIZE
   26601        44963 :                 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt))));
   26602        44963 :       int inner_size
   26603              :         = tree_to_uhwi
   26604        44963 :             (TYPE_SIZE
   26605        44963 :                 (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
   26606        44963 :       bool inner_fp = FLOAT_TYPE_P
   26607              :                         (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)));
   26608              : 
   26609         3831 :       if (fp && inner_fp)
   26610         3431 :         stmt_cost = vec_fp_conversion_cost
   26611         3431 :                           (ix86_tune_cost, GET_MODE_BITSIZE (mode));
   26612        41532 :       else if (fp && !inner_fp)
   26613         4106 :         stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
   26614        37426 :       else if (!fp && inner_fp)
   26615          400 :         stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
   26616              :       else
   26617        37026 :         stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26618              :       /* VEC_PACK_TRUNC_EXPR and similar demote operations: If outer size is
   26619              :          greater than inner size we will end up doing two conversions and
   26620              :          packing them.  We always pack pairs; if the size difference is greater
   26621              :          it is split into multiple demote operations.  */
   26622        44963 :       if (inner_size > outer_size)
   26623        17126 :         stmt_cost = stmt_cost * 2
   26624        17126 :                     + ix86_vec_cost (mode, ix86_cost->sse_op);
   26625              :     }
   26626              : 
   26627              :   /* If we do elementwise loads into a vector then we are bound by
   26628              :      latency and execution resources for the many scalar loads
   26629              :      (AGU and load ports).  Try to account for this by scaling the
   26630              :      construction cost by the number of elements involved.  */
   26631      6670767 :   if ((kind == vec_construct || kind == vec_to_scalar)
   26632      6670767 :       && ((node
   26633       426191 :            && (((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
   26634       437746 :                  || (SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_STRIDED_SLP
   26635        36097 :                      && SLP_TREE_LANES (node) == 1))
   26636        39082 :                 && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
   26637              :                                         (SLP_TREE_REPRESENTATIVE (node))))
   26638              :                     != INTEGER_CST))
   26639        69306 :                || mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node))))))
   26640              :     {
   26641        30728 :       stmt_cost = ix86_default_vector_cost (kind, mode);
   26642        30728 :       stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
   26643              :     }
   26644      6640039 :   else if ((kind == vec_construct || kind == scalar_to_vec)
   26645       443948 :            && node
   26646       413134 :            && SLP_TREE_DEF_TYPE (node) == vect_external_def)
   26647              :     {
   26648       303571 :       stmt_cost = ix86_default_vector_cost (kind, mode);
   26649       303571 :       unsigned i;
   26650       303571 :       tree op;
   26651      1298849 :       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
   26652       691707 :         if (TREE_CODE (op) == SSA_NAME)
   26653       470257 :           TREE_VISITED (op) = 0;
   26654       995278 :       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
   26655              :         {
   26656       691707 :           if (TREE_CODE (op) != SSA_NAME
   26657       470257 :               || TREE_VISITED (op))
   26658       254971 :             continue;
   26659       436736 :           TREE_VISITED (op) = 1;
   26660       436736 :           gimple *def = SSA_NAME_DEF_STMT (op);
   26661       436736 :           tree tem;
   26662              :           /* Look through a conversion.  */
   26663       436736 :           if (is_gimple_assign (def)
   26664       247814 :               && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
   26665        27368 :               && ((tem = gimple_assign_rhs1 (def)), true)
   26666       464104 :               && TREE_CODE (tem) == SSA_NAME)
   26667        27157 :             def = SSA_NAME_DEF_STMT (tem);
   26668              :           /* When the component is loaded from memory without sign-
   26669              :              or zero-extension we can move it to a vector register and/or
   26670              :              insert it via vpinsr with a memory operand.  */
   26671       436736 :           if (gimple_assign_load_p (def)
   26672       130183 :               && tree_nop_conversion_p (TREE_TYPE (op),
   26673       130183 :                                         TREE_TYPE (gimple_assign_lhs (def)))
   26674       690920 :               && (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op))) > 1
   26675         5356 :                   || TARGET_SSE4_1))
   26676              :             ;
   26677              :           /* When the component is extracted from a vector it is already
   26678              :              in a vector register.  */
   26679       314003 :           else if (is_gimple_assign (def)
   26680       120796 :                    && gimple_assign_rhs_code (def) == BIT_FIELD_REF
   26681       316851 :                    && VECTOR_TYPE_P (TREE_TYPE
   26682              :                                 (TREE_OPERAND (gimple_assign_rhs1 (def), 0))))
   26683              :             ;
   26684              :           else
   26685              :             {
   26686       311570 :               if (fp)
   26687              :                 {
   26688              :                   /* Scalar FP values residing in x87 registers need to be
   26689              :                      spilled and reloaded.  */
   26690        13436 :                   auto mode2 = TYPE_MODE (TREE_TYPE (op));
   26691        13436 :                   if (IS_STACK_MODE (mode2))
   26692              :                     {
   26693          971 :                       int cost
   26694              :                         = (ix86_cost->hard_register.fp_store[mode2 == SFmode
   26695          971 :                                                              ? 0 : 1]
   26696          971 :                            + ix86_cost->sse_load[sse_store_index (mode2)]);
   26697          971 :                       stmt_cost += COSTS_N_INSNS (cost) / 2;
   26698              :                     }
   26699        13436 :                   m_num_sse_needed[where]++;
   26700              :                 }
   26701              :               else
   26702              :                 {
   26703       298134 :                   m_num_gpr_needed[where]++;
   26704              : 
   26705       298134 :                   stmt_cost += COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
   26706              :                 }
   26707              :             }
   26708              :         }
   26709       995278 :       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
   26710       691707 :         if (TREE_CODE (op) == SSA_NAME)
   26711       470257 :           TREE_VISITED (op) = 0;
   26712              :     }
   26713      6670767 :   if (stmt_cost == -1)
   26714      5273280 :     stmt_cost = ix86_default_vector_cost (kind, mode);
   26715              : 
   26716      6670767 :   if (kind == vec_perm && vectype
   26717       178126 :       && GET_MODE_SIZE (TYPE_MODE (vectype)) == 32
   26718              :       /* BIT_FIELD_REF <vect_**, 64, 0> 0 times vec_perm costs 0 in body.  */
   26719      6674243 :       && count != 0)
   26720              :     {
   26721         3476 :       bool real_perm = true;
   26722         3476 :       unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
   26723              : 
   26724         3476 :       if (node
   26725         3473 :           && SLP_TREE_LOAD_PERMUTATION (node).exists ()
   26726              :           /* Loop vectorization will have 4 times vec_perm
   26727              :              with index as {0, 0, 0, 0}.
   26728              :              But it actually generates
   26729              :              vec_perm_expr <vect, vect, 0, 0, 0, 0>
   26730              :              vec_perm_expr <vect, vect, 1, 1, 1, 1>
   26731              :              vec_perm_expr <vect, vect, 2, 2, 2, 2>
   26732              :              Need to be handled separately.  */
   26733         6304 :           && is_a <bb_vec_info> (m_vinfo))
   26734              :         {
   26735           39 :           unsigned half = nunits / 2;
   26736           39 :           unsigned i = 0;
   26737           39 :           bool allsame = true;
   26738           39 :           unsigned first = SLP_TREE_LOAD_PERMUTATION (node)[0];
   26739           39 :           bool cross_lane_p = false;
   26740          198 :           for (i = 0 ; i != SLP_TREE_LANES (node); i++)
   26741              :             {
   26742          197 :               unsigned tmp = SLP_TREE_LOAD_PERMUTATION (node)[i];
   26743              :               /* allsame is just a broadcast.  */
   26744          197 :               if (tmp != first)
   26745           92 :                 allsame = false;
   26746              : 
   26747              :               /* 4 times vec_perm with number of lanes multiple of nunits.  */
   26748          197 :               tmp = tmp & (nunits - 1);
   26749          197 :               unsigned index = i & (nunits - 1);
   26750          197 :               if ((index < half && tmp >= half)
   26751          197 :                   || (index >= half && tmp < half))
   26752           65 :                 cross_lane_p = true;
   26753              : 
   26754          197 :               if (!allsame && cross_lane_p)
   26755              :                 break;
   26756              :             }
   26757              : 
   26758           39 :           if (i == SLP_TREE_LANES (node))
   26759              :             real_perm = false;
   26760              :         }
   26761              : 
   26762              :       if (real_perm)
   26763              :         {
   26764         3475 :           m_num_avx256_vec_perm[where] += count;
   26765         3475 :           if (dump_file && (dump_flags & TDF_DETAILS))
   26766              :             {
   26767          231 :               fprintf (dump_file, "Detected avx256 cross-lane permutation: ");
   26768          231 :               if (stmt_info)
   26769          228 :                 print_gimple_expr (dump_file, stmt_info->stmt, 0, TDF_SLIM);
   26770          231 :               fprintf (dump_file, " \n");
   26771              :             }
   26772              :         }
   26773              :     }
   26774              : 
   26775              :   /* Penalize DFmode vector operations for Bonnell.  */
   26776      6670767 :   if (TARGET_CPU_P (BONNELL) && kind == vector_stmt
   26777      6670829 :       && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
   26778           12 :     stmt_cost *= 5;  /* FIXME: The value here is arbitrary.  */
   26779              : 
   26780              :   /* Statements in an inner loop relative to the loop being
   26781              :      vectorized are weighted more heavily.  The value here is
   26782              :      arbitrary and could potentially be improved with analysis.  */
   26783      6670767 :   retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
   26784              : 
   26785              :   /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
   26786              :      for Silvermont as it has out of order integer pipeline and can execute
   26787              :      2 scalar instruction per tick, but has in order SIMD pipeline.  */
   26788      6670767 :   if ((TARGET_CPU_P (SILVERMONT) || TARGET_CPU_P (GOLDMONT)
   26789      6670767 :        || TARGET_CPU_P (GOLDMONT_PLUS) || TARGET_CPU_P (INTEL))
   26790         1811 :       && stmt_info && stmt_info->stmt)
   26791              :     {
   26792         1595 :       tree lhs_op = gimple_get_lhs (stmt_info->stmt);
   26793         1595 :       if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
   26794         1198 :         retval = (retval * 17) / 10;
   26795              :     }
   26796              : 
   26797      6670767 :   m_costs[where] += retval;
   26798              : 
   26799      6670767 :   return retval;
   26800              : }
   26801              : 
   26802              : void
   26803      1694942 : ix86_vector_costs::ix86_vect_estimate_reg_pressure ()
   26804              : {
   26805      1694942 :   unsigned gpr_spill_cost = COSTS_N_INSNS (ix86_cost->int_store [2]) / 2;
   26806      1694942 :   unsigned sse_spill_cost = COSTS_N_INSNS (ix86_cost->sse_store[0]) / 2;
   26807              : 
   26808              :   /* Any better way to have target available fp registers, currently use SSE_REGS.  */
   26809      1694942 :   unsigned target_avail_sse = TARGET_64BIT ? (TARGET_AVX512F ? 32 : 16) : 8;
   26810      6779768 :   for (unsigned i = 0; i != 3; i++)
   26811              :     {
   26812      5084826 :       if (m_num_gpr_needed[i] > target_avail_regs)
   26813          706 :         m_costs[i] += gpr_spill_cost * (m_num_gpr_needed[i] - target_avail_regs);
   26814              :       /* Only measure sse registers pressure.  */
   26815      5084826 :       if (TARGET_SSE && (m_num_sse_needed[i] > target_avail_sse))
   26816           92 :         m_costs[i] += sse_spill_cost * (m_num_sse_needed[i] - target_avail_sse);
   26817              :     }
   26818      1694942 : }
   26819              : 
   26820              : void
   26821      1694942 : ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
   26822              : {
   26823      1694942 :   loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
   26824       377938 :   if (loop_vinfo && !m_costing_for_scalar)
   26825              :     {
   26826              :       /* We are currently not asking the vectorizer to compare costs
   26827              :          between different vector mode sizes.  When using predication
   26828              :          that will end up always choosing the prefered mode size even
   26829              :          if there's a smaller mode covering all lanes.  Test for this
   26830              :          situation and artificially reject the larger mode attempt.
   26831              :          ???  We currently lack masked ops for sub-SSE sized modes,
   26832              :          so we could restrict this rejection to AVX and AVX512 modes
   26833              :          but error on the safe side for now.  */
   26834        82636 :       if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
   26835           22 :           && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
   26836           15 :           && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
   26837        82646 :           && (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ())
   26838           20 :               > ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo))))
   26839            8 :         m_costs[vect_body] = INT_MAX;
   26840              : 
   26841              :       /* We'd like to avoid using masking if there's an in-order reduction
   26842              :          to vectorize because that will also perform in-order adds of
   26843              :          masked elements (as neutral value, of course) here, but there
   26844              :          is currently no way to indicate to try un-masked with the same
   26845              :          mode.  */
   26846              : 
   26847        82636 :       bool any_reduc_p = false;
   26848       328400 :       for (int i = 0; i != X86_REDUC_LAST; i++)
   26849       246541 :         if (m_num_reduc[i])
   26850              :           {
   26851              :             any_reduc_p = true;
   26852              :             break;
   26853              :           }
   26854              : 
   26855        82636 :       if (any_reduc_p
   26856              :           /* Not much gain for loop with gather and scatter.  */
   26857          777 :           && m_prefer_unroll
   26858          627 :           && !LOOP_VINFO_EPILOGUE_P (loop_vinfo))
   26859              :         {
   26860          956 :           unsigned unroll_factor
   26861          478 :             = OPTION_SET_P (ix86_vect_unroll_limit)
   26862          478 :             ? ix86_vect_unroll_limit
   26863          478 :             : ix86_cost->vect_unroll_limit;
   26864              : 
   26865          478 :           if (unroll_factor > 1)
   26866              :             {
   26867         1912 :               for (int i = 0 ; i != X86_REDUC_LAST; i++)
   26868              :                 {
   26869         1434 :                   if (m_num_reduc[i])
   26870              :                     {
   26871          478 :                       unsigned tmp = CEIL (ix86_cost->reduc_lat_mult_thr[i],
   26872              :                                            m_num_reduc[i]);
   26873         1434 :                       unroll_factor = MIN (unroll_factor, tmp);
   26874              :                     }
   26875              :                 }
   26876              : 
   26877          956 :               m_suggested_unroll_factor  = 1 << ceil_log2 (unroll_factor);
   26878              :             }
   26879              :         }
   26880              : 
   26881              :     }
   26882              : 
   26883      1694942 :   ix86_vect_estimate_reg_pressure ();
   26884              : 
   26885      6779768 :   for (int i = 0; i != 3; i++)
   26886      5084826 :     if (m_num_avx256_vec_perm[i]
   26887          444 :         && TARGET_AVX256_AVOID_VEC_PERM)
   26888            7 :       m_costs[i] = INT_MAX;
   26889              : 
   26890              :   /* When X86_TUNE_AVX512_TWO_EPILOGUES is enabled arrange for both
   26891              :      a AVX2 and a SSE epilogue for AVX512 vectorized loops.  */
   26892      1694942 :   if (loop_vinfo
   26893       377938 :       && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
   26894        46982 :       && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32
   26895      1695766 :       && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
   26896           14 :     m_suggested_epilogue_mode = V16QImode;
   26897              :   /* When a 128bit SSE vectorized epilogue still has a VF of 16 or larger
   26898              :      enable a 64bit SSE epilogue.  */
   26899      1694942 :   if (loop_vinfo
   26900       377938 :       && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
   26901        46982 :       && GET_MODE_SIZE (loop_vinfo->vector_mode) == 16
   26902      1697416 :       && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () >= 16)
   26903           91 :     m_suggested_epilogue_mode = V8QImode;
   26904              : 
   26905              :   /* When X86_TUNE_AVX512_MASKED_EPILOGUES is enabled try to use
   26906              :      a masked epilogue if that doesn't seem detrimental.  */
   26907      1694942 :   if (loop_vinfo
   26908       377938 :       && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
   26909       354447 :       && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () > 2
   26910              :       /* Avoid a masked epilog if cascaded epilogues eventually get us
   26911              :          to one with VF 1 as that means no scalar epilog at all.  */
   26912        52309 :       && !((GET_MODE_SIZE (loop_vinfo->vector_mode)
   26913        52309 :             / LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () == 16)
   26914           35 :            && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
   26915        52308 :       && ix86_tune_features[X86_TUNE_AVX512_MASKED_EPILOGUES]
   26916      1695032 :       && !OPTION_SET_P (param_vect_partial_vector_usage))
   26917              :     {
   26918           84 :       bool avoid = false;
   26919           84 :       if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
   26920           68 :           && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
   26921              :         {
   26922           68 :           unsigned int peel_niter
   26923              :             = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
   26924           68 :           if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
   26925            0 :             peel_niter += 1;
   26926              :           /* When we know the number of scalar iterations of the epilogue,
   26927              :              avoid masking when a single vector epilog iteration handles
   26928              :              it in full.  */
   26929           68 :           if (pow2p_hwi ((LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter)
   26930           68 :                          % LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ()))
   26931              :             avoid = true;
   26932              :         }
   26933           83 :       if (!avoid && loop_outer (loop_outer (LOOP_VINFO_LOOP (loop_vinfo))))
   26934            7 :         for (auto ddr : LOOP_VINFO_DDRS (loop_vinfo))
   26935              :           {
   26936            2 :             if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
   26937              :               ;
   26938            2 :             else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
   26939              :               ;
   26940              :             else
   26941              :               {
   26942            1 :                 int loop_depth
   26943            2 :                     = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
   26944            1 :                                           DDR_LOOP_NEST (ddr));
   26945            2 :                 if (DDR_NUM_DIST_VECTS (ddr) == 1
   26946            1 :                     && DDR_DIST_VECTS (ddr)[0][loop_depth] == 0)
   26947              :                   {
   26948              :                     /* Avoid the case when there's an outer loop that might
   26949              :                        traverse a multi-dimensional array with the inner
   26950              :                        loop just executing the masked epilogue with a
   26951              :                        read-write where the next outer iteration might
   26952              :                        read from the masked part of the previous write,
   26953              :                        'n' filling half a vector.
   26954              :                          for (j = 0; j < m; ++j)
   26955              :                            for (i = 0; i < n; ++i)
   26956              :                              a[j][i] = c * a[j][i];  */
   26957              :                     avoid = true;
   26958              :                     break;
   26959              :                   }
   26960              :               }
   26961              :           }
   26962              :       /* Avoid using masking if there's an in-order reduction
   26963              :          to vectorize because that will also perform in-order adds of
   26964              :          masked elements (as neutral value, of course).  */
   26965           84 :       if (!avoid)
   26966              :         {
   26967          331 :           for (auto inst : LOOP_VINFO_SLP_INSTANCES (loop_vinfo))
   26968           86 :             if (SLP_INSTANCE_KIND (inst) == slp_inst_kind_reduc_group
   26969           86 :                 && (vect_reduc_type (loop_vinfo, SLP_INSTANCE_TREE (inst))
   26970              :                     == FOLD_LEFT_REDUCTION))
   26971              :               {
   26972              :                 avoid = true;
   26973              :                 break;
   26974              :               }
   26975              :         }
   26976           82 :       if (!avoid)
   26977              :         {
   26978           81 :           m_suggested_epilogue_mode = loop_vinfo->vector_mode;
   26979           81 :           m_masked_epilogue = 1;
   26980              :         }
   26981              :     }
   26982              : 
   26983      1694942 :   vector_costs::finish_cost (scalar_costs);
   26984      1694942 : }
   26985              : 
   26986              : /* Validate target specific memory model bits in VAL. */
   26987              : 
   26988              : static unsigned HOST_WIDE_INT
   26989       411093 : ix86_memmodel_check (unsigned HOST_WIDE_INT val)
   26990              : {
   26991       411093 :   enum memmodel model = memmodel_from_int (val);
   26992       411093 :   bool strong;
   26993              : 
   26994       411093 :   if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
   26995              :                                       |MEMMODEL_MASK)
   26996       411089 :       || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
   26997              :     {
   26998            4 :       warning (OPT_Winvalid_memory_model,
   26999              :                "unknown architecture specific memory model");
   27000            4 :       return MEMMODEL_SEQ_CST;
   27001              :     }
   27002       411089 :   strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
   27003       411089 :   if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
   27004              :     {
   27005            0 :       warning (OPT_Winvalid_memory_model,
   27006              :               "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
   27007              :                "memory model");
   27008            0 :       return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
   27009              :     }
   27010       411089 :   if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
   27011              :     {
   27012            0 :       warning (OPT_Winvalid_memory_model,
   27013              :               "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
   27014              :                "memory model");
   27015            0 :       return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
   27016              :     }
   27017              :   return val;
   27018              : }
   27019              : 
/* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
   CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
   CLONEI->simdlen.  Return 0 if SIMD clones shouldn't be emitted,
   or number of vecsize_mangle variants that should be emitted.  */

static int
ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
                                             struct cgraph_simd_clone *clonei,
                                             tree base_type, int num,
                                             bool explicit_p)
{
  int ret = 1;

  /* A user-specified simdlen must be a power of two in [2, 1024].
     Warnings are only issued for explicit (user-written) simd
     attributes; implicit clones are rejected silently.  */
  if (clonei->simdlen
      && (clonei->simdlen < 2
          || clonei->simdlen > 1024
          || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
    {
      if (explicit_p)
        warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
                    "unsupported simdlen %wd", clonei->simdlen.to_constant ());
      return 0;
    }

  /* Only void or scalar integer/float return types that fit the listed
     machine modes (and are not aggregates wrapped in those modes) are
     supported.  */
  tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
  if (TREE_CODE (ret_type) != VOID_TYPE)
    switch (TYPE_MODE (ret_type))
      {
      case E_QImode:
      case E_HImode:
      case E_SImode:
      case E_DImode:
      case E_SFmode:
      case E_DFmode:
      /* case E_SCmode: */
      /* case E_DCmode: */
        if (!AGGREGATE_TYPE_P (ret_type))
          break;
        /* FALLTHRU */
      default:
        if (explicit_p)
          warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
                      "unsupported return type %qT for simd", ret_type);
        return 0;
      }

  tree t;
  int i;
  tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
  /* For definitions (or unprototyped declarations) walk DECL_ARGUMENTS,
     otherwise walk the prototype's TYPE_ARG_TYPES list.  */
  bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);

  /* Each argument must pass the same scalar-mode check as the return
     type, except that uniform arguments are always acceptable since
     they are passed as plain scalars to every clone.  */
  for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
       t && t != void_list_node; t = TREE_CHAIN (t), i++)
    {
      tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
      switch (TYPE_MODE (arg_type))
        {
        case E_QImode:
        case E_HImode:
        case E_SImode:
        case E_DImode:
        case E_SFmode:
        case E_DFmode:
        /* case E_SCmode: */
        /* case E_DCmode: */
          if (!AGGREGATE_TYPE_P (arg_type))
            break;
          /* FALLTHRU */
        default:
          if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
            break;
          if (explicit_p)
            warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
                        "unsupported argument type %qT for simd", arg_type);
          return 0;
        }
    }

  if (!TREE_PUBLIC (node->decl) || !explicit_p)
    {
      /* If the function isn't exported, we can pick up just one ISA
         for the clones.  */
      if (TARGET_AVX512F)
        clonei->vecsize_mangle = 'e';
      else if (TARGET_AVX2)
        clonei->vecsize_mangle = 'd';
      else if (TARGET_AVX)
        clonei->vecsize_mangle = 'c';
      else
        clonei->vecsize_mangle = 'b';
      ret = 1;
    }
  else
    {
      /* Exported functions get all four ISA variants; NUM selects which
         one this call configures ('b'=SSE, 'c'=AVX, 'd'=AVX2,
         'e'=AVX512).  */
      clonei->vecsize_mangle = "bcde"[num];
      ret = 4;
    }
  clonei->mask_mode = VOIDmode;
  /* Vector register widths in bits per mangling letter; only the
     AVX512 variant ('e') uses a mask mode for masked clones.  */
  switch (clonei->vecsize_mangle)
    {
    case 'b':
      clonei->vecsize_int = 128;
      clonei->vecsize_float = 128;
      break;
    case 'c':
      clonei->vecsize_int = 128;
      clonei->vecsize_float = 256;
      break;
    case 'd':
      clonei->vecsize_int = 256;
      clonei->vecsize_float = 256;
      break;
    case 'e':
      clonei->vecsize_int = 512;
      clonei->vecsize_float = 512;
      if (TYPE_MODE (base_type) == QImode)
        clonei->mask_mode = DImode;
      else
        clonei->mask_mode = SImode;
      break;
    }
  if (clonei->simdlen == 0)
    {
      /* Default simdlen: as many elements of the characteristic type
         as fit in one vector register of the chosen width.  */
      if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
        clonei->simdlen = clonei->vecsize_int;
      else
        clonei->simdlen = clonei->vecsize_float;
      clonei->simdlen = clonei->simdlen
                        / GET_MODE_BITSIZE (TYPE_MODE (base_type));
    }
  else if (clonei->simdlen > 16)
    {
      /* For compatibility with ICC, use the same upper bounds
         for simdlen.  In particular, for CTYPE below, use the return type,
         unless the function returns void, in that case use the characteristic
         type.  If it is possible for given SIMDLEN to pass CTYPE value
         in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
         for 64-bit code), accept that SIMDLEN, otherwise warn and don't
         emit corresponding clone.  */
      tree ctype = ret_type;
      if (VOID_TYPE_P (ret_type))
        ctype = base_type;
      int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
      if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
        cnt /= clonei->vecsize_int;
      else
        cnt /= clonei->vecsize_float;
      /* CNT is now the number of vector registers the return (or
         characteristic) value would occupy.  */
      if (cnt > (TARGET_64BIT ? 16 : 8))
        {
          if (explicit_p)
            warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
                        "unsupported simdlen %wd",
                        clonei->simdlen.to_constant ());
          return 0;
        }
      }
  return ret;
}
   27178              : 
   27179              : /* If SIMD clone NODE can't be used in a vectorized loop
   27180              :    in current function, return -1, otherwise return a badness of using it
   27181              :    (0 if it is most desirable from vecsize_mangle point of view, 1
   27182              :    slightly less desirable, etc.).  */
   27183              : 
   27184              : static int
   27185         1768 : ix86_simd_clone_usable (struct cgraph_node *node, machine_mode)
   27186              : {
   27187         1768 :   switch (node->simdclone->vecsize_mangle)
   27188              :     {
   27189          621 :     case 'b':
   27190          621 :       if (!TARGET_SSE2)
   27191              :         return -1;
   27192          621 :       if (!TARGET_AVX)
   27193              :         return 0;
   27194          520 :       return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1;
   27195          627 :     case 'c':
   27196          627 :       if (!TARGET_AVX)
   27197              :         return -1;
   27198          582 :       return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0;
   27199          332 :     case 'd':
   27200          332 :       if (!TARGET_AVX2)
   27201              :         return -1;
   27202          139 :       return TARGET_AVX512F ? 1 : 0;
   27203          188 :     case 'e':
   27204          188 :       if (!TARGET_AVX512F)
   27205          130 :         return -1;
   27206              :       return 0;
   27207            0 :     default:
   27208            0 :       gcc_unreachable ();
   27209              :     }
   27210              : }
   27211              : 
/* This function adjusts the unroll factor based on
   the hardware capabilities. For ex, bdver3 has
   a loop buffer which makes unrolling of smaller
   loops less important. This function decides the
   unroll factor using number of memory references
   (value 32 is used) as a heuristic. */

static unsigned
ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
{
  basic_block *bbs;
  rtx_insn *insn;
  unsigned i;
  unsigned mem_count = 0;

  /* Unroll small size loop when unroll factor is not explicitly
     specified.  */
  if (ix86_unroll_only_small_loops && !loop->unroll)
    {
      /* Small loops get at most the tuning's small_unroll_factor;
         everything larger is not unrolled at all.  */
      if (loop->ninsns <= ix86_cost->small_unroll_ninsns)
        return MIN (nunroll, ix86_cost->small_unroll_factor);
      else
        return 1;
    }

  /* Without the per-CPU tuning flag the generic factor stands.  */
  if (!TARGET_ADJUST_UNROLL)
     return nunroll;

  /* Count the number of memory references within the loop body.
     This value determines the unrolling factor for bdver3 and bdver4
     architectures. */
  subrtx_iterator::array_type array;
  bbs = get_loop_body (loop);
  for (i = 0; i < loop->num_nodes; i++)
    FOR_BB_INSNS (bbs[i], insn)
      if (NONDEBUG_INSN_P (insn))
        FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
          if (const_rtx x = *iter)
            if (MEM_P (x))
              {
                machine_mode mode = GET_MODE (x);
                unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
                /* Accesses wider than four words count double.  */
                if (n_words > 4)
                  mem_count += 2;
                else
                  mem_count += 1;
              }
  free (bbs);

  /* Cap the factor so the unrolled body stays near 32 weighted memory
     references; loops with none (or more than 32) keep NUNROLL.  */
  if (mem_count && mem_count <=32)
    return MIN (nunroll, 32 / mem_count);

  return nunroll;
}
   27266              : 
   27267              : 
   27268              : /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P.  */
   27269              : 
   27270              : static bool
   27271       418570 : ix86_float_exceptions_rounding_supported_p (void)
   27272              : {
   27273              :   /* For x87 floating point with standard excess precision handling,
   27274              :      there is no adddf3 pattern (since x87 floating point only has
   27275              :      XFmode operations) so the default hook implementation gets this
   27276              :      wrong.  */
   27277       418570 :   return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
   27278              : }
   27279              : 
/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.

   Build the HOLD, CLEAR and UPDATE tree expressions used around an
   atomic compound assignment: HOLD saves the FP environment and
   silences exceptions, CLEAR re-silences them between retries, and
   UPDATE restores the saved environment and re-raises any exceptions
   the operation produced.  Handles the x87 and SSE environments
   independently, chaining both when present.  */

static void
ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  /* Nothing to do without any FP unit in use.  */
  if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
    return;
  /* Accumulates the raised-exception bits for the final
     __atomic_feraiseexcept call.  */
  tree exceptions_var = create_tmp_var_raw (integer_type_node);
  if (TARGET_80387)
    {
      /* x87: HOLD = fnstenv (save env) + fnclex (clear exceptions);
         CLEAR = fnclex; UPDATE = fnstsw into EXCEPTIONS_VAR, then
         fldenv to restore the saved environment.  */
      tree fenv_index_type = build_index_type (size_int (6));
      /* 7 unsigned ints = the 28-byte protected-mode x87 env block.  */
      tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
      tree fenv_var = create_tmp_var_raw (fenv_type);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_ptr = build_pointer_type (fenv_type);
      tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
      fenv_addr = fold_convert (ptr_type_node, fenv_addr);
      tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
      tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
      tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
      tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
      tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
      tree hold_fnclex = build_call_expr (fnclex, 0);
      fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
                         NULL_TREE, NULL_TREE);
      *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
                      hold_fnclex);
      *clear = build_call_expr (fnclex, 0);
      tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
      tree fnstsw_call = build_call_expr (fnstsw, 0);
      tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
                            fnstsw_call, NULL_TREE, NULL_TREE);
      /* Widen the 16-bit status word into EXCEPTIONS_VAR.  */
      tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
      tree update_mod = build4 (TARGET_EXPR, integer_type_node,
                                exceptions_var, exceptions_x87,
                                NULL_TREE, NULL_TREE);
      *update = build2 (COMPOUND_EXPR, integer_type_node,
                        sw_mod, update_mod);
      tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
      *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
    }
  if (TARGET_SSE && TARGET_SSE_MATH)
    {
      /* SSE: HOLD = stmxcsr (save MXCSR) then ldmxcsr of a modified
         value with all exceptions masked (|0x1f80) and flags cleared
         (&0xffffffc0); CLEAR = reload that value; UPDATE = stmxcsr to
         collect raised flags, then ldmxcsr of the original value.  */
      tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
      tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
      tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
      tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
      tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
      tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
                                      mxcsr_orig_var, stmxcsr_hold_call,
                                      NULL_TREE, NULL_TREE);
      tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
                                  mxcsr_orig_var,
                                  build_int_cst (unsigned_type_node, 0x1f80));
      hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
                             build_int_cst (unsigned_type_node, 0xffffffc0));
      tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
                                     mxcsr_mod_var, hold_mod_val,
                                     NULL_TREE, NULL_TREE);
      tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
      tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
                              hold_assign_orig, hold_assign_mod);
      hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
                         ldmxcsr_hold_call);
      /* Append to any x87 sequence already built above.  */
      if (*hold)
        *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
      else
        *hold = hold_all;
      tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
      if (*clear)
        *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
                         ldmxcsr_clear_call);
      else
        *clear = ldmxcsr_clear_call;
      tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
      tree exceptions_sse = fold_convert (integer_type_node,
                                          stxmcsr_update_call);
      if (*update)
        {
          /* OR the SSE flags into the x87 flags collected earlier.  */
          tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
                                        exceptions_var, exceptions_sse);
          tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
                                           exceptions_var, exceptions_mod);
          *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
                            exceptions_assign);
        }
      else
        *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
                          exceptions_sse, NULL_TREE, NULL_TREE);
      tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
      *update = build2 (COMPOUND_EXPR, void_type_node, *update,
                        ldmxcsr_update_call);
    }
  /* Finally re-raise the collected exceptions atomically.  */
  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
                                                    1, exceptions_var);
  *update = build2 (COMPOUND_EXPR, void_type_node, *update,
                    atomic_feraiseexcept_call);
}
   27380              : 
   27381              : #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
   27382              : /* For i386, common symbol is local only for non-PIE binaries.  For
   27383              :    x86-64, common symbol is local only for non-PIE binaries or linker
   27384              :    supports copy reloc in PIE binaries.   */
   27385              : 
   27386              : static bool
   27387    772199625 : ix86_binds_local_p (const_tree exp)
   27388              : {
   27389    772199625 :   bool direct_extern_access
   27390    772199625 :     = (ix86_direct_extern_access
   27391   1540893928 :        && !(VAR_OR_FUNCTION_DECL_P (exp)
   27392    768694303 :             && lookup_attribute ("nodirect_extern_access",
   27393    768694303 :                                  DECL_ATTRIBUTES (exp))));
   27394    772199625 :   if (!direct_extern_access)
   27395         1225 :     ix86_has_no_direct_extern_access = true;
   27396    772199625 :   return default_binds_local_p_3 (exp, flag_shlib != 0, true,
   27397              :                                   direct_extern_access,
   27398              :                                   (direct_extern_access
   27399    772198400 :                                    && (!flag_pic
   27400    132758366 :                                        || (TARGET_64BIT
   27401    772199625 :                                            && HAVE_LD_PIE_COPYRELOC != 0))));
   27402              : }
   27403              : 
   27404              : /* If flag_pic or ix86_direct_extern_access is false, then neither
   27405              :    local nor global relocs should be placed in readonly memory.  */
   27406              : 
   27407              : static int
   27408      5130748 : ix86_reloc_rw_mask (void)
   27409              : {
   27410      5130748 :   return (flag_pic || !ix86_direct_extern_access) ? 3 : 0;
   27411              : }
   27412              : #endif
   27413              : 
   27414              : /* Return true iff ADDR can be used as a symbolic base address.  */
   27415              : 
   27416              : static bool
   27417         3154 : symbolic_base_address_p (rtx addr)
   27418              : {
   27419            0 :   if (SYMBOL_REF_P (addr))
   27420              :     return true;
   27421              : 
   27422         3130 :   if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_GOTOFF)
   27423            0 :     return true;
   27424              : 
   27425              :   return false;
   27426              : }
   27427              : 
   27428              : /* Return true iff ADDR can be used as a base address.  */
   27429              : 
   27430              : static bool
   27431         4718 : base_address_p (rtx addr)
   27432              : {
   27433            0 :   if (REG_P (addr))
   27434              :     return true;
   27435              : 
   27436         2936 :   if (symbolic_base_address_p (addr))
   27437            0 :     return true;
   27438              : 
   27439              :   return false;
   27440              : }
   27441              : 
/* If MEM is in the form of [(base+symbase)+offset], extract the three
   parts of address and set to BASE, SYMBASE and OFFSET, otherwise
   return false.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *symbase, rtx *offset)
{
  rtx addr;

  gcc_assert (MEM_P (mem));

  addr = XEXP (mem, 0);

  /* Strip a CONST wrapper so the PLUS/SYMBOL_REF underneath is
     visible to the pattern checks below.  */
  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  /* Form 1: a bare base (REG or symbolic ref); symbase and offset
     are both zero.  */
  if (base_address_p (addr))
    {
      *base = addr;
      *symbase = const0_rtx;
      *offset = const0_rtx;
      return true;
    }

  if (GET_CODE (addr) == PLUS
      && base_address_p (XEXP (addr, 0)))
    {
      rtx addend = XEXP (addr, 1);

      /* The addend may itself be wrapped in CONST.  */
      if (GET_CODE (addend) == CONST)
	addend = XEXP (addend, 0);

      /* Form 2: base + constant offset.  */
      if (CONST_INT_P (addend))
	{
	  *base = XEXP (addr, 0);
	  *symbase = const0_rtx;
	  *offset = addend;
	  return true;
	}

      /* Also accept REG + symbolic ref, with or without a CONST_INT
	 offset.  */
      if (REG_P (XEXP (addr, 0)))
	{
	  /* Form 3: REG + symbolic base, zero offset.  */
	  if (symbolic_base_address_p (addend))
	    {
	      *base = XEXP (addr, 0);
	      *symbase = addend;
	      *offset = const0_rtx;
	      return true;
	    }

	  /* Form 4: REG + (symbolic base + constant offset).  */
	  if (GET_CODE (addend) == PLUS
	      && symbolic_base_address_p (XEXP (addend, 0))
	      && CONST_INT_P (XEXP (addend, 1)))
	    {
	      *base = XEXP (addr, 0);
	      *symbase = XEXP (addend, 0);
	      *offset = XEXP (addend, 1);
	      return true;
	    }
	}
    }

  return false;
}
   27508              : 
/* Given OPERANDS of consecutive load/store, check if we can merge
   them into move multiple.  LOAD is true if they are load instructions.
   MODE is the mode of memory operands.

   Returns true only when both accesses use the same register, share
   the same (base, symbase) address parts, and the first memory operand
   is immediately below the second.  */

bool
ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
				    machine_mode mode)
{
  HOST_WIDE_INT offval_1, offval_2, msize;
  rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2,
    symbase_1, symbase_2, offset_1, offset_2;

  /* For a load the destinations are operands 0/2 and the memory
     operands are 1/3; for a store it is the other way round.  */
  if (load)
    {
      mem_1 = operands[1];
      mem_2 = operands[3];
      reg_1 = operands[0];
      reg_2 = operands[2];
    }
  else
    {
      mem_1 = operands[0];
      mem_2 = operands[2];
      reg_1 = operands[1];
      reg_2 = operands[3];
    }

  gcc_assert (REG_P (reg_1) && REG_P (reg_2));

  /* Both moves must involve the same register.  */
  if (REGNO (reg_1) != REGNO (reg_2))
    return false;

  /* Check if the addresses are in the form of [base+offset].  */
  if (!extract_base_offset_in_addr (mem_1, &base_1, &symbase_1, &offset_1))
    return false;
  if (!extract_base_offset_in_addr (mem_2, &base_2, &symbase_2, &offset_2))
    return false;

  /* Check if the bases are the same.  */
  if (!rtx_equal_p (base_1, base_2) || !rtx_equal_p (symbase_1, symbase_2))
    return false;

  offval_1 = INTVAL (offset_1);
  offval_2 = INTVAL (offset_2);
  msize = GET_MODE_SIZE (mode);
  /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address.  */
  if (offval_1 + msize != offval_2)
    return false;

  return true;
}
   27560              : 
   27561              : /* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */
   27562              : 
   27563              : static bool
   27564       341764 : ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
   27565              :                         optimization_type opt_type)
   27566              : {
   27567       341764 :   switch (op)
   27568              :     {
   27569          216 :     case asin_optab:
   27570          216 :     case acos_optab:
   27571          216 :     case log1p_optab:
   27572          216 :     case exp_optab:
   27573          216 :     case exp10_optab:
   27574          216 :     case exp2_optab:
   27575          216 :     case expm1_optab:
   27576          216 :     case ldexp_optab:
   27577          216 :     case scalb_optab:
   27578          216 :     case round_optab:
   27579          216 :     case lround_optab:
   27580          216 :       return opt_type == OPTIMIZE_FOR_SPEED;
   27581              : 
   27582          263 :     case rint_optab:
   27583          263 :       if (SSE_FLOAT_MODE_P (mode1)
   27584          144 :           && TARGET_SSE_MATH
   27585          128 :           && !flag_trapping_math
   27586           21 :           && !TARGET_SSE4_1
   27587              :           && mode1 != HFmode)
   27588           21 :         return opt_type == OPTIMIZE_FOR_SPEED;
   27589              :       return true;
   27590              : 
   27591         1892 :     case floor_optab:
   27592         1892 :     case ceil_optab:
   27593         1892 :     case btrunc_optab:
   27594         1892 :       if ((SSE_FLOAT_MODE_P (mode1)
   27595         1594 :            && TARGET_SSE_MATH
   27596         1515 :            && TARGET_SSE4_1)
   27597         1825 :           || mode1 == HFmode)
   27598              :         return true;
   27599         1756 :       return opt_type == OPTIMIZE_FOR_SPEED;
   27600              : 
   27601           84 :     case rsqrt_optab:
   27602           84 :       return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
   27603              : 
   27604              :     default:
   27605              :       return true;
   27606              :     }
   27607              : }
   27608              : 
   27609              : /* Address space support.
   27610              : 
   27611              :    This is not "far pointers" in the 16-bit sense, but an easy way
   27612              :    to use %fs and %gs segment prefixes.  Therefore:
   27613              : 
   27614              :     (a) All address spaces have the same modes,
    (b) All address spaces have the same address forms,
   27616              :     (c) While %fs and %gs are technically subsets of the generic
   27617              :         address space, they are probably not subsets of each other.
   27618              :     (d) Since we have no access to the segment base register values
   27619              :         without resorting to a system call, we cannot convert a
   27620              :         non-default address space to a default address space.
   27621              :         Therefore we do not claim %fs or %gs are subsets of generic.
   27622              : 
   27623              :    Therefore we can (mostly) use the default hooks.  */
   27624              : 
   27625              : /* All use of segmentation is assumed to make address 0 valid.  */
   27626              : 
   27627              : static bool
   27628     67738141 : ix86_addr_space_zero_address_valid (addr_space_t as)
   27629              : {
   27630     67738141 :   return as != ADDR_SPACE_GENERIC;
   27631              : }
   27632              : 
   27633              : static void
   27634       778808 : ix86_init_libfuncs (void)
   27635              : {
   27636       778808 :   if (TARGET_64BIT)
   27637              :     {
   27638       763850 :       set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
   27639       763850 :       set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
   27640              :     }
   27641              :   else
   27642              :     {
   27643        14958 :       set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
   27644        14958 :       set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
   27645              :     }
   27646              : 
   27647              : #if TARGET_MACHO
   27648              :   darwin_rename_builtins ();
   27649              : #endif
   27650       778808 : }
   27651              : 
   27652              : /* Set the value of FLT_EVAL_METHOD in float.h.  When using only the
   27653              :    FPU, assume that the fpcw is set to extended precision; when using
   27654              :    only SSE, rounding is correct; when using both SSE and the FPU,
   27655              :    the rounding precision is indeterminate, since either may be chosen
   27656              :    apparently at random.  */
   27657              : 
static enum flt_eval_method
ix86_get_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
      case EXCESS_PRECISION_TYPE_FAST:
	/* The fastest type to promote to will always be the native type,
	   whether that occurs with implicit excess precision or
	   otherwise.  */
	return TARGET_AVX512FP16
	       ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
	       : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
      case EXCESS_PRECISION_TYPE_STANDARD:
      case EXCESS_PRECISION_TYPE_IMPLICIT:
	/* Otherwise, the excess precision we want when we are
	   in a standards compliant mode, and the implicit precision we
	   provide would be identical were it not for the unpredictable
	   cases.  */
	if (TARGET_AVX512FP16 && TARGET_SSE_MATH)
	  return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
	else if (!TARGET_80387)
	  /* No x87 at all: SSE rounds correctly, promote to float.  */
	  return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
	else if (!TARGET_MIX_SSE_I387)
	  {
	    /* Pure x87 math computes in extended precision;
	       pure SSE2 math rounds each operation correctly.  */
	    if (!(TARGET_SSE && TARGET_SSE_MATH))
	      return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
	    else if (TARGET_SSE2)
	      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
	  }

	/* If we are in standards compliant mode, but we know we will
	   calculate in unpredictable precision, return
	   FLT_EVAL_METHOD_FLOAT.  There is no reason to introduce explicit
	   excess precision if the target can't guarantee it will honor
	   it.  */
	return (type == EXCESS_PRECISION_TYPE_STANDARD
		? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
		: FLT_EVAL_METHOD_UNPREDICTABLE);
      case EXCESS_PRECISION_TYPE_FLOAT16:
	/* _Float16 excess precision needs SSE math; reject x87-only
	   configurations.  */
	if (TARGET_80387
	    && !(TARGET_SSE_MATH && TARGET_SSE))
	  error ("%<-fexcess-precision=16%> is not compatible with %<-mfpmath=387%>");
	return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
      default:
	gcc_unreachable ();
    }

  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
   27707              : 
   27708              : /* Return true if _BitInt(N) is supported and fill its details into *INFO.  */
   27709              : bool
   27710       350014 : ix86_bitint_type_info (int n, struct bitint_info *info)
   27711              : {
   27712       350014 :   if (n <= 8)
   27713         5535 :     info->limb_mode = QImode;
   27714       344479 :   else if (n <= 16)
   27715         1893 :     info->limb_mode = HImode;
   27716       342586 :   else if (n <= 32 || (!TARGET_64BIT && n > 64))
   27717        45555 :     info->limb_mode = SImode;
   27718              :   else
   27719       297031 :     info->limb_mode = DImode;
   27720       350014 :   info->abi_limb_mode = info->limb_mode;
   27721       350014 :   info->big_endian = false;
   27722       350014 :   info->extended = bitint_ext_undef;
   27723       350014 :   return true;
   27724              : }
   27725              : 
   27726              : /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE.  Return DFmode, TFmode
   27727              :    or XFmode for TI_LONG_DOUBLE_TYPE which is for long double type,
   27728              :    based on long double bits, go with the default one for the others.  */
   27729              : 
   27730              : static machine_mode
   27731      3656139 : ix86_c_mode_for_floating_type (enum tree_index ti)
   27732              : {
   27733      3656139 :   if (ti == TI_LONG_DOUBLE_TYPE)
   27734       610252 :     return (TARGET_LONG_DOUBLE_64 ? DFmode
   27735       610220 :                                   : (TARGET_LONG_DOUBLE_128 ? TFmode : XFmode));
   27736      3045887 :   return default_mode_for_floating_type (ti);
   27737              : }
   27738              : 
   27739              : /* Returns modified FUNCTION_TYPE for cdtor callabi.  */
   27740              : tree
   27741        13938 : ix86_cxx_adjust_cdtor_callabi_fntype (tree fntype)
   27742              : {
   27743        13938 :   if (TARGET_64BIT
   27744           71 :       || TARGET_RTD
   27745        14009 :       || ix86_function_type_abi (fntype) != MS_ABI)
   27746        13938 :     return fntype;
   27747              :   /* For 32-bit MS ABI add thiscall attribute.  */
   27748            0 :   tree attribs = tree_cons (get_identifier ("thiscall"), NULL_TREE,
   27749            0 :                             TYPE_ATTRIBUTES (fntype));
   27750            0 :   return build_type_attribute_variant (fntype, attribs);
   27751              : }
   27752              : 
/* Implement PUSH_ROUNDING.  On 386, we have pushw instruction that
   decrements by exactly 2 no matter what the position was, there is no pushb.

   But as CIE data alignment factor on this arch is -4 for 32bit targets
   and -8 for 64bit targets, we need to make sure all stack pointer adjustments
   are in multiple of 4 for 32bit targets and 8 for 64bit targets.  */

poly_int64
ix86_push_rounding (poly_int64 bytes)
{
  /* Round the push size up to a full word (4 or 8 bytes).  */
  return ROUND_UP (bytes, UNITS_PER_WORD);
}
   27765              : 
/* Linear Address Masking (LAM) metadata layout used by HWASAN:
   8 bits of metadata starting from bit 48 for LAM_U48,
   6 bits of metadata starting from bit 57 for LAM_U57.
   Both macros evaluate to 0 when no LAM type is selected.  */
#define IX86_HWASAN_SHIFT (ix86_lam_type == lam_u48             \
                           ? 48                                 \
                           : (ix86_lam_type == lam_u57 ? 57 : 0))
#define IX86_HWASAN_TAG_SIZE (ix86_lam_type == lam_u48          \
                              ? 8                               \
                              : (ix86_lam_type == lam_u57 ? 6 : 0))
   27774              : 
   27775              : /* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES.  */
   27776              : bool
   27777      6233777 : ix86_memtag_can_tag_addresses ()
   27778              : {
   27779      6233777 :   return ix86_lam_type != lam_none && TARGET_LP64;
   27780              : }
   27781              : 
/* Implement TARGET_MEMTAG_TAG_BITSIZE.  Number of tag bits:
   8 for LAM_U48, 6 for LAM_U57, 0 when LAM is disabled.  */
unsigned char
ix86_memtag_tag_bitsize ()
{
  return IX86_HWASAN_TAG_SIZE;
}
   27788              : 
/* Implement TARGET_MEMTAG_SET_TAG.  Return UNTAGGED with TAG placed
   into its metadata bits; TARGET may be used for the result.  */
rtx
ix86_memtag_set_tag (rtx untagged, rtx tag, rtx target)
{
  /* default_memtag_insert_random_tag may
     generate tag with value more than 6 bits.  */
  if (ix86_lam_type == lam_u57)
    {
      /* Truncate the tag to its 6-bit width first.  */
      unsigned HOST_WIDE_INT and_imm
	= (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;

      emit_insn (gen_andqi3 (tag, tag, GEN_INT (and_imm)));
    }
  /* Shift the tag up into the metadata bits, then OR it into the
     untagged pointer.  */
  tag = expand_simple_binop (Pmode, ASHIFT, tag,
			     GEN_INT (IX86_HWASAN_SHIFT), NULL_RTX,
			     /* unsignedp = */1, OPTAB_WIDEN);
  rtx ret = expand_simple_binop (Pmode, IOR, untagged, tag, target,
				 /* unsignedp = */1, OPTAB_DIRECT);
  return ret;
}
   27809              : 
/* Implement TARGET_MEMTAG_EXTRACT_TAG.  Return TAGGED_POINTER's tag
   (the metadata bits, shifted down) as a QImode value.  */
rtx
ix86_memtag_extract_tag (rtx tagged_pointer, rtx target)
{
  rtx tag = expand_simple_binop (Pmode, LSHIFTRT, tagged_pointer,
				 GEN_INT (IX86_HWASAN_SHIFT), target,
				 /* unsignedp = */0,
				 OPTAB_DIRECT);
  rtx ret = gen_reg_rtx (QImode);
  /* Mask off bit63 when LAM_U57.  */
  if (ix86_lam_type == lam_u57)
    {
      /* For LAM_U57 the shift leaves bit 63 (the sign bit) above the
	 6 tag bits, so mask the result down to the tag width.  */
      unsigned HOST_WIDE_INT and_imm
	= (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
      emit_insn (gen_andqi3 (ret, gen_lowpart (QImode, tag),
			     gen_int_mode (and_imm, QImode)));
    }
  else
    emit_move_insn (ret, gen_lowpart (QImode, tag));
  return ret;
}
   27831              : 
/* Implement TARGET_MEMTAG_UNTAGGED_POINTER.  Clear TAGGED_POINTER's
   metadata bits, leaving bit 63 untouched.  */
rtx
ix86_memtag_untagged_pointer (rtx tagged_pointer, rtx target)
{
  /* Leave bit63 alone: the mask keeps bits below IX86_HWASAN_SHIFT
     plus the top bit.  */
  rtx tag_mask = gen_int_mode (((HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT)
				+ (HOST_WIDE_INT_1U << 63) - 1),
			       Pmode);
  rtx untagged_base = expand_simple_binop (Pmode, AND, tagged_pointer,
					   tag_mask, target, true,
					   OPTAB_DIRECT);
  gcc_assert (untagged_base);
  return untagged_base;
}
   27846              : 
/* Implement TARGET_MEMTAG_ADD_TAG.  Return BASE + OFFSET re-tagged
   with the base's tag incremented by TAG_OFFSET.  */
rtx
ix86_memtag_add_tag (rtx base, poly_int64 offset, unsigned char tag_offset)
{
  rtx base_tag = gen_reg_rtx (QImode);
  rtx base_addr = gen_reg_rtx (Pmode);
  rtx tagged_addr = gen_reg_rtx (Pmode);
  rtx new_tag = gen_reg_rtx (QImode);
  /* NOTE(review): this mask uses IX86_HWASAN_SHIFT, not
     IX86_HWASAN_TAG_SIZE, so truncated to QImode it is 0xff; the
     final tag width is enforced inside ix86_memtag_set_tag — confirm
     whether the tag-size mask was intended here.  */
  unsigned HOST_WIDE_INT and_imm
    = (HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) - 1;

  /* When there's "overflow" in tag adding,
     need to mask the most significant bit off.  */
  emit_move_insn (base_tag, ix86_memtag_extract_tag (base, NULL_RTX));
  emit_move_insn (base_addr,
		  ix86_memtag_untagged_pointer (base, NULL_RTX));
  emit_insn (gen_add2_insn (base_tag, gen_int_mode (tag_offset, QImode)));
  emit_move_insn (new_tag, base_tag);
  emit_insn (gen_andqi3 (new_tag, new_tag, gen_int_mode (and_imm, QImode)));
  emit_move_insn (tagged_addr,
		  ix86_memtag_set_tag (base_addr, new_tag, NULL_RTX));
  return plus_constant (Pmode, tagged_addr, offset);
}
   27870              : 
   27871              : /* Implement TARGET_HAVE_CCMP.  */
   27872              : static bool
   27873      8036359 : ix86_have_ccmp ()
   27874              : {
   27875      8036359 :   return (bool) TARGET_APX_CCMP;
   27876              : }
   27877              : 
   27878              : /* Implement TARGET_MODE_CAN_TRANSFER_BITS.  */
   27879              : static bool
   27880      4547143 : ix86_mode_can_transfer_bits (machine_mode mode)
   27881              : {
   27882      4547143 :   if (GET_MODE_CLASS (mode) == MODE_FLOAT
   27883      4500650 :       || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
   27884       111510 :     switch (GET_MODE_INNER (mode))
   27885              :       {
   27886        53569 :       case E_SFmode:
   27887        53569 :       case E_DFmode:
   27888              :         /* These suffer from normalization upon load when not using SSE.  */
   27889        53569 :         return !(ix86_fpmath & FPMATH_387);
   27890              :       default:
   27891              :         return true;
   27892              :       }
   27893              : 
   27894              :   return true;
   27895              : }
   27896              : 
   27897              : /* Implement TARGET_REDZONE_CLOBBER.  */
   27898              : static rtx
   27899            2 : ix86_redzone_clobber ()
   27900              : {
   27901            2 :   cfun->machine->asm_redzone_clobber_seen = true;
   27902            2 :   if (ix86_using_red_zone ())
   27903              :     {
   27904            2 :       rtx base = plus_constant (Pmode, stack_pointer_rtx, -RED_ZONE_SIZE);
   27905            2 :       rtx mem = gen_rtx_MEM (BLKmode, base);
   27906            2 :       set_mem_size (mem, RED_ZONE_SIZE);
   27907            2 :       return mem;
   27908              :     }
   27909              :   return NULL_RTX;
   27910              : }
   27911              : 
   27912              : /* Target-specific selftests.  */
   27913              : 
   27914              : #if CHECKING_P
   27915              : 
   27916              : namespace selftest {
   27917              : 
   27918              : /* Verify that hard regs are dumped as expected (in compact mode).  */
   27919              : 
static void
ix86_test_dumping_hard_regs ()
{
  /* Hard regs 0 and 1 are %ax and %dx and should dump by name.  */
  ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
  ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
}
   27926              : 
/* Test dumping an insn with repeated references to the same SCRATCH,
   to verify the rtx_reuse code.  */

static void
ix86_test_dumping_memory_blockage ()
{
  /* Reset the insn chain so the dumped insn gets uid 1.  */
  set_new_first_and_last_insn (NULL, NULL);

  rtx pat = gen_memory_blockage ();
  rtx_reuse_manager r;
  r.preprocess (pat);

  /* Verify that the repeated references to the SCRATCH use reuse
     IDs.  The first should be prefixed with a reuse ID,
     and the second should be dumped as a "reuse_rtx" of that ID.
     The expected string assumes Pmode == DImode.  */
  if (Pmode == DImode)
    ASSERT_RTL_DUMP_EQ_WITH_REUSE
      ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0  A8])\n"
       "        (unspec:BLK [\n"
       "                (mem/v:BLK (reuse_rtx 0) [0  A8])\n"
       "            ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
}
   27950              : 
/* Verify loading an RTL dump; specifically a dump of copying
   a param on x86_64 from a hard reg into the frame.
   This test is target-specific since the dump contains target-specific
   hard reg names.  */

static void
ix86_test_loading_dump_fragment_1 ()
{
  rtl_dump_test t (SELFTEST_LOCATION,
		   locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));

  rtx_insn *insn = get_insn_by_uid (1);

  /* The block structure and indentation here is purely for
     readability; it mirrors the structure of the rtx.  */
  tree mem_expr;
  {
    rtx pat = PATTERN (insn);
    ASSERT_EQ (SET, GET_CODE (pat));
    {
      rtx dest = SET_DEST (pat);
      ASSERT_EQ (MEM, GET_CODE (dest));
      /* Verify the "/c" was parsed.  */
      ASSERT_TRUE (RTX_FLAG (dest, call));
      ASSERT_EQ (SImode, GET_MODE (dest));
      {
	/* The destination address should be frame_pointer - 4.  */
	rtx addr = XEXP (dest, 0);
	ASSERT_EQ (PLUS, GET_CODE (addr));
	ASSERT_EQ (DImode, GET_MODE (addr));
	{
	  rtx lhs = XEXP (addr, 0);
	  /* Verify that the "frame" REG was consolidated.  */
	  ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
	}
	{
	  rtx rhs = XEXP (addr, 1);
	  ASSERT_EQ (CONST_INT, GET_CODE (rhs));
	  ASSERT_EQ (-4, INTVAL (rhs));
	}
      }
      /* Verify the "[1 i+0 S4 A32]" was parsed.  */
      ASSERT_EQ (1, MEM_ALIAS_SET (dest));
      /* "i" should have been handled by synthesizing a global int
	 variable named "i".  */
      mem_expr = MEM_EXPR (dest);
      ASSERT_NE (mem_expr, NULL);
      ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
      ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
      ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
      ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
      /* "+0".  */
      ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
      ASSERT_EQ (0, MEM_OFFSET (dest));
      /* "S4".  */
      ASSERT_EQ (4, MEM_SIZE (dest));
      /* "A32".  */
      ASSERT_EQ (32, MEM_ALIGN (dest));
    }
    {
      /* The source should be hard reg 5 (%di) tracking decl "i".  */
      rtx src = SET_SRC (pat);
      ASSERT_EQ (REG, GET_CODE (src));
      ASSERT_EQ (SImode, GET_MODE (src));
      ASSERT_EQ (5, REGNO (src));
      tree reg_expr = REG_EXPR (src);
      /* "i" here should point to the same var as for the MEM_EXPR.  */
      ASSERT_EQ (reg_expr, mem_expr);
    }
  }
}
   28020              : 
   28021              : /* Verify that the RTL loader copes with a call_insn dump.
   28022              :    This test is target-specific since the dump contains a target-specific
   28023              :    hard reg name.  */
   28024              : 
   28025              : static void
   28026            4 : ix86_test_loading_call_insn ()
   28027              : {
   28028              :   /* The test dump includes register "xmm0", where requires TARGET_SSE
   28029              :      to exist.  */
   28030            4 :   if (!TARGET_SSE)
   28031            0 :     return;
   28032              : 
   28033            4 :   rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));
   28034              : 
   28035            4 :   rtx_insn *insn = get_insns ();
   28036            4 :   ASSERT_EQ (CALL_INSN, GET_CODE (insn));
   28037              : 
   28038              :   /* "/j".  */
   28039            4 :   ASSERT_TRUE (RTX_FLAG (insn, jump));
   28040              : 
   28041            4 :   rtx pat = PATTERN (insn);
   28042            4 :   ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));
   28043              : 
   28044              :   /* Verify REG_NOTES.  */
   28045            4 :   {
   28046              :     /* "(expr_list:REG_CALL_DECL".   */
   28047            4 :     ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
   28048            4 :     rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
   28049            4 :     ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));
   28050              : 
   28051              :     /* "(expr_list:REG_EH_REGION (const_int 0 [0])".  */
   28052            4 :     rtx_expr_list *note1 = note0->next ();
   28053            4 :     ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));
   28054              : 
   28055            4 :     ASSERT_EQ (NULL, note1->next ());
   28056              :   }
   28057              : 
   28058              :   /* Verify CALL_INSN_FUNCTION_USAGE.  */
   28059            4 :   {
   28060              :     /* "(expr_list:DF (use (reg:DF 21 xmm0))".  */
   28061            4 :     rtx_expr_list *usage
   28062            4 :       = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
   28063            4 :     ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
   28064            4 :     ASSERT_EQ (DFmode, GET_MODE (usage));
   28065            4 :     ASSERT_EQ (USE, GET_CODE (usage->element ()));
   28066            4 :     ASSERT_EQ (NULL, usage->next ());
   28067              :   }
   28068            4 : }
   28069              : 
   28070              : /* Verify that the RTL loader copes a dump from print_rtx_function.
   28071              :    This test is target-specific since the dump contains target-specific
   28072              :    hard reg names.  */
   28073              : 
   28074              : static void
   28075            4 : ix86_test_loading_full_dump ()
   28076              : {
   28077            4 :   rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));
   28078              : 
   28079            4 :   ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
   28080              : 
   28081            4 :   rtx_insn *insn_1 = get_insn_by_uid (1);
   28082            4 :   ASSERT_EQ (NOTE, GET_CODE (insn_1));
   28083              : 
   28084            4 :   rtx_insn *insn_7 = get_insn_by_uid (7);
   28085            4 :   ASSERT_EQ (INSN, GET_CODE (insn_7));
   28086            4 :   ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));
   28087              : 
   28088            4 :   rtx_insn *insn_15 = get_insn_by_uid (15);
   28089            4 :   ASSERT_EQ (INSN, GET_CODE (insn_15));
   28090            4 :   ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));
   28091              : 
   28092              :   /* Verify crtl->return_rtx.  */
   28093            4 :   ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
   28094            4 :   ASSERT_EQ (0, REGNO (crtl->return_rtx));
   28095            4 :   ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
   28096            4 : }
   28097              : 
   28098              : /* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
   28099              :    In particular, verify that it correctly loads the 2nd operand.
   28100              :    This test is target-specific since these are machine-specific
   28101              :    operands (and enums).  */
   28102              : 
   28103              : static void
   28104            4 : ix86_test_loading_unspec ()
   28105              : {
   28106            4 :   rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));
   28107              : 
   28108            4 :   ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));
   28109              : 
   28110            4 :   ASSERT_TRUE (cfun);
   28111              : 
   28112              :   /* Test of an UNSPEC.  */
   28113            4 :    rtx_insn *insn = get_insns ();
   28114            4 :   ASSERT_EQ (INSN, GET_CODE (insn));
   28115            4 :   rtx set = single_set (insn);
   28116            4 :   ASSERT_NE (NULL, set);
   28117            4 :   rtx dst = SET_DEST (set);
   28118            4 :   ASSERT_EQ (MEM, GET_CODE (dst));
   28119            4 :   rtx src = SET_SRC (set);
   28120            4 :   ASSERT_EQ (UNSPEC, GET_CODE (src));
   28121            4 :   ASSERT_EQ (BLKmode, GET_MODE (src));
   28122            4 :   ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));
   28123              : 
   28124            4 :   rtx v0 = XVECEXP (src, 0, 0);
   28125              : 
   28126              :   /* Verify that the two uses of the first SCRATCH have pointer
   28127              :      equality.  */
   28128            4 :   rtx scratch_a = XEXP (dst, 0);
   28129            4 :   ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));
   28130              : 
   28131            4 :   rtx scratch_b = XEXP (v0, 0);
   28132            4 :   ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));
   28133              : 
   28134            4 :   ASSERT_EQ (scratch_a, scratch_b);
   28135              : 
   28136              :   /* Verify that the two mems are thus treated as equal.  */
   28137            4 :   ASSERT_TRUE (rtx_equal_p (dst, v0));
   28138              : 
   28139              :   /* Verify that the insn is recognized.  */
   28140            4 :   ASSERT_NE(-1, recog_memoized (insn));
   28141              : 
   28142              :   /* Test of an UNSPEC_VOLATILE, which has its own enum values.  */
   28143            4 :   insn = NEXT_INSN (insn);
   28144            4 :   ASSERT_EQ (INSN, GET_CODE (insn));
   28145              : 
   28146            4 :   set = single_set (insn);
   28147            4 :   ASSERT_NE (NULL, set);
   28148              : 
   28149            4 :   src = SET_SRC (set);
   28150            4 :   ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
   28151            4 :   ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
   28152            4 : }
   28153              : 
   28154              : /* Run all target-specific selftests.  */
   28155              : 
static void
ix86_run_selftests (void)
{
  /* Tests of dumping RTL containing target-specific constructs
     (hard regs, memory-blockage unspecs).  */
  ix86_test_dumping_hard_regs ();
  ix86_test_dumping_memory_blockage ();

  /* Various tests of loading RTL dumps, here because they contain
     ix86-isms (e.g. names of hard regs).  */
  ix86_test_loading_dump_fragment_1 ();
  ix86_test_loading_call_insn ();
  ix86_test_loading_full_dump ();
  ix86_test_loading_unspec ();
}
   28169              : 
   28170              : } // namespace selftest
   28171              : 
   28172              : #endif /* CHECKING_P */
   28173              : 
/* The per-attribute-syntax tables recognized by this target; only the
   GNU-syntax table is registered here (referenced below via
   TARGET_ATTRIBUTE_TABLE).  */
static const scoped_attribute_specs *const ix86_attribute_table[] =
{
  &ix86_gnu_attribute_table
};
   28178              : 
   28179              : /* Initialize the GCC target structure.  */
   28180              : #undef TARGET_RETURN_IN_MEMORY
   28181              : #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
   28182              : 
   28183              : #undef TARGET_LEGITIMIZE_ADDRESS
   28184              : #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
   28185              : 
   28186              : #undef TARGET_ATTRIBUTE_TABLE
   28187              : #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
   28188              : #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
   28189              : #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
   28190              : #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
   28191              : #  undef TARGET_MERGE_DECL_ATTRIBUTES
   28192              : #  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
   28193              : #endif
   28194              : 
   28195              : #undef TARGET_INVALID_CONVERSION
   28196              : #define TARGET_INVALID_CONVERSION ix86_invalid_conversion
   28197              : 
   28198              : #undef TARGET_INVALID_UNARY_OP
   28199              : #define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
   28200              : 
   28201              : #undef TARGET_INVALID_BINARY_OP
   28202              : #define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
   28203              : 
   28204              : #undef TARGET_COMP_TYPE_ATTRIBUTES
   28205              : #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
   28206              : 
   28207              : #undef TARGET_INIT_BUILTINS
   28208              : #define TARGET_INIT_BUILTINS ix86_init_builtins
   28209              : #undef TARGET_BUILTIN_DECL
   28210              : #define TARGET_BUILTIN_DECL ix86_builtin_decl
   28211              : #undef TARGET_EXPAND_BUILTIN
   28212              : #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
   28213              : 
   28214              : #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
   28215              : #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
   28216              :   ix86_builtin_vectorized_function
   28217              : 
   28218              : #undef TARGET_VECTORIZE_BUILTIN_GATHER
   28219              : #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
   28220              : 
   28221              : #undef TARGET_VECTORIZE_BUILTIN_SCATTER
   28222              : #define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter
   28223              : 
   28224              : #undef TARGET_BUILTIN_RECIPROCAL
   28225              : #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
   28226              : 
   28227              : #undef TARGET_ASM_FUNCTION_EPILOGUE
   28228              : #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
   28229              : 
   28230              : #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
   28231              : #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
   28232              :   ix86_print_patchable_function_entry
   28233              : 
   28234              : #undef TARGET_ENCODE_SECTION_INFO
   28235              : #ifndef SUBTARGET_ENCODE_SECTION_INFO
   28236              : #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
   28237              : #else
   28238              : #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
   28239              : #endif
   28240              : 
   28241              : #undef TARGET_ASM_OPEN_PAREN
   28242              : #define TARGET_ASM_OPEN_PAREN ""
   28243              : #undef TARGET_ASM_CLOSE_PAREN
   28244              : #define TARGET_ASM_CLOSE_PAREN ""
   28245              : 
   28246              : #undef TARGET_ASM_BYTE_OP
   28247              : #define TARGET_ASM_BYTE_OP ASM_BYTE
   28248              : 
   28249              : #undef TARGET_ASM_ALIGNED_HI_OP
   28250              : #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
   28251              : #undef TARGET_ASM_ALIGNED_SI_OP
   28252              : #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
   28253              : #ifdef ASM_QUAD
   28254              : #undef TARGET_ASM_ALIGNED_DI_OP
   28255              : #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
   28256              : #endif
   28257              : 
   28258              : #undef TARGET_PROFILE_BEFORE_PROLOGUE
   28259              : #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
   28260              : 
   28261              : #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
   28262              : #define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
   28263              : 
   28264              : #undef TARGET_ASM_UNALIGNED_HI_OP
   28265              : #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
   28266              : #undef TARGET_ASM_UNALIGNED_SI_OP
   28267              : #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
   28268              : #undef TARGET_ASM_UNALIGNED_DI_OP
   28269              : #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
   28270              : 
   28271              : #undef TARGET_PRINT_OPERAND
   28272              : #define TARGET_PRINT_OPERAND ix86_print_operand
   28273              : #undef TARGET_PRINT_OPERAND_ADDRESS
   28274              : #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
   28275              : #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
   28276              : #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
   28277              : #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
   28278              : #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
   28279              : 
   28280              : #undef TARGET_SCHED_INIT_GLOBAL
   28281              : #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
   28282              : #undef TARGET_SCHED_ADJUST_COST
   28283              : #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
   28284              : #undef TARGET_SCHED_ISSUE_RATE
   28285              : #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
   28286              : #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
   28287              : #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
   28288              :   ia32_multipass_dfa_lookahead
   28289              : #undef TARGET_SCHED_MACRO_FUSION_P
   28290              : #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
   28291              : #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
   28292              : #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
   28293              : 
   28294              : #undef TARGET_FUNCTION_OK_FOR_SIBCALL
   28295              : #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
   28296              : 
   28297              : #undef TARGET_MEMMODEL_CHECK
   28298              : #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
   28299              : 
   28300              : #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
   28301              : #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
   28302              : 
   28303              : #ifdef HAVE_AS_TLS
   28304              : #undef TARGET_HAVE_TLS
   28305              : #define TARGET_HAVE_TLS true
   28306              : #endif
   28307              : #undef TARGET_CANNOT_FORCE_CONST_MEM
   28308              : #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
   28309              : #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
   28310              : #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
   28311              : 
   28312              : #undef TARGET_DELEGITIMIZE_ADDRESS
   28313              : #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
   28314              : 
   28315              : #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
   28316              : #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
   28317              : 
   28318              : #undef TARGET_MS_BITFIELD_LAYOUT_P
   28319              : #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
   28320              : 
   28321              : #if TARGET_MACHO
   28322              : #undef TARGET_BINDS_LOCAL_P
   28323              : #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
   28324              : #else
   28325              : #undef TARGET_BINDS_LOCAL_P
   28326              : #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
   28327              : #endif
   28328              : #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
   28329              : #undef TARGET_BINDS_LOCAL_P
   28330              : #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
   28331              : #endif
   28332              : 
   28333              : #undef TARGET_ASM_OUTPUT_MI_THUNK
   28334              : #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
   28335              : #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
   28336              : #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
   28337              : 
   28338              : #undef TARGET_ASM_FILE_START
   28339              : #define TARGET_ASM_FILE_START x86_file_start
   28340              : 
   28341              : #undef TARGET_OPTION_OVERRIDE
   28342              : #define TARGET_OPTION_OVERRIDE ix86_option_override
   28343              : 
   28344              : #undef TARGET_REGISTER_MOVE_COST
   28345              : #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
   28346              : #undef TARGET_MEMORY_MOVE_COST
   28347              : #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
   28348              : #undef TARGET_RTX_COSTS
   28349              : #define TARGET_RTX_COSTS ix86_rtx_costs
   28350              : #undef TARGET_INSN_COST
   28351              : #define TARGET_INSN_COST ix86_insn_cost
   28352              : #undef TARGET_ADDRESS_COST
   28353              : #define TARGET_ADDRESS_COST ix86_address_cost
   28354              : 
   28355              : #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
   28356              : #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
   28357              :   ix86_use_by_pieces_infrastructure_p
   28358              : 
   28359              : #undef TARGET_OVERLAP_OP_BY_PIECES_P
   28360              : #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
   28361              : 
   28362              : #undef TARGET_FLAGS_REGNUM
   28363              : #define TARGET_FLAGS_REGNUM FLAGS_REG
   28364              : #undef TARGET_FIXED_CONDITION_CODE_REGS
   28365              : #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
   28366              : #undef TARGET_CC_MODES_COMPATIBLE
   28367              : #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
   28368              : 
   28369              : #undef TARGET_MACHINE_DEPENDENT_REORG
   28370              : #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
   28371              : 
   28372              : #undef TARGET_BUILD_BUILTIN_VA_LIST
   28373              : #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
   28374              : 
   28375              : #undef TARGET_FOLD_BUILTIN
   28376              : #define TARGET_FOLD_BUILTIN ix86_fold_builtin
   28377              : 
   28378              : #undef TARGET_GIMPLE_FOLD_BUILTIN
   28379              : #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
   28380              : 
   28381              : #undef TARGET_COMPARE_VERSION_PRIORITY
   28382              : #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
   28383              : 
   28384              : #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
   28385              : #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
   28386              :   ix86_generate_version_dispatcher_body
   28387              : 
   28388              : #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
   28389              : #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
   28390              :   ix86_get_function_versions_dispatcher
   28391              : 
   28392              : #undef TARGET_ENUM_VA_LIST_P
   28393              : #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
   28394              : 
   28395              : #undef TARGET_FN_ABI_VA_LIST
   28396              : #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
   28397              : 
   28398              : #undef TARGET_CANONICAL_VA_LIST_TYPE
   28399              : #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
   28400              : 
   28401              : #undef TARGET_EXPAND_BUILTIN_VA_START
   28402              : #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
   28403              : 
   28404              : #undef TARGET_MD_ASM_ADJUST
   28405              : #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
   28406              : 
   28407              : #undef TARGET_C_EXCESS_PRECISION
   28408              : #define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
   28409              : #undef TARGET_C_BITINT_TYPE_INFO
   28410              : #define TARGET_C_BITINT_TYPE_INFO ix86_bitint_type_info
   28411              : #undef TARGET_C_MODE_FOR_FLOATING_TYPE
   28412              : #define TARGET_C_MODE_FOR_FLOATING_TYPE ix86_c_mode_for_floating_type
   28413              : #undef TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE
   28414              : #define TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE ix86_cxx_adjust_cdtor_callabi_fntype
   28415              : #undef TARGET_PROMOTE_PROTOTYPES
   28416              : #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
   28417              : #undef TARGET_PUSH_ARGUMENT
   28418              : #define TARGET_PUSH_ARGUMENT ix86_push_argument
   28419              : #undef TARGET_SETUP_INCOMING_VARARGS
   28420              : #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
   28421              : #undef TARGET_MUST_PASS_IN_STACK
   28422              : #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
   28423              : #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
   28424              : #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
   28425              : #undef TARGET_FUNCTION_ARG_ADVANCE
   28426              : #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
   28427              : #undef TARGET_FUNCTION_ARG
   28428              : #define TARGET_FUNCTION_ARG ix86_function_arg
   28429              : #undef TARGET_INIT_PIC_REG
   28430              : #define TARGET_INIT_PIC_REG ix86_init_pic_reg
   28431              : #undef TARGET_USE_PSEUDO_PIC_REG
   28432              : #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
   28433              : #undef TARGET_FUNCTION_ARG_BOUNDARY
   28434              : #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
   28435              : #undef TARGET_PASS_BY_REFERENCE
   28436              : #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
   28437              : #undef TARGET_INTERNAL_ARG_POINTER
   28438              : #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
   28439              : #undef TARGET_UPDATE_STACK_BOUNDARY
   28440              : #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
   28441              : #undef TARGET_GET_DRAP_RTX
   28442              : #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
   28443              : #undef TARGET_STRICT_ARGUMENT_NAMING
   28444              : #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
   28445              : #undef TARGET_STATIC_CHAIN
   28446              : #define TARGET_STATIC_CHAIN ix86_static_chain
   28447              : #undef TARGET_TRAMPOLINE_INIT
   28448              : #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
   28449              : #undef TARGET_RETURN_POPS_ARGS
   28450              : #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
   28451              : 
   28452              : #undef TARGET_WARN_FUNC_RETURN
   28453              : #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
   28454              : 
   28455              : #undef TARGET_LEGITIMATE_COMBINED_INSN
   28456              : #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
   28457              : 
   28458              : #undef TARGET_ASAN_SHADOW_OFFSET
   28459              : #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
   28460              : 
   28461              : #undef TARGET_GIMPLIFY_VA_ARG_EXPR
   28462              : #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
   28463              : 
   28464              : #undef TARGET_SCALAR_MODE_SUPPORTED_P
   28465              : #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
   28466              : 
   28467              : #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
   28468              : #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
   28469              : ix86_libgcc_floating_mode_supported_p
   28470              : 
   28471              : #undef TARGET_VECTOR_MODE_SUPPORTED_P
   28472              : #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
   28473              : 
   28474              : #undef TARGET_C_MODE_FOR_SUFFIX
   28475              : #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
   28476              : 
   28477              : #ifdef HAVE_AS_TLS
   28478              : #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
   28479              : #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
   28480              : #endif
   28481              : 
   28482              : #ifdef SUBTARGET_INSERT_ATTRIBUTES
   28483              : #undef TARGET_INSERT_ATTRIBUTES
   28484              : #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
   28485              : #endif
   28486              : 
   28487              : #undef TARGET_MANGLE_TYPE
   28488              : #define TARGET_MANGLE_TYPE ix86_mangle_type
   28489              : 
   28490              : #undef TARGET_EMIT_SUPPORT_TINFOS
   28491              : #define TARGET_EMIT_SUPPORT_TINFOS ix86_emit_support_tinfos
   28492              : 
   28493              : #undef TARGET_STACK_PROTECT_GUARD
   28494              : #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
   28495              : 
   28496              : #undef TARGET_STACK_PROTECT_RUNTIME_ENABLED_P
   28497              : #define TARGET_STACK_PROTECT_RUNTIME_ENABLED_P \
   28498              :   ix86_stack_protect_runtime_enabled_p
   28499              : 
   28500              : #if !TARGET_MACHO
   28501              : #undef TARGET_STACK_PROTECT_FAIL
   28502              : #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
   28503              : #endif
   28504              : 
   28505              : #undef TARGET_FUNCTION_VALUE
   28506              : #define TARGET_FUNCTION_VALUE ix86_function_value
   28507              : 
   28508              : #undef TARGET_FUNCTION_VALUE_REGNO_P
   28509              : #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
   28510              : 
   28511              : #undef TARGET_ZERO_CALL_USED_REGS
   28512              : #define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs
   28513              : 
   28514              : #undef TARGET_PROMOTE_FUNCTION_MODE
   28515              : #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
   28516              : 
   28517              : #undef  TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
   28518              : #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
   28519              : 
   28520              : #undef TARGET_MEMBER_TYPE_FORCES_BLK
   28521              : #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
   28522              : 
   28523              : #undef TARGET_INSTANTIATE_DECLS
   28524              : #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
   28525              : 
   28526              : #undef TARGET_SECONDARY_RELOAD
   28527              : #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
   28528              : #undef TARGET_SECONDARY_MEMORY_NEEDED
   28529              : #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
   28530              : #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
   28531              : #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
   28532              : 
   28533              : #undef TARGET_CLASS_MAX_NREGS
   28534              : #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
   28535              : 
   28536              : #undef TARGET_PREFERRED_RELOAD_CLASS
   28537              : #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
   28538              : #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
   28539              : #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
   28540              : /* When this hook returns true for MODE, the compiler allows
   28541              :    registers explicitly used in the rtl to be used as spill registers
   28542              :    but prevents the compiler from extending the lifetime of these
   28543              :    registers.  */
   28544              : #undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
   28545              : #define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
   28546              : #undef TARGET_CLASS_LIKELY_SPILLED_P
   28547              : #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
   28548              : #undef TARGET_CALLEE_SAVE_COST
   28549              : #define TARGET_CALLEE_SAVE_COST ix86_callee_save_cost
   28550              : 
   28551              : #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
   28552              : #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
   28553              :   ix86_builtin_vectorization_cost
   28554              : #undef TARGET_VECTORIZE_VEC_PERM_CONST
   28555              : #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
   28556              : #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
   28557              : #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
   28558              :   ix86_preferred_simd_mode
   28559              : #undef TARGET_VECTORIZE_SPLIT_REDUCTION
   28560              : #define TARGET_VECTORIZE_SPLIT_REDUCTION \
   28561              :   ix86_split_reduction
   28562              : #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
   28563              : #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
   28564              :   ix86_autovectorize_vector_modes
   28565              : #undef TARGET_VECTORIZE_GET_MASK_MODE
   28566              : #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
   28567              : #undef TARGET_VECTORIZE_CREATE_COSTS
   28568              : #define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs
   28569              : 
   28570              : #undef TARGET_SET_CURRENT_FUNCTION
   28571              : #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
   28572              : 
   28573              : #undef TARGET_OPTION_VALID_ATTRIBUTE_P
   28574              : #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
   28575              : 
   28576              : #undef TARGET_OPTION_SAVE
   28577              : #define TARGET_OPTION_SAVE ix86_function_specific_save
   28578              : 
   28579              : #undef TARGET_OPTION_RESTORE
   28580              : #define TARGET_OPTION_RESTORE ix86_function_specific_restore
   28581              : 
   28582              : #undef TARGET_OPTION_POST_STREAM_IN
   28583              : #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
   28584              : 
   28585              : #undef TARGET_OPTION_PRINT
   28586              : #define TARGET_OPTION_PRINT ix86_function_specific_print
   28587              : 
   28588              : #undef TARGET_CAN_INLINE_P
   28589              : #define TARGET_CAN_INLINE_P ix86_can_inline_p
   28590              : 
   28591              : #undef TARGET_LEGITIMATE_ADDRESS_P
   28592              : #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
   28593              : 
   28594              : #undef TARGET_REGISTER_PRIORITY
   28595              : #define TARGET_REGISTER_PRIORITY ix86_register_priority
   28596              : 
   28597              : #undef TARGET_REGISTER_USAGE_LEVELING_P
   28598              : #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
   28599              : 
   28600              : #undef TARGET_LEGITIMATE_CONSTANT_P
   28601              : #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
   28602              : 
   28603              : #undef TARGET_COMPUTE_FRAME_LAYOUT
   28604              : #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
   28605              : 
   28606              : #undef TARGET_FRAME_POINTER_REQUIRED
   28607              : #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
   28608              : 
   28609              : #undef TARGET_CAN_ELIMINATE
   28610              : #define TARGET_CAN_ELIMINATE ix86_can_eliminate
   28611              : 
   28612              : #undef TARGET_EXTRA_LIVE_ON_ENTRY
   28613              : #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
   28614              : 
   28615              : #undef TARGET_ASM_CODE_END
   28616              : #define TARGET_ASM_CODE_END ix86_code_end
   28617              : 
   28618              : #undef TARGET_CONDITIONAL_REGISTER_USAGE
   28619              : #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
   28620              : 
   28621              : #undef TARGET_CANONICALIZE_COMPARISON
   28622              : #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
   28623              : 
   28624              : #undef TARGET_LOOP_UNROLL_ADJUST
   28625              : #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
   28626              : 
   28627              : /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */
   28628              : #undef TARGET_SPILL_CLASS
   28629              : #define TARGET_SPILL_CLASS ix86_spill_class
   28630              : 
   28631              : #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
   28632              : #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
   28633              :   ix86_simd_clone_compute_vecsize_and_simdlen
   28634              : 
   28635              : #undef TARGET_SIMD_CLONE_ADJUST
   28636              : #define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
   28637              : 
   28638              : #undef TARGET_SIMD_CLONE_USABLE
   28639              : #define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
   28640              : 
   28641              : #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
   28642              : #define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
   28643              : 
   28644              : #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
   28645              : #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
   28646              :   ix86_float_exceptions_rounding_supported_p
   28647              : 
   28648              : #undef TARGET_MODE_EMIT
   28649              : #define TARGET_MODE_EMIT ix86_emit_mode_set
   28650              : 
   28651              : #undef TARGET_MODE_NEEDED
   28652              : #define TARGET_MODE_NEEDED ix86_mode_needed
   28653              : 
   28654              : #undef TARGET_MODE_AFTER
   28655              : #define TARGET_MODE_AFTER ix86_mode_after
   28656              : 
   28657              : #undef TARGET_MODE_ENTRY
   28658              : #define TARGET_MODE_ENTRY ix86_mode_entry
   28659              : 
   28660              : #undef TARGET_MODE_EXIT
   28661              : #define TARGET_MODE_EXIT ix86_mode_exit
   28662              : 
   28663              : #undef TARGET_MODE_PRIORITY
   28664              : #define TARGET_MODE_PRIORITY ix86_mode_priority
   28665              : 
   28666              : #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
   28667              : #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
   28668              : 
   28669              : #undef TARGET_OFFLOAD_OPTIONS
   28670              : #define TARGET_OFFLOAD_OPTIONS \
   28671              :   ix86_offload_options
   28672              : 
   28673              : #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
   28674              : #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
   28675              : 
   28676              : #undef TARGET_OPTAB_SUPPORTED_P
   28677              : #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
   28678              : 
   28679              : #undef TARGET_HARD_REGNO_SCRATCH_OK
   28680              : #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
   28681              : 
   28682              : #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
   28683              : #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS X86_CUSTOM_FUNCTION_TEST
   28684              : 
   28685              : #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
   28686              : #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
   28687              : 
   28688              : #undef TARGET_INIT_LIBFUNCS
   28689              : #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
   28690              : 
   28691              : #undef TARGET_EXPAND_DIVMOD_LIBFUNC
   28692              : #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
   28693              : 
   28694              : #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
   28695              : #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
   28696              : 
   28697              : #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
   28698              : #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
   28699              : 
   28700              : #undef TARGET_HARD_REGNO_NREGS
   28701              : #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
   28702              : #undef TARGET_HARD_REGNO_MODE_OK
   28703              : #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
   28704              : 
   28705              : #undef TARGET_MODES_TIEABLE_P
   28706              : #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
   28707              : 
   28708              : #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
   28709              : #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
   28710              :   ix86_hard_regno_call_part_clobbered
   28711              : 
   28712              : #undef TARGET_INSN_CALLEE_ABI
   28713              : #define TARGET_INSN_CALLEE_ABI ix86_insn_callee_abi
   28714              : 
   28715              : #undef TARGET_CAN_CHANGE_MODE_CLASS
   28716              : #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
   28717              : 
   28718              : #undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
   28719              : #define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment
   28720              : 
   28721              : #undef TARGET_STATIC_RTX_ALIGNMENT
   28722              : #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
   28723              : #undef TARGET_CONSTANT_ALIGNMENT
   28724              : #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
   28725              : 
   28726              : #undef TARGET_EMPTY_RECORD_P
   28727              : #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
   28728              : 
   28729              : #undef TARGET_WARN_PARAMETER_PASSING_ABI
   28730              : #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
   28731              : 
   28732              : #undef TARGET_GET_MULTILIB_ABI_NAME
   28733              : #define TARGET_GET_MULTILIB_ABI_NAME \
   28734              :   ix86_get_multilib_abi_name
   28735              : 
   28736              : #undef TARGET_IFUNC_REF_LOCAL_OK
   28737              : #define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok
   28738              : 
   28739              : #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
   28740              : # undef TARGET_ASM_RELOC_RW_MASK
   28741              : # define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
   28742              : #endif
   28743              : 
   28744              : #undef TARGET_MEMTAG_CAN_TAG_ADDRESSES
   28745              : #define TARGET_MEMTAG_CAN_TAG_ADDRESSES ix86_memtag_can_tag_addresses
   28746              : 
   28747              : #undef TARGET_MEMTAG_ADD_TAG
   28748              : #define TARGET_MEMTAG_ADD_TAG ix86_memtag_add_tag
   28749              : 
   28750              : #undef TARGET_MEMTAG_SET_TAG
   28751              : #define TARGET_MEMTAG_SET_TAG ix86_memtag_set_tag
   28752              : 
   28753              : #undef TARGET_MEMTAG_EXTRACT_TAG
   28754              : #define TARGET_MEMTAG_EXTRACT_TAG ix86_memtag_extract_tag
   28755              : 
   28756              : #undef TARGET_MEMTAG_UNTAGGED_POINTER
   28757              : #define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer
   28758              : 
   28759              : #undef TARGET_MEMTAG_TAG_BITSIZE
   28760              : #define TARGET_MEMTAG_TAG_BITSIZE ix86_memtag_tag_bitsize
   28761              : 
   28762              : #undef TARGET_GEN_CCMP_FIRST
   28763              : #define TARGET_GEN_CCMP_FIRST ix86_gen_ccmp_first
   28764              : 
   28765              : #undef TARGET_GEN_CCMP_NEXT
   28766              : #define TARGET_GEN_CCMP_NEXT ix86_gen_ccmp_next
   28767              : 
   28768              : #undef TARGET_HAVE_CCMP
   28769              : #define TARGET_HAVE_CCMP ix86_have_ccmp
   28770              : 
   28771              : #undef TARGET_MODE_CAN_TRANSFER_BITS
   28772              : #define TARGET_MODE_CAN_TRANSFER_BITS ix86_mode_can_transfer_bits
   28773              : 
   28774              : #undef TARGET_REDZONE_CLOBBER
   28775              : #define TARGET_REDZONE_CLOBBER ix86_redzone_clobber
   28776              : 
   28777              : static bool
   28778        96365 : ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
   28779              : {
   28780              : #ifdef OPTION_GLIBC
   28781        96365 :   if (OPTION_GLIBC)
   28782        96365 :     return (built_in_function)fcode == BUILT_IN_MEMPCPY;
   28783              :   else
   28784              :     return false;
   28785              : #else
   28786              :   return false;
   28787              : #endif
   28788              : }
   28789              : 
   28790              : #undef TARGET_LIBC_HAS_FAST_FUNCTION
   28791              : #define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
   28792              : 
                      : /* Implement TARGET_LIBM_FUNCTION_MAX_ERROR.  Return the maximum error in
                      :    ulps of libm function CFN in mode MODE (BOUNDARY_P selects errors near
                      :    function-specific boundary values).  On glibc targets the result is the
                      :    glibc/Linux table value, raised to at least 4 ulps for sin/cos when the
                      :    fast-math flag combination below permits calls into libmvec; on other
                      :    targets the generic default is used.  */
   28793              : static unsigned
   28794        78698 : ix86_libm_function_max_error (unsigned cfn, machine_mode mode,
   28795              :                               bool boundary_p)
   28796              : {
   28797              : #ifdef OPTION_GLIBC
   28798        78698 :   bool glibc_p = OPTION_GLIBC;
   28799              : #else
   28800              :   bool glibc_p = false;
   28801              : #endif
   28802        78698 :   if (glibc_p)
   28803              :     {
   28804              :       /* If __FAST_MATH__ is defined, glibc provides libmvec.  */
   28805        78698 :       unsigned int libmvec_ret = 0;
                      :       /* This flag combination matches the conditions under which calls may
                      :          be vectorized to libmvec entry points.  */
   28806        78698 :       if (!flag_trapping_math
   28807         8296 :           && flag_unsafe_math_optimizations
   28808         3374 :           && flag_finite_math_only
   28809         3348 :           && !flag_signed_zeros
   28810         3348 :           && !flag_errno_math)
   28811         3348 :         switch (cfn)
   28812              :           {
   28813         1396 :           CASE_CFN_COS:
   28814         1396 :           CASE_CFN_COS_FN:
   28815         1396 :           CASE_CFN_SIN:
   28816         1396 :           CASE_CFN_SIN_FN:
   28817         1396 :             if (!boundary_p)
   28818              :               {
   28819              :                 /* With non-default rounding modes, libmvec provides
   28820              :                    complete garbage in results.  E.g.
   28821              :                    _ZGVcN8v_sinf for 1.40129846e-45f in FE_UPWARD
   28822              :                    returns 0.00333309174f rather than 1.40129846e-45f.  */
   28823          587 :                 if (flag_rounding_math)
   28824              :                   return ~0U;
   28825              :                 /* https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
   28826              :                    claims libmvec maximum error is 4ulps.
   28827              :                    My own random testing indicates 2ulps for SFmode and
   28828              :                    0.5ulps for DFmode, but let's go with the 4ulps.  */
   28829              :                 libmvec_ret = 4;
   28830              :               }
   28831              :             break;
   28832              :           default:
   28833              :             break;
   28834              :           }
   28835        78698 :       unsigned int ret = glibc_linux_libm_function_max_error (cfn, mode,
   28836              :                                                               boundary_p);
                      :       /* The scalar-table error and the libmvec bound can differ; report
                      :          the worse of the two.  */
   28837        78698 :       return MAX (ret, libmvec_ret);
   28838              :     }
   28839            0 :   return default_libm_function_max_error (cfn, mode, boundary_p);
   28840              : }
   28841              : 
   28842              : #undef TARGET_LIBM_FUNCTION_MAX_ERROR
   28843              : #define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error
   28844              : 
   28845              : #if TARGET_MACHO
                      : /* Implement TARGET_CANNOT_COPY_INSN_P (Mach-O/Darwin only).  On 32-bit
                      :    targets, refuse to duplicate an insn whose single SET source is an
                      :    UNSPEC_SET_GOT (the PIC-base setup) -- presumably because the pc-load
                      :    sequence it expands to must remain unique; confirm against the Darwin
                      :    PIC-base machinery.  64-bit code has no such insn to protect.  */
   28846              : static bool
   28847              : ix86_cannot_copy_insn_p (rtx_insn *insn)
   28848              : {
   28849              :   if (TARGET_64BIT)
   28850              :     return false;
   28851              : 
   28852              :   rtx set = single_set (insn);
   28853              :   if (set)
   28854              :     {
   28855              :       rtx src = SET_SRC (set);
   28856              :       if (GET_CODE (src) == UNSPEC
   28857              :           && XINT (src, 1) == UNSPEC_SET_GOT)
   28858              :         return true;
   28859              :     }
   28860              :   return false;
   28861              : }
   28862              : 
   28863              : #undef TARGET_CANNOT_COPY_INSN_P
   28864              : #define TARGET_CANNOT_COPY_INSN_P ix86_cannot_copy_insn_p
   28865              : 
   28866              : #endif
   28867              : 
   28868              : #if CHECKING_P
   28869              : #undef TARGET_RUN_TARGET_SELFTESTS
   28870              : #define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
   28871              : #endif /* #if CHECKING_P */
   28872              : 
   28873              : #undef TARGET_DOCUMENTATION_NAME
   28874              : #define TARGET_DOCUMENTATION_NAME "x86"
   28875              : 
   28876              : /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS.  */
                      : /* Return an sbitmap, indexed by hard register number, of the call-saved
                      :    GPRs whose saves/restores may be shrink-wrapped as separate
                      :    components.  The set is returned empty (separate wrapping disabled)
                      :    for: APX PPX without eh_return, non-normal function types, SEH,
                      :    stack realignment, ms2sysv save stubs, or when push/pop saves would
                      :    be used instead of MOV.  */
   28877              : sbitmap
   28878       737546 : ix86_get_separate_components (void)
   28879              : {
   28880       737546 :   HOST_WIDE_INT offset, to_allocate;
   28881       737546 :   sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
   28882       737546 :   bitmap_clear (components);
   28883       737546 :   struct machine_function *m = cfun->machine;
   28884              : 
                      :   /* TO_ALLOCATE spans from the SSE register save area up to the final
                      :      stack pointer; it is what decides below whether push/pop saves
                      :      would be chosen over MOV.  */
   28885       737546 :   offset = m->frame.stack_pointer_offset;
   28886       737546 :   to_allocate = offset - m->frame.sse_reg_save_offset;
   28887              : 
   28888              :   /* Shrink wrap separate uses MOV, which means APX PPX cannot be used.
   28889              :      Experiments show that APX PPX can speed up the prologue.  If the function
   28890              :      does not exit early during actual execution, then using APX PPX is faster.
   28891              :      If the function always exits early during actual execution, then shrink
   28892              :      wrap separate reduces the number of MOV (PUSH/POP) instructions actually
   28893              :      executed, thus speeding up execution.
   28894              :      foo:
   28895              :           movl    $1, %eax
   28896              :           testq   %rdi, %rdi
   28897              :           jne.L60
   28898              :           ret   ---> early return.
   28899              :     .L60:
   28900              :           subq    $88, %rsp     ---> belong to prologue.
   28901              :           xorl    %eax, %eax
   28902              :           movq    %rbx, 40 (%rsp) ---> belong to prologue.
   28903              :           movq    8 (%rdi), %rbx
   28904              :           movq    %rbp, 48 (%rsp) ---> belong to prologue.
   28905              :           movq    %rdi, %rbp
   28906              :           testq   %rbx, %rbx
   28907              :           jne.L61
   28908              :           movq    40 (%rsp), %rbx
   28909              :           movq    48 (%rsp), %rbp
   28910              :           addq    $88, %rsp
   28911              :           ret
   28912              :      .L61:
   28913              :           movq    %r12, 56 (%rsp) ---> belong to prologue.
   28914              :           movq    %r13, 64 (%rsp) ---> belong to prologue.
   28915              :           movq    %r14, 72 (%rsp) ---> belong to prologue.
   28916              :      ... ...
   28917              : 
   28918              :      Disable shrink wrap separate when PPX is enabled.  */
   28919       737546 :   if ((TARGET_APX_PPX && !crtl->calls_eh_return)
   28920       737079 :       || cfun->machine->func_type != TYPE_NORMAL
   28921              :       || TARGET_SEH
   28922       736981 :       || crtl->stack_realign_needed
   28923       727320 :       || m->call_ms2sysv)
   28924              :     return components;
   28925              : 
   28926              :   /* Since shrink wrapping separate uses MOV instead of PUSH/POP.
   28927              :      Disable shrink wrap separate when MOV is prohibited.  */
   28928       725398 :   if (save_regs_using_push_pop (to_allocate))
   28929              :     return components;
   28930              : 
   28931     32673411 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   28932     32322084 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
   28933              :       {
   28934              :         /* Skip registers with large offsets, where a pseudo may be needed.  */
   28935       602593 :         if (IN_RANGE (offset, -0x8000, 0x7fff))
   28936       601520 :           bitmap_set_bit (components, regno);
   28937       648467 :         offset += UNITS_PER_WORD;
   28938              :       }
   28939              : 
   28940              :   /* Don't mess with the following registers.  */
   28941       351327 :   if (frame_pointer_needed)
   28942         6337 :     bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
   28943              : 
   28944       351327 :   if (crtl->drap_reg)
   28945          129 :     bitmap_clear_bit (components, REGNO (crtl->drap_reg));
   28946              : 
   28947       351327 :   if (pic_offset_table_rtx)
   28948        29860 :     bitmap_clear_bit (components, REAL_PIC_OFFSET_TABLE_REGNUM);
   28949              : 
   28950              :   return components;
   28951              : }
   28952              : 
   28953              : /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB.  */
                      : /* Return the set of components (hard registers) needed in BB: every
                      :    non-fixed register that is in BB's DF live-in, gen or kill set, or
                      :    that is an extra caller-save implied by the callee ABIs of the calls
                      :    contained in BB.  */
   28954              : sbitmap
   28955      9415806 : ix86_components_for_bb (basic_block bb)
   28956              : {
   28957      9415806 :   bitmap in = DF_LIVE_IN (bb);
   28958      9415806 :   bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
   28959      9415806 :   bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
   28960              : 
   28961      9415806 :   sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
   28962      9415806 :   bitmap_clear (components);
   28963              : 
                      :   /* Collect the union of callee ABIs of all calls in BB, so registers
                      :      those ABIs clobber beyond the current function's ABI count too.  */
   28964      9415806 :   function_abi_aggregator callee_abis;
   28965      9415806 :   rtx_insn *insn;
   28966    110427935 :   FOR_BB_INSNS (bb, insn)
   28967    101012129 :     if (CALL_P (insn))
   28968      3080298 :       callee_abis.note_callee_abi (insn_callee_abi (insn));
   28969      9415806 :   HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
   28970              : 
   28971              :   /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets.  */
   28972    875669958 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   28973    866254152 :     if (!fixed_regs[regno]
   28974    866254152 :         && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
   28975    440299364 :             || bitmap_bit_p (in, regno)
   28976    414583942 :             || bitmap_bit_p (gen, regno)
   28977    402028717 :             || bitmap_bit_p (kill, regno)))
   28978     38535652 :       bitmap_set_bit (components, regno);
   28979              : 
   28980      9415806 :   return components;
   28981              : }
   28982              : 
   28983              : /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.  */
                      : /* Deliberate no-op: x86 never needs to disqualify components on an
                      :    edge, so the parameters are unnamed.  */
   28984              : void
   28985       476221 : ix86_disqualify_components (sbitmap, edge, sbitmap, bool)
   28986              : {
   28987              :   /* Nothing to do for x86.  */
   28988              : }
   28989              : 
   28990              : /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS.  */
                      : /* Emit MOV saves for the registers in COMPONENTS at their frame slots.
                      :    CFA_OFFSET is stepped down one word for every call-saved GPR, whether
                      :    or not it is in COMPONENTS, so slot assignment stays consistent with
                      :    the non-wrapped saves and with the epilogue restores.  */
   28991              : void
   28992       168833 : ix86_emit_prologue_components (sbitmap components)
   28993              : {
   28994       168833 :   HOST_WIDE_INT cfa_offset;
   28995       168833 :   struct machine_function *m = cfun->machine;
   28996              : 
   28997       168833 :   cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
   28998       168833 :                - m->frame.stack_pointer_offset;
   28999     15701469 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   29000     15532636 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
   29001              :       {
   29002       769764 :         if (bitmap_bit_p (components, regno))
   29003       194584 :           ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
   29004       821003 :         cfa_offset -= UNITS_PER_WORD;
   29005              :       }
   29006       168833 : }
   29007              : 
   29008              : /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS.  */
                      : /* Emit MOV restores for the registers in COMPONENTS from their frame
                      :    slots, marking each insn frame-related with a REG_CFA_RESTORE note so
                      :    the CFI matches.  Slot offsets walk the same way as in
                      :    ix86_emit_prologue_components.  */
   29009              : void
   29010       150912 : ix86_emit_epilogue_components (sbitmap components)
   29011              : {
   29012       150912 :   HOST_WIDE_INT cfa_offset;
   29013       150912 :   struct machine_function *m = cfun->machine;
   29014       150912 :   cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
   29015       150912 :                - m->frame.stack_pointer_offset;
   29016              : 
   29017     14034816 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   29018     13883904 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
   29019              :       {
   29020       690120 :         if (bitmap_bit_p (components, regno))
   29021              :           {
   29022       260929 :             rtx reg = gen_rtx_REG (word_mode, regno);
   29023       260929 :             rtx mem;
   29024       260929 :             rtx_insn *insn;
   29025              : 
   29026       260929 :             mem = choose_baseaddr (cfa_offset, NULL);
   29027       260929 :             mem = gen_frame_mem (word_mode, mem);
   29028       260929 :             insn = emit_move_insn (reg, mem);
   29029              : 
   29030       260929 :             RTX_FRAME_RELATED_P (insn) = 1;
   29031       260929 :             add_reg_note (insn, REG_CFA_RESTORE, reg);
   29032              :           }
   29033       748013 :         cfa_offset -= UNITS_PER_WORD;
   29034              :       }
   29035       150912 : }
   29036              : 
   29037              : /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS.  */
                      : /* Record in cfun->machine which registers the middle end will wrap
                      :    separately.  Also forces MOV-based saves and the fast
                      :    prologue/epilogue path, which the MOV-based separate wrapping
                      :    requires.  */
   29038              : void
   29039        44753 : ix86_set_handled_components (sbitmap components)
   29040              : {
   29041      4162029 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   29042      4117276 :     if (bitmap_bit_p (components, regno))
   29043              :       {
   29044       105000 :         cfun->machine->reg_is_wrapped_separately[regno] = true;
   29045       105000 :         cfun->machine->use_fast_prologue_epilogue = true;
   29046       105000 :         cfun->machine->frame.save_regs_using_mov = true;
   29047              :       }
   29048        44753 : }
   29049              : 
   29050              : #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
   29051              : #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS ix86_get_separate_components
   29052              : #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
   29053              : #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB ix86_components_for_bb
   29054              : #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
   29055              : #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS ix86_disqualify_components
   29056              : #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
   29057              : #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
   29058              :   ix86_emit_prologue_components
   29059              : #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
   29060              : #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
   29061              :   ix86_emit_epilogue_components
   29062              : #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
   29063              : #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS ix86_set_handled_components
   29064              : 
   29065              : struct gcc_target targetm = TARGET_INITIALIZER;
   29066              : 
   29067              : #include "gt-i386.h"
        

Generated by: LCOV version 2.4-beta

The LCOV profile was generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite was run with the built compiler.