LCOV - code coverage report
Current view: top level - gcc/config/i386 - i386.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 89.2 % 12872 11480
Test Date: 2026-02-28 14:20:25 Functions: 97.0 % 465 451
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Subroutines used for code generation on IA-32.
       2              :    Copyright (C) 1988-2026 Free Software Foundation, Inc.
       3              : 
       4              : This file is part of GCC.
       5              : 
       6              : GCC is free software; you can redistribute it and/or modify
       7              : it under the terms of the GNU General Public License as published by
       8              : the Free Software Foundation; either version 3, or (at your option)
       9              : any later version.
      10              : 
      11              : GCC is distributed in the hope that it will be useful,
      12              : but WITHOUT ANY WARRANTY; without even the implied warranty of
      13              : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14              : GNU General Public License for more details.
      15              : 
      16              : You should have received a copy of the GNU General Public License
      17              : along with GCC; see the file COPYING3.  If not see
      18              : <http://www.gnu.org/licenses/>.  */
      19              : 
      20              : #define INCLUDE_STRING
      21              : #define IN_TARGET_CODE 1
      22              : 
      23              : #include "config.h"
      24              : #include "system.h"
      25              : #include "coretypes.h"
      26              : #include "backend.h"
      27              : #include "rtl.h"
      28              : #include "tree.h"
      29              : #include "memmodel.h"
      30              : #include "gimple.h"
      31              : #include "cfghooks.h"
      32              : #include "cfgloop.h"
      33              : #include "df.h"
      34              : #include "tm_p.h"
      35              : #include "stringpool.h"
      36              : #include "expmed.h"
      37              : #include "optabs.h"
      38              : #include "regs.h"
      39              : #include "emit-rtl.h"
      40              : #include "recog.h"
      41              : #include "cgraph.h"
      42              : #include "diagnostic.h"
      43              : #include "cfgbuild.h"
      44              : #include "alias.h"
      45              : #include "fold-const.h"
      46              : #include "attribs.h"
      47              : #include "calls.h"
      48              : #include "stor-layout.h"
      49              : #include "varasm.h"
      50              : #include "output.h"
      51              : #include "insn-attr.h"
      52              : #include "flags.h"
      53              : #include "except.h"
      54              : #include "explow.h"
      55              : #include "expr.h"
      56              : #include "cfgrtl.h"
      57              : #include "common/common-target.h"
      58              : #include "langhooks.h"
      59              : #include "reload.h"
      60              : #include "gimplify.h"
      61              : #include "dwarf2.h"
      62              : #include "tm-constrs.h"
      63              : #include "cselib.h"
      64              : #include "sched-int.h"
      65              : #include "opts.h"
      66              : #include "tree-pass.h"
      67              : #include "context.h"
      68              : #include "pass_manager.h"
      69              : #include "target-globals.h"
      70              : #include "gimple-iterator.h"
      71              : #include "gimple-fold.h"
      72              : #include "tree-vectorizer.h"
      73              : #include "shrink-wrap.h"
      74              : #include "builtins.h"
      75              : #include "rtl-iter.h"
      76              : #include "tree-iterator.h"
      77              : #include "dbgcnt.h"
      78              : #include "case-cfn-macros.h"
      79              : #include "dojump.h"
      80              : #include "fold-const-call.h"
      81              : #include "tree-vrp.h"
      82              : #include "tree-ssanames.h"
      83              : #include "selftest.h"
      84              : #include "selftest-rtl.h"
      85              : #include "print-rtl.h"
      86              : #include "intl.h"
      87              : #include "ifcvt.h"
      88              : #include "symbol-summary.h"
      89              : #include "sreal.h"
      90              : #include "ipa-cp.h"
      91              : #include "ipa-prop.h"
      92              : #include "ipa-fnsummary.h"
      93              : #include "wide-int-bitmask.h"
      94              : #include "tree-vector-builder.h"
      95              : #include "debug.h"
      96              : #include "dwarf2out.h"
      97              : #include "i386-options.h"
      98              : #include "i386-builtins.h"
      99              : #include "i386-expand.h"
     100              : #include "i386-features.h"
     101              : #include "function-abi.h"
     102              : #include "rtl-error.h"
     103              : #include "gimple-pretty-print.h"
     104              : 
     105              : /* This file should be included last.  */
     106              : #include "target-def.h"
     107              : 
     108              : static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
     109              : static void ix86_emit_restore_reg_using_pop (rtx, bool = false);
     110              : 
     111              : 
     112              : #ifndef CHECK_STACK_LIMIT
     113              : #define CHECK_STACK_LIMIT (-1)
     114              : #endif
     115              : 
     116              : /* Return index of given mode in mult and division cost tables.  */
     117              : #define MODE_INDEX(mode)                                        \
     118              :   ((mode) == QImode ? 0                                         \
     119              :    : (mode) == HImode ? 1                                       \
     120              :    : (mode) == SImode ? 2                                       \
     121              :    : (mode) == DImode ? 3                                       \
     122              :    : 4)
     123              : 
     124              : 
     125              : /* Set by -mtune.  */
     126              : const struct processor_costs *ix86_tune_cost = NULL;
     127              : 
     128              : /* Set by -mtune or -Os.  */
     129              : const struct processor_costs *ix86_cost = NULL;
     130              : 
     131              : /* In case the average insn count for single function invocation is
     132              :    lower than this constant, emit fast (but longer) prologue and
     133              :    epilogue code.  */
     134              : #define FAST_PROLOGUE_INSN_COUNT 20
     135              : 
     136              : /* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
     137              : static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
     138              : static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
     139              : static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
     140              : 
     141              : /* Array of the smallest class containing reg number REGNO, indexed by
     142              :    REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
     143              : 
     144              : enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
     145              : {
     146              :   /* ax, dx, cx, bx */
     147              :   AREG, DREG, CREG, BREG,
     148              :   /* si, di, bp, sp */
     149              :   SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
     150              :   /* FP registers */
     151              :   FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
     152              :   FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
     153              :   /* arg pointer, flags, fpsr, frame */
     154              :   NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
     155              :   /* SSE registers */
     156              :   SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
     157              :   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
     158              :   /* MMX registers */
     159              :   MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
     160              :   MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
     161              :   /* REX registers */
     162              :   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
     163              :   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
     164              :   /* SSE REX registers */
     165              :   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
     166              :   SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
     167              :   /* AVX-512 SSE registers */
     168              :   ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
     169              :   ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
     170              :   ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
     171              :   ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
     172              :   /* Mask registers.  */
     173              :   ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
     174              :   MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
     175              :   /* REX2 registers */
     176              :   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
     177              :   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
     178              :   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
     179              :   GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
     180              : };
     181              : 
     182              : /* The "default" register map used in 32bit mode.  */
     183              : 
     184              : unsigned int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
     185              : {
     186              :   /* general regs */
     187              :   0, 2, 1, 3, 6, 7, 4, 5,
     188              :   /* fp regs */
     189              :   12, 13, 14, 15, 16, 17, 18, 19,
     190              :   /* arg, flags, fpsr, frame */
     191              :   IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
     192              :   IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
     193              :   /* SSE */
     194              :   21, 22, 23, 24, 25, 26, 27, 28,
     195              :   /* MMX */
     196              :   29, 30, 31, 32, 33, 34, 35, 36,
     197              :   /* extended integer registers */
     198              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     199              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     200              :   /* extended sse registers */
     201              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     202              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     203              :   /* AVX-512 registers 16-23 */
     204              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     205              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     206              :   /* AVX-512 registers 24-31 */
     207              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     208              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     209              :   /* Mask registers */
     210              :   93, 94, 95, 96, 97, 98, 99, 100
     211              : };
     212              : 
     213              : /* The "default" register map used in 64bit mode.  */
     214              : 
     215              : unsigned int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
     216              : {
     217              :   /* general regs */
     218              :   0, 1, 2, 3, 4, 5, 6, 7,
     219              :   /* fp regs */
     220              :   33, 34, 35, 36, 37, 38, 39, 40,
     221              :   /* arg, flags, fpsr, frame */
     222              :   IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
     223              :   IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
     224              :   /* SSE */
     225              :   17, 18, 19, 20, 21, 22, 23, 24,
     226              :   /* MMX */
     227              :   41, 42, 43, 44, 45, 46, 47, 48,
     228              :   /* extended integer registers */
     229              :   8, 9, 10, 11, 12, 13, 14, 15,
     230              :   /* extended SSE registers */
     231              :   25, 26, 27, 28, 29, 30, 31, 32,
     232              :   /* AVX-512 registers 16-23 */
     233              :   67, 68, 69, 70, 71, 72, 73, 74,
     234              :   /* AVX-512 registers 24-31 */
     235              :   75, 76, 77, 78, 79, 80, 81, 82,
     236              :   /* Mask registers */
     237              :   118, 119, 120, 121, 122, 123, 124, 125,
      238              :   /* REX2 extended integer registers */
     239              :   130, 131, 132, 133, 134, 135, 136, 137,
     240              :   138, 139, 140, 141, 142, 143, 144, 145
     241              : };
     242              : 
     243              : /* Define the register numbers to be used in Dwarf debugging information.
     244              :    The SVR4 reference port C compiler uses the following register numbers
     245              :    in its Dwarf output code:
     246              :         0 for %eax (gcc regno = 0)
     247              :         1 for %ecx (gcc regno = 2)
     248              :         2 for %edx (gcc regno = 1)
     249              :         3 for %ebx (gcc regno = 3)
     250              :         4 for %esp (gcc regno = 7)
     251              :         5 for %ebp (gcc regno = 6)
     252              :         6 for %esi (gcc regno = 4)
     253              :         7 for %edi (gcc regno = 5)
     254              :    The following three DWARF register numbers are never generated by
     255              :    the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
     256              :    believed these numbers have these meanings.
     257              :         8  for %eip    (no gcc equivalent)
     258              :         9  for %eflags (gcc regno = 17)
     259              :         10 for %trapno (no gcc equivalent)
     260              :    It is not at all clear how we should number the FP stack registers
     261              :    for the x86 architecture.  If the version of SDB on x86/svr4 were
     262              :    a bit less brain dead with respect to floating-point then we would
     263              :    have a precedent to follow with respect to DWARF register numbers
     264              :    for x86 FP registers, but the SDB on x86/svr4 was so completely
     265              :    broken with respect to FP registers that it is hardly worth thinking
     266              :    of it as something to strive for compatibility with.
     267              :    The version of x86/svr4 SDB I had does (partially)
     268              :    seem to believe that DWARF register number 11 is associated with
     269              :    the x86 register %st(0), but that's about all.  Higher DWARF
     270              :    register numbers don't seem to be associated with anything in
     271              :    particular, and even for DWARF regno 11, SDB only seemed to under-
     272              :    stand that it should say that a variable lives in %st(0) (when
     273              :    asked via an `=' command) if we said it was in DWARF regno 11,
     274              :    but SDB still printed garbage when asked for the value of the
     275              :    variable in question (via a `/' command).
     276              :    (Also note that the labels SDB printed for various FP stack regs
     277              :    when doing an `x' command were all wrong.)
     278              :    Note that these problems generally don't affect the native SVR4
     279              :    C compiler because it doesn't allow the use of -O with -g and
     280              :    because when it is *not* optimizing, it allocates a memory
     281              :    location for each floating-point variable, and the memory
     282              :    location is what gets described in the DWARF AT_location
     283              :    attribute for the variable in question.
     284              :    Regardless of the severe mental illness of the x86/svr4 SDB, we
     285              :    do something sensible here and we use the following DWARF
     286              :    register numbers.  Note that these are all stack-top-relative
     287              :    numbers.
     288              :         11 for %st(0) (gcc regno = 8)
     289              :         12 for %st(1) (gcc regno = 9)
     290              :         13 for %st(2) (gcc regno = 10)
     291              :         14 for %st(3) (gcc regno = 11)
     292              :         15 for %st(4) (gcc regno = 12)
     293              :         16 for %st(5) (gcc regno = 13)
     294              :         17 for %st(6) (gcc regno = 14)
     295              :         18 for %st(7) (gcc regno = 15)
     296              : */
     297              : unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
     298              : {
     299              :   /* general regs */
     300              :   0, 2, 1, 3, 6, 7, 5, 4,
     301              :   /* fp regs */
     302              :   11, 12, 13, 14, 15, 16, 17, 18,
     303              :   /* arg, flags, fpsr, frame */
     304              :   IGNORED_DWARF_REGNUM, 9,
     305              :   IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
     306              :   /* SSE registers */
     307              :   21, 22, 23, 24, 25, 26, 27, 28,
     308              :   /* MMX registers */
     309              :   29, 30, 31, 32, 33, 34, 35, 36,
     310              :   /* extended integer registers */
     311              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     312              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     313              :   /* extended sse registers */
     314              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     315              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     316              :   /* AVX-512 registers 16-23 */
     317              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     318              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     319              :   /* AVX-512 registers 24-31 */
     320              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     321              :   INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
     322              :   /* Mask registers */
     323              :   93, 94, 95, 96, 97, 98, 99, 100
     324              : };
     325              : 
     326              : /* Define parameter passing and return registers.  */
     327              : 
     328              : static int const x86_64_int_parameter_registers[6] =
     329              : {
     330              :   DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
     331              : };
     332              : 
     333              : static int const x86_64_ms_abi_int_parameter_registers[4] =
     334              : {
     335              :   CX_REG, DX_REG, R8_REG, R9_REG
     336              : };
     337              : 
      338              : /* Similar to Clang's preserve_none function parameter passing.
     339              :    NB: Use DI_REG and SI_REG, see ix86_function_value_regno_p.  */
     340              : 
     341              : static int const x86_64_preserve_none_int_parameter_registers[6] =
     342              : {
     343              :   R12_REG, R13_REG, R14_REG, R15_REG, DI_REG, SI_REG
     344              : };
     345              : 
     346              : static int const x86_64_int_return_registers[4] =
     347              : {
     348              :   AX_REG, DX_REG, DI_REG, SI_REG
     349              : };
     350              : 
     351              : /* Define the structure for the machine field in struct function.  */
     352              : 
     353              : struct GTY(()) stack_local_entry {
     354              :   unsigned short mode;
     355              :   unsigned short n;
     356              :   rtx rtl;
     357              :   struct stack_local_entry *next;
     358              : };
     359              : 
     360              : /* Which cpu are we scheduling for.  */
     361              : enum attr_cpu ix86_schedule;
     362              : 
     363              : /* Which cpu are we optimizing for.  */
     364              : enum processor_type ix86_tune;
     365              : 
     366              : /* Which instruction set architecture to use.  */
     367              : enum processor_type ix86_arch;
     368              : 
     369              : /* True if processor has SSE prefetch instruction.  */
     370              : unsigned char ix86_prefetch_sse;
     371              : 
     372              : /* Preferred alignment for stack boundary in bits.  */
     373              : unsigned int ix86_preferred_stack_boundary;
     374              : 
     375              : /* Alignment for incoming stack boundary in bits specified at
     376              :    command line.  */
     377              : unsigned int ix86_user_incoming_stack_boundary;
     378              : 
     379              : /* Default alignment for incoming stack boundary in bits.  */
     380              : unsigned int ix86_default_incoming_stack_boundary;
     381              : 
     382              : /* Alignment for incoming stack boundary in bits.  */
     383              : unsigned int ix86_incoming_stack_boundary;
     384              : 
     385              : /* True if there is no direct access to extern symbols.  */
     386              : bool ix86_has_no_direct_extern_access;
     387              : 
     388              : /* Calling abi specific va_list type nodes.  */
     389              : tree sysv_va_list_type_node;
     390              : tree ms_va_list_type_node;
     391              : 
     392              : /* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
     393              : char internal_label_prefix[16];
     394              : int internal_label_prefix_len;
     395              : 
     396              : /* Fence to use after loop using movnt.  */
     397              : tree x86_mfence;
     398              : 
     399              : /* Register class used for passing given 64bit part of the argument.
     400              :    These represent classes as documented by the PS ABI, with the exception
     401              :    of SSESF, SSEDF classes, that are basically SSE class, just gcc will
     402              :    use SF or DFmode move instead of DImode to avoid reformatting penalties.
     403              : 
     404              :    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
     405              :    whenever possible (upper half does contain padding).  */
     406              : enum x86_64_reg_class
     407              :   {
     408              :     X86_64_NO_CLASS,
     409              :     X86_64_INTEGER_CLASS,
     410              :     X86_64_INTEGERSI_CLASS,
     411              :     X86_64_SSE_CLASS,
     412              :     X86_64_SSEHF_CLASS,
     413              :     X86_64_SSESF_CLASS,
     414              :     X86_64_SSEDF_CLASS,
     415              :     X86_64_SSEUP_CLASS,
     416              :     X86_64_X87_CLASS,
     417              :     X86_64_X87UP_CLASS,
     418              :     X86_64_COMPLEX_X87_CLASS,
     419              :     X86_64_MEMORY_CLASS
     420              :   };
     421              : 
     422              : #define MAX_CLASSES 8
     423              : 
     424              : /* Table of constants used by fldpi, fldln2, etc....  */
     425              : static REAL_VALUE_TYPE ext_80387_constants_table [5];
     426              : static bool ext_80387_constants_init;
     427              : 
     428              : 
     429              : static rtx ix86_function_value (const_tree, const_tree, bool);
     430              : static bool ix86_function_value_regno_p (const unsigned int);
     431              : static unsigned int ix86_function_arg_boundary (machine_mode,
     432              :                                                 const_tree);
     433              : static rtx ix86_static_chain (const_tree, bool);
     434              : static int ix86_function_regparm (const_tree, const_tree);
     435              : static void ix86_compute_frame_layout (void);
     436              : static tree ix86_canonical_va_list_type (tree);
     437              : static unsigned int split_stack_prologue_scratch_regno (void);
     438              : static bool i386_asm_output_addr_const_extra (FILE *, rtx);
     439              : 
     440              : static bool ix86_can_inline_p (tree, tree);
     441              : static unsigned int ix86_minimum_incoming_stack_boundary (bool);
     442              : 
     443              : typedef enum ix86_flags_cc
     444              : {
     445              :   X86_CCO = 0, X86_CCNO, X86_CCB, X86_CCNB,
     446              :   X86_CCE, X86_CCNE, X86_CCBE, X86_CCNBE,
     447              :   X86_CCS, X86_CCNS, X86_CCP, X86_CCNP,
     448              :   X86_CCL, X86_CCNL, X86_CCLE, X86_CCNLE
     449              : } ix86_cc;
     450              : 
     451              : static const char *ix86_ccmp_dfv_mapping[] =
     452              : {
     453              :   "{dfv=of}", "{dfv=}", "{dfv=cf}", "{dfv=}",
     454              :   "{dfv=zf}", "{dfv=}", "{dfv=cf, zf}", "{dfv=}",
     455              :   "{dfv=sf}", "{dfv=}", "{dfv=cf}", "{dfv=}",
     456              :   "{dfv=sf}", "{dfv=sf, of}", "{dfv=sf, of, zf}", "{dfv=sf, of}"
     457              : };
     458              : 
     459              : 
     460              : /* Whether -mtune= or -march= were specified */
     461              : int ix86_tune_defaulted;
     462              : int ix86_arch_specified;
     463              : 
     464              : /* Return true if a red-zone is in use.  We can't use red-zone when
     465              :    there are local indirect jumps, like "indirect_jump" or "tablejump",
     466              :    which jumps to another place in the function, since "call" in the
      467              :    indirect thunk pushes the return address onto the stack, destroying the
     468              :    red-zone.
     469              : 
     470              :    NB: Don't use red-zone for functions with no_caller_saved_registers
     471              :    and 32 GPRs or 16 XMM registers since 128-byte red-zone is too small
     472              :    for 31 GPRs or 15 GPRs + 16 XMM registers.
     473              : 
     474              :    TODO: If we can reserve the first 2 WORDs, for PUSH and, another
     475              :    for CALL, in red-zone, we can allow local indirect jumps with
     476              :    indirect thunk.  */
     477              : 
     478              : bool
     479      9841732 : ix86_using_red_zone (void)
     480              : {
     481      9841732 :   return (TARGET_RED_ZONE
     482      8901924 :           && !TARGET_64BIT_MS_ABI
     483      8599446 :           && ((!TARGET_APX_EGPR && !TARGET_SSE)
     484      8576447 :               || (cfun->machine->call_saved_registers
     485      8576447 :                   != TYPE_NO_CALLER_SAVED_REGISTERS))
     486     18441117 :           && (!cfun->machine->has_local_indirect_jump
     487        59127 :               || cfun->machine->indirect_branch_type == indirect_branch_keep));
     488              : }
     489              : 
      490              : /* Return true if profiling code should be emitted before the
      491              :    prologue; otherwise return false.
      492              :    Note: for x86 with "hotfix", this is rejected with a sorry () diagnostic.  */
     493              : static bool
     494      4467021 : ix86_profile_before_prologue (void)
     495              : {
     496      4467021 :   return flag_fentry != 0;
     497              : }
     498              : 
     499              : /* Update register usage after having seen the compiler flags.  */
     500              : 
     501              : static void
     502       822162 : ix86_conditional_register_usage (void)
     503              : {
     504       822162 :   int i, c_mask;
     505              : 
     506              :   /* If there are no caller-saved registers, preserve all registers.
     507              :      except fixed_regs and registers used for function return value
     508              :      since aggregate_value_p checks call_used_regs[regno] on return
     509              :      value.  */
     510       822162 :   if (cfun
     511        63963 :       && (cfun->machine->call_saved_registers
     512        63963 :           == TYPE_NO_CALLER_SAVED_REGISTERS))
     513       407247 :     for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     514       402868 :       if (!fixed_regs[i] && !ix86_function_value_regno_p (i))
     515       363035 :         call_used_regs[i] = 0;
     516              : 
     517              :   /* For 32-bit targets, disable the REX registers.  */
     518       822162 :   if (! TARGET_64BIT)
     519              :     {
     520       134586 :       for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
     521       119632 :         CLEAR_HARD_REG_BIT (accessible_reg_set, i);
     522       134586 :       for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
     523       119632 :         CLEAR_HARD_REG_BIT (accessible_reg_set, i);
     524       254218 :       for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
     525       239264 :         CLEAR_HARD_REG_BIT (accessible_reg_set, i);
     526              :     }
     527              : 
     528              :   /*  See the definition of CALL_USED_REGISTERS in i386.h.  */
     529       822162 :   c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
     530              : 
     531       822162 :   CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
     532              : 
     533     76461066 :   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
     534              :     {
     535              :       /* Set/reset conditionally defined registers from
     536              :          CALL_USED_REGISTERS initializer.  */
     537     75638904 :       if (call_used_regs[i] > 1)
     538     13093231 :         call_used_regs[i] = !!(call_used_regs[i] & c_mask);
     539              : 
     540              :       /* Calculate registers of CLOBBERED_REGS register set
     541              :          as call used registers from GENERAL_REGS register set.  */
     542     75638904 :       if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
     543     75638904 :           && call_used_regs[i])
     544     22920265 :         SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
     545              :     }
     546              : 
     547              :   /* If MMX is disabled, disable the registers.  */
     548       822162 :   if (! TARGET_MMX)
     549       398404 :     accessible_reg_set &= ~reg_class_contents[MMX_REGS];
     550              : 
     551              :   /* If SSE is disabled, disable the registers.  */
     552       822162 :   if (! TARGET_SSE)
     553       392426 :     accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];
     554              : 
     555              :   /* If the FPU is disabled, disable the registers.  */
     556       822162 :   if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
     557       393650 :     accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];
     558              : 
     559              :   /* If AVX512F is disabled, disable the registers.  */
     560       822162 :   if (! TARGET_AVX512F)
     561              :     {
     562      9894051 :       for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
     563      9312048 :         CLEAR_HARD_REG_BIT (accessible_reg_set, i);
     564              : 
     565      1164006 :       accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
     566              :     }
     567              : 
     568              :   /* If APX is disabled, disable the registers.  */
     569       822162 :   if (! (TARGET_APX_EGPR && TARGET_64BIT))
     570              :     {
     571     13965398 :       for (i = FIRST_REX2_INT_REG; i <= LAST_REX2_INT_REG; i++)
     572     13143904 :         CLEAR_HARD_REG_BIT (accessible_reg_set, i);
     573              :     }
     574       822162 : }
     575              : 
     576              : /* Canonicalize a comparison from one we don't have to one we do have.  */
     577              : 
     578              : static void
     579     24283785 : ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
     580              :                               bool op0_preserve_value)
     581              : {
     582              :   /* The order of operands in x87 ficom compare is forced by combine in
     583              :      simplify_comparison () function. Float operator is treated as RTX_OBJ
     584              :      with a precedence over other operators and is always put in the first
     585              :      place. Swap condition and operands to match ficom instruction.  */
     586     24283785 :   if (!op0_preserve_value
     587     23467258 :       && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
     588              :     {
     589            6 :       enum rtx_code scode = swap_condition ((enum rtx_code) *code);
     590              : 
     591              :       /* We are called only for compares that are split to SAHF instruction.
     592              :          Ensure that we have setcc/jcc insn for the swapped condition.  */
     593            6 :       if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
     594              :         {
     595            6 :           std::swap (*op0, *op1);
     596            6 :           *code = (int) scode;
     597            6 :           return;
     598              :         }
     599              :     }
     600              : 
     601              :   /* SUB (a, b) underflows precisely when a < b.  Convert
     602              :      (compare (minus (a b)) a) to (compare (a b))
     603              :      to match *sub<mode>_3 pattern.  */
     604     23467252 :   if (!op0_preserve_value
     605     23467252 :       && (*code == GTU || *code == LEU)
     606      1876137 :       && GET_CODE (*op0) == MINUS
     607        96085 :       && rtx_equal_p (XEXP (*op0, 0), *op1))
     608              :     {
     609          488 :       *op1 = XEXP (*op0, 1);
     610          488 :       *op0 = XEXP (*op0, 0);
     611          488 :       *code = (int) swap_condition ((enum rtx_code) *code);
     612          488 :       return;
     613              :     }
     614              : 
     615              :   /* Swap operands of GTU comparison to canonicalize
     616              :      addcarry/subborrow comparison.  */
     617     24283291 :   if (!op0_preserve_value
     618     23466764 :       && *code == GTU
     619       874378 :       && GET_CODE (*op0) == PLUS
     620       339042 :       && ix86_carry_flag_operator (XEXP (*op0, 0), VOIDmode)
     621        46842 :       && GET_CODE (XEXP (*op0, 1)) == ZERO_EXTEND
     622     24325929 :       && GET_CODE (*op1) == ZERO_EXTEND)
     623              :     {
     624        39332 :       std::swap (*op0, *op1);
     625        39332 :       *code = (int) swap_condition ((enum rtx_code) *code);
     626        39332 :       return;
     627              :     }
     628              : }
     629              : 
     630              : /* Hook to determine if one function can safely inline another.  */
     631              : 
     632              : static bool
     633      9565611 : ix86_can_inline_p (tree caller, tree callee)
     634              : {
     635      9565611 :   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
     636      9565611 :   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
     637              : 
     638              :   /* Changes of those flags can be tolerated for always inlines. Lets hope
     639              :      user knows what he is doing.  */
     640      9565611 :   unsigned HOST_WIDE_INT always_inline_safe_mask
     641              :          = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
     642              :             | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
     643              :             | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
     644              :             | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
     645              :             | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
     646              :             | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
     647              :             | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);
     648              : 
     649              : 
     650      9565611 :   if (!callee_tree)
     651      8971884 :     callee_tree = target_option_default_node;
     652      9565611 :   if (!caller_tree)
     653      8971940 :     caller_tree = target_option_default_node;
     654      9565611 :   if (callee_tree == caller_tree)
     655              :     return true;
     656              : 
     657         5283 :   struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
     658         5283 :   struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
     659         5283 :   bool ret = false;
     660         5283 :   bool always_inline
     661         5283 :     = (DECL_DISREGARD_INLINE_LIMITS (callee)
     662         9924 :        && lookup_attribute ("always_inline",
     663         4641 :                             DECL_ATTRIBUTES (callee)));
     664              : 
     665              :   /* If callee only uses GPRs, ignore MASK_80387.  */
     666         5283 :   if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
     667         1024 :     always_inline_safe_mask |= MASK_80387;
     668              : 
     669         5283 :   cgraph_node *callee_node = cgraph_node::get (callee);
     670              :   /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
     671              :      function can inline a SSE2 function but a SSE2 function can't inline
     672              :      a SSE4 function.  */
     673         5283 :   if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
     674              :        != callee_opts->x_ix86_isa_flags)
     675         5050 :       || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
     676              :           != callee_opts->x_ix86_isa_flags2))
     677              :     ret = false;
     678              : 
     679              :   /* See if we have the same non-isa options.  */
     680         5013 :   else if ((!always_inline
     681          388 :             && caller_opts->x_target_flags != callee_opts->x_target_flags)
     682         4969 :            || (caller_opts->x_target_flags & ~always_inline_safe_mask)
     683         4969 :                != (callee_opts->x_target_flags & ~always_inline_safe_mask))
     684              :     ret = false;
     685              : 
     686         4969 :   else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath
     687              :            /* If the calle doesn't use FP expressions differences in
     688              :               ix86_fpmath can be ignored.  We are called from FEs
     689              :               for multi-versioning call optimization, so beware of
     690              :               ipa_fn_summaries not available.  */
     691         1241 :            && (! ipa_fn_summaries
     692         1241 :                || ipa_fn_summaries->get (callee_node) == NULL
     693         1241 :                || ipa_fn_summaries->get (callee_node)->fp_expressions))
     694              :     ret = false;
     695              : 
     696              :   /* At this point we cannot identify whether arch or tune setting
     697              :      comes from target attribute or not. So the most conservative way
     698              :      is to allow the callee that uses default arch and tune string to
     699              :      be inlined.  */
     700         4695 :   else if (!strcmp (callee_opts->x_ix86_arch_string, "x86-64")
     701         1424 :            && !strcmp (callee_opts->x_ix86_tune_string, "generic"))
     702              :     ret = true;
     703              : 
     704              :   /* See if arch, tune, etc. are the same. As previous ISA flags already
     705              :      checks if callee's ISA is subset of caller's, do not block
     706              :      always_inline attribute for callee even it has different arch. */
     707         3279 :   else if (!always_inline && caller_opts->arch != callee_opts->arch)
     708              :     ret = false;
     709              : 
     710           15 :   else if (!always_inline && caller_opts->tune != callee_opts->tune)
     711              :     ret = false;
     712              : 
     713         3279 :   else if (!always_inline
     714           15 :            && caller_opts->branch_cost != callee_opts->branch_cost)
     715              :     ret = false;
     716              : 
     717              :   else
     718      9565023 :     ret = true;
     719              : 
     720              :   return ret;
     721              : }
     722              : 
     723              : /* Return true if this goes in large data/bss.  */
     724              : 
     725              : static bool
     726     79961836 : ix86_in_large_data_p (tree exp)
     727              : {
     728     79961836 :   if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC
     729     79961598 :       && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC)
     730              :     return false;
     731              : 
     732         1110 :   if (exp == NULL_TREE)
     733              :     return false;
     734              : 
     735              :   /* Functions are never large data.  */
     736         1110 :   if (TREE_CODE (exp) == FUNCTION_DECL)
     737              :     return false;
     738              : 
     739              :   /* Automatic variables are never large data.  */
     740          262 :   if (VAR_P (exp) && !is_global_var (exp))
     741              :     return false;
     742              : 
     743          262 :   if (VAR_P (exp) && DECL_SECTION_NAME (exp))
     744              :     {
     745           51 :       const char *section = DECL_SECTION_NAME (exp);
     746           51 :       if (strcmp (section, ".ldata") == 0
     747           51 :           || strcmp (section, ".lbss") == 0)
     748              :         return true;
     749              :       return false;
     750              :     }
     751              :   else
     752              :     {
     753          211 :       HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
     754              : 
     755              :       /* If this is an incomplete type with size 0, then we can't put it
     756              :          in data because it might be too big when completed.  Also,
     757              :          int_size_in_bytes returns -1 if size can vary or is larger than
     758              :          an integer in which case also it is safer to assume that it goes in
     759              :          large data.  */
     760          211 :       if (size <= 0 || size > ix86_section_threshold)
     761              :         return true;
     762              :     }
     763              : 
     764              :   return false;
     765              : }
     766              : 
     767              : /* i386-specific section flag to mark large sections.  */
     768              : #define SECTION_LARGE SECTION_MACH_DEP
     769              : 
     770              : /* Switch to the appropriate section for output of DECL.
     771              :    DECL is either a `VAR_DECL' node or a constant of some sort.
     772              :    RELOC indicates whether forming the initial value of DECL requires
     773              :    link-time relocations.  */
     774              : 
     775              : ATTRIBUTE_UNUSED static section *
     776      1650103 : x86_64_elf_select_section (tree decl, int reloc,
     777              :                            unsigned HOST_WIDE_INT align)
     778              : {
     779      1650103 :   if (ix86_in_large_data_p (decl))
     780              :     {
     781            6 :       const char *sname = NULL;
     782            6 :       unsigned int flags = SECTION_WRITE | SECTION_LARGE;
     783            6 :       switch (categorize_decl_for_section (decl, reloc))
     784              :         {
     785            1 :         case SECCAT_DATA:
     786            1 :           sname = ".ldata";
     787            1 :           break;
     788            0 :         case SECCAT_DATA_REL:
     789            0 :           sname = ".ldata.rel";
     790            0 :           break;
     791            0 :         case SECCAT_DATA_REL_LOCAL:
     792            0 :           sname = ".ldata.rel.local";
     793            0 :           break;
     794            0 :         case SECCAT_DATA_REL_RO:
     795            0 :           sname = ".ldata.rel.ro";
     796            0 :           break;
     797            0 :         case SECCAT_DATA_REL_RO_LOCAL:
     798            0 :           sname = ".ldata.rel.ro.local";
     799            0 :           break;
     800            0 :         case SECCAT_BSS:
     801            0 :           sname = ".lbss";
     802            0 :           flags |= SECTION_BSS;
     803            0 :           break;
     804              :         case SECCAT_RODATA:
     805              :         case SECCAT_RODATA_MERGE_STR:
     806              :         case SECCAT_RODATA_MERGE_STR_INIT:
     807              :         case SECCAT_RODATA_MERGE_CONST:
     808              :           sname = ".lrodata";
     809              :           flags &= ~SECTION_WRITE;
     810              :           break;
     811            0 :         case SECCAT_SRODATA:
     812            0 :         case SECCAT_SDATA:
     813            0 :         case SECCAT_SBSS:
     814            0 :           gcc_unreachable ();
     815              :         case SECCAT_TEXT:
     816              :         case SECCAT_TDATA:
     817              :         case SECCAT_TBSS:
     818              :           /* We don't split these for medium model.  Place them into
     819              :              default sections and hope for best.  */
     820              :           break;
     821              :         }
     822            1 :       if (sname)
     823              :         {
     824              :           /* We might get called with string constants, but get_named_section
     825              :              doesn't like them as they are not DECLs.  Also, we need to set
     826              :              flags in that case.  */
     827            6 :           if (!DECL_P (decl))
     828            3 :             return get_section (sname, flags, NULL);
     829            3 :           return get_named_section (decl, sname, reloc);
     830              :         }
     831              :     }
     832      1650097 :   return default_elf_select_section (decl, reloc, align);
     833              : }
     834              : 
     835              : /* Select a set of attributes for section NAME based on the properties
     836              :    of DECL and whether or not RELOC indicates that DECL's initializer
     837              :    might contain runtime relocations.  */
     838              : 
     839              : static unsigned int ATTRIBUTE_UNUSED
     840     66667127 : x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
     841              : {
     842     66667127 :   unsigned int flags = default_section_type_flags (decl, name, reloc);
     843              : 
     844     66667127 :   if (ix86_in_large_data_p (decl))
     845            7 :     flags |= SECTION_LARGE;
     846              : 
     847     66667127 :   if (decl == NULL_TREE
     848          370 :       && (strcmp (name, ".ldata.rel.ro") == 0
     849          370 :           || strcmp (name, ".ldata.rel.ro.local") == 0))
     850            0 :     flags |= SECTION_RELRO;
     851              : 
     852     66667127 :   if (strcmp (name, ".lbss") == 0
     853     66667123 :       || startswith (name, ".lbss.")
     854    133334247 :       || startswith (name, ".gnu.linkonce.lb."))
     855            7 :     flags |= SECTION_BSS;
     856              : 
     857     66667127 :   return flags;
     858              : }
     859              : 
     860              : /* Build up a unique section name, expressed as a
     861              :    STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
     862              :    RELOC indicates whether the initial value of EXP requires
     863              :    link-time relocations.  */
     864              : 
     865              : static void ATTRIBUTE_UNUSED
     866      1797885 : x86_64_elf_unique_section (tree decl, int reloc)
     867              : {
     868      1797885 :   if (ix86_in_large_data_p (decl))
     869              :     {
     870            3 :       const char *prefix = NULL;
     871              :       /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
     872            3 :       bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
     873              : 
     874            3 :       switch (categorize_decl_for_section (decl, reloc))
     875              :         {
     876            0 :         case SECCAT_DATA:
     877            0 :         case SECCAT_DATA_REL:
     878            0 :         case SECCAT_DATA_REL_LOCAL:
     879            0 :         case SECCAT_DATA_REL_RO:
     880            0 :         case SECCAT_DATA_REL_RO_LOCAL:
     881            0 :           prefix = one_only ? ".ld" : ".ldata";
     882              :           break;
     883            3 :         case SECCAT_BSS:
     884            3 :           prefix = one_only ? ".lb" : ".lbss";
     885              :           break;
     886              :         case SECCAT_RODATA:
     887              :         case SECCAT_RODATA_MERGE_STR:
     888              :         case SECCAT_RODATA_MERGE_STR_INIT:
     889              :         case SECCAT_RODATA_MERGE_CONST:
     890              :           prefix = one_only ? ".lr" : ".lrodata";
     891              :           break;
     892            0 :         case SECCAT_SRODATA:
     893            0 :         case SECCAT_SDATA:
     894            0 :         case SECCAT_SBSS:
     895            0 :           gcc_unreachable ();
     896              :         case SECCAT_TEXT:
     897              :         case SECCAT_TDATA:
     898              :         case SECCAT_TBSS:
     899              :           /* We don't split these for medium model.  Place them into
     900              :              default sections and hope for best.  */
     901              :           break;
     902              :         }
     903            3 :       if (prefix)
     904              :         {
     905            3 :           const char *name, *linkonce;
     906            3 :           char *string;
     907              : 
     908            3 :           name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
     909            3 :           name = targetm.strip_name_encoding (name);
     910              : 
     911              :           /* If we're using one_only, then there needs to be a .gnu.linkonce
     912              :              prefix to the section name.  */
     913            3 :           linkonce = one_only ? ".gnu.linkonce" : "";
     914              : 
     915            3 :           string = ACONCAT ((linkonce, prefix, ".", name, NULL));
     916              : 
     917            3 :           set_decl_section_name (decl, string);
     918            3 :           return;
     919              :         }
     920              :     }
     921      1797882 :   default_unique_section (decl, reloc);
     922              : }
     923              : 
     924              : /* Return true if TYPE has no_callee_saved_registers or preserve_none
     925              :    attribute.  */
     926              : 
     927              : bool
     928      7484577 : ix86_type_no_callee_saved_registers_p (const_tree type)
     929              : {
     930     14969154 :   return (lookup_attribute ("no_callee_saved_registers",
     931      7484577 :                             TYPE_ATTRIBUTES (type)) != NULL
     932     14969023 :           || lookup_attribute ("preserve_none",
     933      7484446 :                                TYPE_ATTRIBUTES (type)) != NULL);
     934              : }
     935              : 
     936              : #ifdef COMMON_ASM_OP
     937              : 
     938              : #ifndef LARGECOMM_SECTION_ASM_OP
     939              : #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
     940              : #endif
     941              : 
     942              : /* This says how to output assembler code to declare an
     943              :    uninitialized external linkage data object.
     944              : 
     945              :    For medium model x86-64 we need to use LARGECOMM_SECTION_ASM_OP opcode for
     946              :    large objects.  */
     947              : void
     948       170060 : x86_elf_aligned_decl_common (FILE *file, tree decl,
     949              :                         const char *name, unsigned HOST_WIDE_INT size,
     950              :                         unsigned align)
     951              : {
     952       170060 :   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
     953       170054 :        || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
     954            6 :       && size > (unsigned int)ix86_section_threshold)
     955              :     {
     956            1 :       switch_to_section (get_named_section (decl, ".lbss", 0));
     957            1 :       fputs (LARGECOMM_SECTION_ASM_OP, file);
     958              :     }
     959              :   else
     960       170059 :     fputs (COMMON_ASM_OP, file);
     961       170060 :   assemble_name (file, name);
     962       170060 :   fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
     963              :            size, align / BITS_PER_UNIT);
     964       170060 : }
     965              : #endif
     966              : 
     967              : /* Utility function for targets to use in implementing
     968              :    ASM_OUTPUT_ALIGNED_BSS.  */
     969              : 
     970              : void
     971       766928 : x86_output_aligned_bss (FILE *file, tree decl, const char *name,
     972              :                         unsigned HOST_WIDE_INT size, unsigned align)
     973              : {
     974       766928 :   if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC
     975       766918 :        || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
     976           37 :       && size > (unsigned int)ix86_section_threshold)
     977            3 :     switch_to_section (get_named_section (decl, ".lbss", 0));
     978              :   else
     979       766925 :     switch_to_section (bss_section);
     980       920154 :   ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
     981              : #ifdef ASM_DECLARE_OBJECT_NAME
     982       766928 :   last_assemble_variable_decl = decl;
     983       766928 :   ASM_DECLARE_OBJECT_NAME (file, name, decl);
     984              : #else
     985              :   /* Standard thing is just output label for the object.  */
     986              :   ASM_OUTPUT_LABEL (file, name);
     987              : #endif /* ASM_DECLARE_OBJECT_NAME */
     988       766928 :   ASM_OUTPUT_SKIP (file, size ? size : 1);
     989       766928 : }
     990              : 
     991              : /* Decide whether we must probe the stack before any space allocation
     992              :    on this target.  It's essentially TARGET_STACK_PROBE except when
     993              :    -fstack-check causes the stack to be already probed differently.  */
     994              : 
     995              : bool
     996       865886 : ix86_target_stack_probe (void)
     997              : {
     998              :   /* Do not probe the stack twice if static stack checking is enabled.  */
     999       865886 :   if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    1000              :     return false;
    1001              : 
    1002       865886 :   return TARGET_STACK_PROBE;
    1003              : }
    1004              : 
    1005              : /* Decide whether we can make a sibling call to a function.  DECL is the
    1006              :    declaration of the function being targeted by the call and EXP is the
    1007              :    CALL_EXPR representing the call.  */
    1008              : 
    1009              : static bool
    1010       139153 : ix86_function_ok_for_sibcall (tree decl, tree exp)
    1011              : {
    1012       139153 :   tree type, decl_or_type;
    1013       139153 :   rtx a, b;
    1014       139153 :   bool bind_global = decl && !targetm.binds_local_p (decl);
    1015              : 
    1016       139153 :   if (ix86_function_naked (current_function_decl))
    1017              :     return false;
    1018              : 
    1019              :   /* Sibling call isn't OK if there are no caller-saved registers
    1020              :      since all registers must be preserved before return.  */
    1021       139151 :   if (cfun->machine->call_saved_registers
    1022       139151 :       == TYPE_NO_CALLER_SAVED_REGISTERS)
    1023              :     return false;
    1024              : 
    1025              :   /* If we are generating position-independent code, we cannot sibcall
    1026              :      optimize direct calls to global functions, as the PLT requires
    1027              :      %ebx be live. (Darwin does not have a PLT.)  */
    1028       139122 :   if (!TARGET_MACHO
    1029       139122 :       && !TARGET_64BIT
    1030        11317 :       && flag_pic
    1031         8392 :       && flag_plt
    1032         8392 :       && bind_global)
    1033              :     return false;
    1034              : 
    1035              :   /* If we need to align the outgoing stack, then sibcalling would
    1036              :      unalign the stack, which may break the called function.  */
    1037       134484 :   if (ix86_minimum_incoming_stack_boundary (true)
    1038       134484 :       < PREFERRED_STACK_BOUNDARY)
    1039              :     return false;
    1040              : 
    1041       133903 :   if (decl)
    1042              :     {
    1043       122881 :       decl_or_type = decl;
    1044       122881 :       type = TREE_TYPE (decl);
    1045              :     }
    1046              :   else
    1047              :     {
    1048              :       /* We're looking at the CALL_EXPR, we need the type of the function.  */
    1049        11022 :       type = CALL_EXPR_FN (exp);                /* pointer expression */
    1050        11022 :       type = TREE_TYPE (type);                  /* pointer type */
    1051        11022 :       type = TREE_TYPE (type);                  /* function type */
    1052        11022 :       decl_or_type = type;
    1053              :     }
    1054              : 
    1055              :   /* Sibling call isn't OK if callee has no callee-saved registers
    1056              :      and the calling function has callee-saved registers.  */
    1057       133903 :   if ((cfun->machine->call_saved_registers
    1058       133903 :        != TYPE_NO_CALLEE_SAVED_REGISTERS)
    1059       133903 :       && cfun->machine->call_saved_registers != TYPE_PRESERVE_NONE
    1060       133903 :       && ix86_type_no_callee_saved_registers_p (type))
    1061              :     return false;
    1062              : 
    1063              :   /* If outgoing reg parm stack space changes, we cannot do sibcall.  */
    1064       133887 :   if ((OUTGOING_REG_PARM_STACK_SPACE (type)
    1065       133887 :        != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl)))
    1066       267027 :       || (REG_PARM_STACK_SPACE (decl_or_type)
    1067       133140 :           != REG_PARM_STACK_SPACE (current_function_decl)))
    1068              :     {
    1069          747 :       maybe_complain_about_tail_call (exp,
    1070              :                                       "inconsistent size of stack space"
    1071              :                                       " allocated for arguments which are"
    1072              :                                       " passed in registers");
    1073          747 :       return false;
    1074              :     }
    1075              : 
    1076              :   /* Check that the return value locations are the same.  Like
    1077              :      if we are returning floats on the 80387 register stack, we cannot
    1078              :      make a sibcall from a function that doesn't return a float to a
    1079              :      function that does or, conversely, from a function that does return
    1080              :      a float to a function that doesn't; the necessary stack adjustment
    1081              :      would not be executed.  This is also the place we notice
    1082              :      differences in the return value ABI.  Note that it is ok for one
    1083              :      of the functions to have void return type as long as the return
    1084              :      value of the other is passed in a register.  */
    1085       133140 :   a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
    1086       133140 :   b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
    1087       133140 :                            cfun->decl, false);
    1088       133140 :   if (STACK_REG_P (a) || STACK_REG_P (b))
    1089              :     {
    1090         1019 :       if (!rtx_equal_p (a, b))
    1091              :         return false;
    1092              :     }
    1093       132121 :   else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    1094              :     ;
    1095        24920 :   else if (!rtx_equal_p (a, b))
    1096              :     return false;
    1097              : 
    1098       132760 :   if (TARGET_64BIT)
    1099              :     {
    1100              :       /* The SYSV ABI has more call-clobbered registers;
    1101              :          disallow sibcalls from MS to SYSV.  */
    1102       126081 :       if (cfun->machine->call_abi == MS_ABI
    1103       126081 :           && ix86_function_type_abi (type) == SYSV_ABI)
    1104              :         return false;
    1105              :     }
    1106              :   else
    1107              :     {
    1108              :       /* If this call is indirect, we'll need to be able to use a
    1109              :          call-clobbered register for the address of the target function.
    1110              :          Make sure that all such registers are not used for passing
    1111              :          parameters.  Note that DLLIMPORT functions and call to global
    1112              :          function via GOT slot are indirect.  */
    1113         6679 :       if (!decl
    1114         4769 :           || (bind_global && flag_pic && !flag_plt)
    1115              :           || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
    1116         4769 :           || flag_force_indirect_call)
    1117              :         {
    1118              :           /* Check if regparm >= 3 since arg_reg_available is set to
    1119              :              false if regparm == 0.  If regparm is 1 or 2, there is
    1120              :              always a call-clobbered register available.
    1121              : 
    1122              :              ??? The symbol indirect call doesn't need a call-clobbered
    1123              :              register.  But we don't know if this is a symbol indirect
    1124              :              call or not here.  */
    1125         1910 :           if (ix86_function_regparm (type, decl) >= 3
    1126         1910 :               && !cfun->machine->arg_reg_available)
    1127              :             return false;
    1128              :         }
    1129              :     }
    1130              : 
    1131       132760 :   if (decl && ix86_use_pseudo_pic_reg ())
    1132              :     {
    1133              :       /* When PIC register is used, it must be restored after ifunc
    1134              :          function returns.  */
    1135         2057 :        cgraph_node *node = cgraph_node::get (decl);
    1136         2057 :        if (node && node->ifunc_resolver)
    1137              :          return false;
    1138              :     }
    1139              : 
    1140              :   /* Disable sibcall if callee has indirect_return attribute and
    1141              :      caller doesn't since callee will return to the caller's caller
    1142              :      via an indirect jump.  */
    1143       132760 :   if (((flag_cf_protection & (CF_RETURN | CF_BRANCH))
    1144              :        == (CF_RETURN | CF_BRANCH))
    1145        54147 :       && lookup_attribute ("indirect_return", TYPE_ATTRIBUTES (type))
    1146       132764 :       && !lookup_attribute ("indirect_return",
    1147            4 :                             TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl))))
    1148              :     return false;
    1149              : 
    1150              :   /* Otherwise okay.  That also includes certain types of indirect calls.  */
    1151              :   return true;
    1152              : }
    1153              : 
    1154              : /* This function determines from TYPE the calling-convention.  */
    1155              : 
    1156              : unsigned int
    1157      6165414 : ix86_get_callcvt (const_tree type)
    1158              : {
    1159      6165414 :   unsigned int ret = 0;
    1160      6165414 :   bool is_stdarg;
    1161      6165414 :   tree attrs;
    1162              : 
    1163      6165414 :   if (TARGET_64BIT)
    1164              :     return IX86_CALLCVT_CDECL;
    1165              : 
    1166      3259552 :   attrs = TYPE_ATTRIBUTES (type);
    1167      3259552 :   if (attrs != NULL_TREE)
    1168              :     {
    1169        65901 :       if (lookup_attribute ("cdecl", attrs))
    1170              :         ret |= IX86_CALLCVT_CDECL;
    1171        65901 :       else if (lookup_attribute ("stdcall", attrs))
    1172              :         ret |= IX86_CALLCVT_STDCALL;
    1173        65901 :       else if (lookup_attribute ("fastcall", attrs))
    1174              :         ret |= IX86_CALLCVT_FASTCALL;
    1175        65892 :       else if (lookup_attribute ("thiscall", attrs))
    1176              :         ret |= IX86_CALLCVT_THISCALL;
    1177              : 
    1178              :       /* Regparam isn't allowed for thiscall and fastcall.  */
    1179              :       if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
    1180              :         {
    1181        65892 :           if (lookup_attribute ("regparm", attrs))
    1182        15823 :             ret |= IX86_CALLCVT_REGPARM;
    1183        65892 :           if (lookup_attribute ("sseregparm", attrs))
    1184            0 :             ret |= IX86_CALLCVT_SSEREGPARM;
    1185              :         }
    1186              : 
    1187        65901 :       if (IX86_BASE_CALLCVT(ret) != 0)
    1188            9 :         return ret;
    1189              :     }
    1190              : 
    1191      3259543 :   is_stdarg = stdarg_p (type);
    1192      3259543 :   if (TARGET_RTD && !is_stdarg)
    1193            0 :     return IX86_CALLCVT_STDCALL | ret;
    1194              : 
    1195      3259543 :   if (ret != 0
    1196      3259543 :       || is_stdarg
    1197      3234734 :       || TREE_CODE (type) != METHOD_TYPE
    1198      3390949 :       || ix86_function_type_abi (type) != MS_ABI)
    1199      3259543 :     return IX86_CALLCVT_CDECL | ret;
    1200              : 
    1201              :   return IX86_CALLCVT_THISCALL;
    1202              : }
    1203              : 
    1204              : /* Return 0 if the attributes for two types are incompatible, 1 if they
    1205              :    are compatible, and 2 if they are nearly compatible (which causes a
    1206              :    warning to be generated).  */
    1207              : 
    1208              : static int
    1209      1470653 : ix86_comp_type_attributes (const_tree type1, const_tree type2)
    1210              : {
    1211      1470653 :   unsigned int ccvt1, ccvt2;
    1212              : 
    1213      1470653 :   if (TREE_CODE (type1) != FUNCTION_TYPE
    1214      1470653 :       && TREE_CODE (type1) != METHOD_TYPE)
    1215              :     return 1;
    1216              : 
    1217      1464050 :   ccvt1 = ix86_get_callcvt (type1);
    1218      1464050 :   ccvt2 = ix86_get_callcvt (type2);
    1219      1464050 :   if (ccvt1 != ccvt2)
    1220              :     return 0;
    1221      2905982 :   if (ix86_function_regparm (type1, NULL)
    1222      1452991 :       != ix86_function_regparm (type2, NULL))
    1223              :     return 0;
    1224              : 
    1225      1415230 :   if (ix86_type_no_callee_saved_registers_p (type1)
    1226       707615 :       != ix86_type_no_callee_saved_registers_p (type2))
    1227              :     return 0;
    1228              : 
    1229              :   /* preserve_none attribute uses a different calling convention is
    1230              :      only for 64-bit.  */
    1231       707489 :   if (TARGET_64BIT
    1232      1414918 :       && (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type1))
    1233       707429 :           != lookup_attribute ("preserve_none",
    1234       707429 :                                TYPE_ATTRIBUTES (type2))))
    1235              :     return 0;
    1236              : 
    1237              :   return 1;
    1238              : }
    1239              : 
/* Return the regparm value for a function with the indicated TYPE and DECL.
   DECL may be NULL when calling function indirectly
   or considering a libcall.  Returns the number of integer registers
   available for passing arguments.  */

static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  tree attr;
  int regparm;
  unsigned int ccvt;

  /* 64-bit ABIs have a fixed number of argument registers, determined
     solely by the function-type ABI.  */
  if (TARGET_64BIT)
    return (ix86_function_type_abi (type) == SYSV_ABI
            ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
  ccvt = ix86_get_callcvt (type);
  regparm = ix86_regparm;

  /* An explicit regparm(N) attribute overrides everything else.  */
  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
    {
      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (attr)
        {
          regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
          return regparm;
        }
    }
  /* fastcall passes up to two args in registers, thiscall only `this'.  */
  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* Use register calling convention for local functions when possible.  */
  if (decl
      && TREE_CODE (decl) == FUNCTION_DECL)
    {
      cgraph_node *target = cgraph_node::get (decl);
      if (target)
        target = target->function_symbol ();

      /* Caller and callee must agree on the calling convention, so
         checking here just optimize means that with
         __attribute__((optimize (...))) caller could use regparm convention
         and callee not, or vice versa.  Instead look at whether the callee
         is optimized or not.  */
      if (target && opt_for_fn (target->decl, optimize)
          && !(profile_flag && !flag_fentry))
        {
          if (target->local && target->can_change_signature)
            {
              int local_regparm, globals = 0, regno;

              /* Make sure no regparm register is taken by a
                 fixed register variable.  Stop at the first regparm
                 register that is fixed; only registers below it
                 remain usable for argument passing.  */
              for (local_regparm = 0; local_regparm < REGPARM_MAX;
                   local_regparm++)
                if (fixed_regs[local_regparm])
                  break;

              /* We don't want to use regparm(3) for nested functions as
                 these use a static chain pointer in the third argument.  */
              if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
                local_regparm = 2;

              /* Save a register for the split stack.  */
              if (flag_split_stack)
                {
                  if (local_regparm == 3)
                    local_regparm = 2;
                  else if (local_regparm == 2
                           && DECL_STATIC_CHAIN (target->decl))
                    local_regparm = 1;
                }

              /* Each fixed register usage increases register pressure,
                 so less registers should be used for argument passing.
                 This functionality can be overriden by an explicit
                 regparm value.  */
              for (regno = AX_REG; regno <= DI_REG; regno++)
                if (fixed_regs[regno])
                  globals++;

              /* Subtract fixed-register pressure; if it eats all the
                 candidate registers, fall back to stack passing.  */
              local_regparm
                = globals < local_regparm ? local_regparm - globals : 0;

              /* Only ever raise the regparm count for local functions,
                 never lower a value the user requested.  */
              if (local_regparm > regparm)
                regparm = local_regparm;
            }
        }
    }

  return regparm;
}
    1332              : 
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling function
   indirectly or considering a libcall.  Return -1 if any FP parameter
   should be rejected by error.  This is used in the situation where we
   imply the SSE calling convention but the function is called from
   another function with SSE disabled.  Otherwise return 0.  */

static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  /* This query is only meaningful for the 32-bit ABI.  */
  gcc_assert (!TARGET_64BIT);

  /* Use SSE registers to pass SFmode and DFmode arguments if requested
     by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (!TARGET_SSE)
        {
          /* sseregparm without SSE cannot work; diagnose (once per call
             site, when WARN is set) and fall back to no SSE passing.  */
          if (warn)
            {
              if (decl)
                error ("calling %qD with attribute sseregparm without "
                       "SSE/SSE2 enabled", decl);
              else
                error ("calling %qT with attribute sseregparm without "
                       "SSE/SSE2 enabled", type);
            }
          return 0;
        }

      return 2;
    }

  /* Without a known callee decl we cannot prove the local-function
     conditions below, so use the standard convention.  */
  if (!decl)
    return 0;

  cgraph_node *target = cgraph_node::get (decl);
  if (target)
    target = target->function_symbol ();

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
     (and DFmode for SSE2) arguments in SSE registers.  */
  if (target
      /* TARGET_SSE_MATH */
      && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
      && opt_for_fn (target->decl, optimize)
      && !(profile_flag && !flag_fentry))
    {
      if (target->local && target->can_change_signature)
        {
          /* Refuse to produce wrong code when local function with SSE enabled
             is called from SSE disabled function.
             FIXME: We need a way to detect these cases cross-ltrans partition
             and avoid using SSE calling conventions on local functions called
             from function with SSE disabled.  For now at least delay the
             warning until we know we are going to produce wrong code.
             See PR66047  */
          if (!TARGET_SSE && warn)
            return -1;
          /* 2 when the callee was compiled with SSE2 (DFmode allowed),
             1 when only SSE1 (SFmode only).  */
          return TARGET_SSE2_P (target_opts_for_fn (target->decl)
                                ->x_ix86_isa_flags) ? 2 : 1;
        }
    }

  return 0;
}
    1401              : 
    1402              : /* Return true if EAX is live at the start of the function.  Used by
    1403              :    ix86_expand_prologue to determine if we need special help before
    1404              :    calling allocate_stack_worker.  */
    1405              : 
    1406              : static bool
    1407         7090 : ix86_eax_live_at_start_p (void)
    1408              : {
    1409              :   /* Cheat.  Don't bother working forward from ix86_function_regparm
    1410              :      to the function type to whether an actual argument is located in
    1411              :      eax.  Instead just look at cfg info, which is still close enough
    1412              :      to correct at this point.  This gives false positives for broken
    1413              :      functions that might use uninitialized data that happens to be
    1414              :      allocated in eax, but who cares?  */
    1415         7090 :   return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
    1416              : }
    1417              : 
    1418              : static bool
    1419       159550 : ix86_keep_aggregate_return_pointer (tree fntype)
    1420              : {
    1421       159550 :   tree attr;
    1422              : 
    1423       159550 :   if (!TARGET_64BIT)
    1424              :     {
    1425       159550 :       attr = lookup_attribute ("callee_pop_aggregate_return",
    1426       159550 :                                TYPE_ATTRIBUTES (fntype));
    1427       159550 :       if (attr)
    1428            0 :         return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
    1429              : 
    1430              :       /* For 32-bit MS-ABI the default is to keep aggregate
    1431              :          return pointer.  */
    1432       159550 :       if (ix86_function_type_abi (fntype) == MS_ABI)
    1433              :         return true;
    1434              :     }
    1435              :   return KEEP_AGGREGATE_RETURN_POINTER != 0;
    1436              : }
    1437              : 
    1438              : /* Value is the number of bytes of arguments automatically
    1439              :    popped when returning from a subroutine call.
    1440              :    FUNDECL is the declaration node of the function (as a tree),
    1441              :    FUNTYPE is the data type of the function (as a tree),
    1442              :    or for a library call it is an identifier node for the subroutine name.
    1443              :    SIZE is the number of bytes of arguments passed on the stack.
    1444              : 
    1445              :    On the 80386, the RTD insn may be used to pop them if the number
    1446              :      of args is fixed, but if the number is variable then the caller
    1447              :      must pop them all.  RTD can't be used for library calls now
    1448              :      because the library is compiled with the Unix compiler.
    1449              :    Use of RTD is a selectable option, since it is incompatible with
    1450              :    standard Unix calling sequences.  If the option is not selected,
    1451              :    the caller must always pop the args.
    1452              : 
    1453              :    The attribute stdcall is equivalent to RTD on a per module basis.  */
    1454              : 
    1455              : static poly_int64
    1456      7536512 : ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
    1457              : {
    1458      7536512 :   unsigned int ccvt;
    1459              : 
    1460              :   /* None of the 64-bit ABIs pop arguments.  */
    1461      7536512 :   if (TARGET_64BIT)
    1462      6665343 :     return 0;
    1463              : 
    1464       871169 :   ccvt = ix86_get_callcvt (funtype);
    1465              : 
    1466       871169 :   if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
    1467              :                | IX86_CALLCVT_THISCALL)) != 0
    1468       871169 :       && ! stdarg_p (funtype))
    1469            3 :     return size;
    1470              : 
    1471              :   /* Lose any fake structure return argument if it is passed on the stack.  */
    1472       871166 :   if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
    1473       871166 :       && !ix86_keep_aggregate_return_pointer (funtype))
    1474              :     {
    1475       159550 :       int nregs = ix86_function_regparm (funtype, fundecl);
    1476       159550 :       if (nregs == 0)
    1477       457779 :         return GET_MODE_SIZE (Pmode);
    1478              :     }
    1479              : 
    1480       718573 :   return 0;
    1481              : }
    1482              : 
/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  Return false if
   INSN, as produced by combine, would leave a hard register operand
   that no enabled alternative's constraints can accept.  */

static bool
ix86_legitimate_combined_insn (rtx_insn *insn)
{
  int i;

  /* Check operand constraints in case hard registers were propagated
     into insn pattern.  This check prevents combine pass from
     generating insn patterns with invalid hard register operands.
     These invalid insns can eventually confuse reload to error out
     with a spill failure.  See also PRs 46829 and 46843.  */

  gcc_assert (INSN_CODE (insn) >= 0);

  extract_insn (insn);
  preprocess_constraints (insn);

  int n_operands = recog_data.n_operands;
  int n_alternatives = recog_data.n_alternatives;
  for (i = 0; i < n_operands; i++)
    {
      rtx op = recog_data.operand[i];
      machine_mode mode = GET_MODE (op);
      const operand_alternative *op_alt;
      int offset = 0;
      bool win;
      int j;

      /* A unary operator may be accepted by the predicate, but it
         is irrelevant for matching constraints.  */
      if (UNARY_P (op))
        op = XEXP (op, 0);

      /* Look through a subreg of a hard register, tracking the byte
         offset so reg_fits_class_p checks the right hard register.  */
      if (SUBREG_P (op))
        {
          if (REG_P (SUBREG_REG (op))
              && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
            offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
                                          GET_MODE (SUBREG_REG (op)),
                                          SUBREG_BYTE (op),
                                          GET_MODE (op));
          op = SUBREG_REG (op);
        }

      /* Only hard-register operands can invalidate the insn here;
         pseudos will be handled by register allocation.  */
      if (!(REG_P (op) && HARD_REGISTER_P (op)))
        continue;

      op_alt = recog_op_alt;

      /* Operand has no constraints, anything is OK.  */
      win = !n_alternatives;

      /* The operand is acceptable if any preferred alternative either
         allows anything, has a matching-operand constraint satisfied
         by an identical operand, or has a register class containing
         this hard register.  */
      alternative_mask preferred = get_preferred_alternatives (insn);
      for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
        {
          if (!TEST_BIT (preferred, j))
            continue;
          if (op_alt[i].anything_ok
              || (op_alt[i].matches != -1
                  && operands_match_p
                  (recog_data.operand[i],
                   recog_data.operand[op_alt[i].matches]))
              || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
            {
              win = true;
              break;
            }
        }

      if (!win)
        return false;
    }

  return true;
}
    1559              : 
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook: return the constant
   offset AddressSanitizer adds to a shifted address to locate its
   shadow byte; the value is subtarget-specific.  */

static unsigned HOST_WIDE_INT
ix86_asan_shadow_offset (void)
{
  return SUBTARGET_SHADOW_OFFSET;
}
    1567              : 
    1568              : /* Argument support functions.  */
    1569              : 
    1570              : /* Return true when register may be used to pass function parameters.  */
    1571              : bool
    1572   1472711549 : ix86_function_arg_regno_p (int regno)
    1573              : {
    1574   1472711549 :   int i;
    1575   1472711549 :   enum calling_abi call_abi;
    1576   1472711549 :   const int *parm_regs;
    1577              : 
    1578   1469265686 :   if (TARGET_SSE && SSE_REGNO_P (regno)
    1579   2435333035 :       && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
    1580              :     return true;
    1581              : 
    1582   1353832268 :    if (!TARGET_64BIT)
    1583    128937130 :      return (regno < REGPARM_MAX
    1584    128937130 :              || (TARGET_MMX && MMX_REGNO_P (regno)
    1585     11588488 :                  && regno < FIRST_MMX_REG + MMX_REGPARM_MAX));
    1586              : 
    1587              :   /* TODO: The function should depend on current function ABI but
    1588              :      builtins.cc would need updating then. Therefore we use the
    1589              :      default ABI.  */
    1590   1224895138 :   call_abi = ix86_cfun_abi ();
    1591              : 
    1592              :   /* RAX is used as hidden argument to va_arg functions.  */
    1593   1224895138 :   if (call_abi == SYSV_ABI && regno == AX_REG)
    1594              :     return true;
    1595              : 
    1596   1210743161 :   if (cfun
    1597   1210742829 :       && cfun->machine->call_saved_registers == TYPE_PRESERVE_NONE)
    1598              :     parm_regs = x86_64_preserve_none_int_parameter_registers;
    1599   1210724901 :   else if (call_abi == MS_ABI)
    1600              :     parm_regs = x86_64_ms_abi_int_parameter_registers;
    1601              :   else
    1602   1174762613 :     parm_regs = x86_64_int_parameter_registers;
    1603              : 
    1604  16203490160 :   for (i = 0; i < (call_abi == MS_ABI
    1605   8101745080 :                    ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
    1606   6977598547 :     if (regno == parm_regs[i])
    1607              :       return true;
    1608              :   return false;
    1609              : }
    1610              : 
    1611              : /* Return if we do not know how to pass ARG solely in registers.  */
    1612              : 
    1613              : static bool
    1614    427359253 : ix86_must_pass_in_stack (const function_arg_info &arg)
    1615              : {
    1616    427359253 :   if (must_pass_in_stack_var_size_or_pad (arg))
    1617              :     return true;
    1618              : 
    1619              :   /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
    1620              :      The layout_type routine is crafty and tries to trick us into passing
    1621              :      currently unsupported vector types on the stack by using TImode.  */
    1622      1766503 :   return (!TARGET_64BIT && arg.mode == TImode
    1623    427359216 :           && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
    1624              : }
    1625              : 
    1626              : /* It returns the size, in bytes, of the area reserved for arguments passed
    1627              :    in registers for the function represented by fndecl dependent to the used
    1628              :    abi format.  */
    1629              : int
    1630     10615491 : ix86_reg_parm_stack_space (const_tree fndecl)
    1631              : {
    1632     10615491 :   enum calling_abi call_abi = SYSV_ABI;
    1633     10615491 :   if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    1634     10299472 :     call_abi = ix86_function_abi (fndecl);
    1635              :   else
    1636       316019 :     call_abi = ix86_function_type_abi (fndecl);
    1637     10615491 :   if (TARGET_64BIT && call_abi == MS_ABI)
    1638       119238 :     return 32;
    1639              :   return 0;
    1640              : }
    1641              : 
/* Workaround wrapper so that i386.md patterns, which cannot invoke the
   target hook vector directly, can query the libc_has_function hook.  */
bool
ix86_libc_has_function (enum function_class fn_class)
{
  return targetm.libc_has_function (fn_class, NULL_TREE);
}
    1649              : 
    1650              : /* Returns value SYSV_ABI, MS_ABI dependent on fntype,
    1651              :    specifying the call abi used.  */
    1652              : enum calling_abi
    1653    457195970 : ix86_function_type_abi (const_tree fntype)
    1654              : {
    1655    457195970 :   enum calling_abi abi = ix86_abi;
    1656              : 
    1657    457195970 :   if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
    1658              :     return abi;
    1659              : 
    1660     17603638 :   if (abi == SYSV_ABI
    1661     17603638 :       && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
    1662              :     {
    1663      2601385 :       static int warned;
    1664      2601385 :       if (TARGET_X32 && !warned)
    1665              :         {
    1666            1 :           error ("X32 does not support %<ms_abi%> attribute");
    1667            1 :           warned = 1;
    1668              :         }
    1669              : 
    1670              :       abi = MS_ABI;
    1671              :     }
    1672     15002253 :   else if (abi == MS_ABI
    1673     15002253 :            && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
    1674              :     abi = SYSV_ABI;
    1675              : 
    1676              :   return abi;
    1677              : }
    1678              : 
    1679              : enum calling_abi
    1680    224030231 : ix86_function_abi (const_tree fndecl)
    1681              : {
    1682    224030231 :   return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi;
    1683              : }
    1684              : 
    1685              : /* Returns value SYSV_ABI, MS_ABI dependent on cfun,
    1686              :    specifying the call abi used.  */
    1687              : enum calling_abi
    1688   2071341419 : ix86_cfun_abi (void)
    1689              : {
    1690   2071341419 :   return cfun ? cfun->machine->call_abi : ix86_abi;
    1691              : }
    1692              : 
    1693              : bool
    1694      5014605 : ix86_function_ms_hook_prologue (const_tree fn)
    1695              : {
    1696      5014605 :   if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    1697              :     {
    1698            8 :       if (decl_function_context (fn) != NULL_TREE)
    1699            0 :         error_at (DECL_SOURCE_LOCATION (fn),
    1700              :                   "%<ms_hook_prologue%> attribute is not compatible "
    1701              :                   "with nested function");
    1702              :       else
    1703              :         return true;
    1704              :     }
    1705              :   return false;
    1706              : }
    1707              : 
    1708              : bool
    1709    119273622 : ix86_function_naked (const_tree fn)
    1710              : {
    1711    119273622 :   if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)))
    1712              :     return true;
    1713              : 
    1714              :   return false;
    1715              : }
    1716              : 
    1717              : /* Write the extra assembler code needed to declare a function properly.  */
    1718              : 
    1719              : void
    1720      1541054 : ix86_asm_output_function_label (FILE *out_file, const char *fname,
    1721              :                                 tree decl)
    1722              : {
    1723      1541054 :   bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
    1724              : 
    1725      1541054 :   if (cfun)
    1726      1537467 :     cfun->machine->function_label_emitted = true;
    1727              : 
    1728      1541054 :   if (is_ms_hook)
    1729              :     {
    1730            2 :       int i, filler_count = (TARGET_64BIT ? 32 : 16);
    1731            2 :       unsigned int filler_cc = 0xcccccccc;
    1732              : 
    1733           18 :       for (i = 0; i < filler_count; i += 4)
    1734           16 :         fprintf (out_file, ASM_LONG " %#x\n", filler_cc);
    1735              :     }
    1736              : 
    1737              : #ifdef SUBTARGET_ASM_UNWIND_INIT
    1738              :   SUBTARGET_ASM_UNWIND_INIT (out_file);
    1739              : #endif
    1740              : 
    1741      1541054 :   assemble_function_label_raw (out_file, fname);
    1742              : 
    1743              :   /* Output magic byte marker, if hot-patch attribute is set.  */
    1744      1541054 :   if (is_ms_hook)
    1745              :     {
    1746            2 :       if (TARGET_64BIT)
    1747              :         {
    1748              :           /* leaq [%rsp + 0], %rsp  */
    1749            2 :           fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
    1750              :                  out_file);
    1751              :         }
    1752              :       else
    1753              :         {
    1754              :           /* movl.s %edi, %edi
    1755              :              push   %ebp
    1756              :              movl.s %esp, %ebp */
    1757            0 :           fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", out_file);
    1758              :         }
    1759              :     }
    1760      1541054 : }
    1761              : 
    1762              : /* Output a user-defined label.  In AT&T syntax, registers are prefixed
    1763              :    with %, so labels require no punctuation.  In Intel syntax, registers
    1764              :    are unprefixed, so labels may clash with registers or other operators,
    1765              :    and require quoting.  */
    1766              : void
    1767     34914475 : ix86_asm_output_labelref (FILE *file, const char *prefix, const char *label)
    1768              : {
    1769     34914475 :   if (ASSEMBLER_DIALECT == ASM_ATT)
    1770     34913592 :     fprintf (file, "%s%s", prefix, label);
    1771              :   else
    1772          883 :     fprintf (file, "\"%s%s\"", prefix, label);
    1773     34914475 : }
    1774              : 
    1775              : /* Implementation of call abi switching target hook. Specific to FNDECL
    1776              :    the specific call register sets are set.  See also
    1777              :    ix86_conditional_register_usage for more details.  */
    1778              : void
    1779    203695612 : ix86_call_abi_override (const_tree fndecl)
    1780              : {
    1781    203695612 :   cfun->machine->call_abi = ix86_function_abi (fndecl);
    1782    203695612 : }
    1783              : 
    1784              : /* Return 1 if pseudo register should be created and used to hold
    1785              :    GOT address for PIC code.  */
    1786              : bool
    1787    172832897 : ix86_use_pseudo_pic_reg (void)
    1788              : {
    1789    172832897 :   if ((TARGET_64BIT
    1790    161823371 :        && (ix86_cmodel == CM_SMALL_PIC
    1791              :            || TARGET_PECOFF))
    1792    167058148 :       || !flag_pic)
    1793    168072558 :     return false;
    1794              :   return true;
    1795              : }
    1796              : 
/* Initialize large model PIC register.

   Emits the three-instruction CM_LARGE_PIC GOT setup at the current
   insn point: load the current RIP via a local label, load the label's
   offset from _GLOBAL_OFFSET_TABLE_ into the scratch register TMP_REGNO,
   and add the two into pic_offset_table_rtx.  The helper label is then
   demoted to a deleted-label note so it survives only for debug output.  */

static void
ix86_init_large_pic_reg (unsigned int tmp_regno)
{
  rtx_code_label *label;
  rtx tmp_reg;

  /* The rex64 set_rip/set_got_offset patterns only exist for 64-bit.  */
  gcc_assert (Pmode == DImode);
  label = gen_label_rtx ();
  emit_label (label);
  /* Keep the label from being removed as unused; the GOT setup insns
     reference it.  */
  LABEL_PRESERVE_P (label) = 1;
  tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
  /* The scratch must not alias the PIC register we are building.  */
  gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
  emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
                                label));
  emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
  emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
  /* Turn the label into a NOTE_INSN_DELETED_LABEL, preserving its name
     for debug info, now that the insns above have been emitted.  */
  const char *name = LABEL_NAME (label);
  PUT_CODE (label, NOTE);
  NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
  NOTE_DELETED_LABEL_NAME (label) = name;
}
    1820              : 
/* Create and initialize PIC register if required.

   When ix86_use_pseudo_pic_reg says a pseudo PIC register is needed,
   build the GOT-setup insn sequence and insert it on the edge leaving
   the entry block, so it executes once before the function body.  */
static void
ix86_init_pic_reg (void)
{
  edge entry_edge;
  rtx_insn *seq;

  if (!ix86_use_pseudo_pic_reg ())
    return;

  /* Collect the setup insns in a detached sequence first.  */
  start_sequence ();

  if (TARGET_64BIT)
    {
      /* Large-model PIC needs a multi-insn RIP-relative computation;
         otherwise a single set_got_rex64 suffices.  */
      if (ix86_cmodel == CM_LARGE_PIC)
        ix86_init_large_pic_reg (R11_REG);
      else
        emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
    }
  else
    {
      /*  If there is future mcount call in the function it is more profitable
          to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM.  */
      rtx reg = crtl->profile
                ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
                : pic_offset_table_rtx;
      rtx_insn *insn = emit_insn (gen_set_got (reg));
      /* Mark the insn for CFI generation.  */
      RTX_FRAME_RELATED_P (insn) = 1;
      if (crtl->profile)
        emit_move_insn (pic_offset_table_rtx, reg);
      add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
    }

  seq = end_sequence ();

  /* Splice the sequence onto the entry edge so it runs exactly once.  */
  entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  insert_insn_on_edge (seq, entry_edge);
  commit_one_edge_insertion (entry_edge);
}
    1860              : 
/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.

   CUM records the calling ABI, the numbers of integer/SSE/MMX argument
   registers still available, varargs status, and several one-shot ABI
   warning flags that later argument classification consults.  CALLER is
   nonzero when CUM describes the caller side of the call.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
                      tree fntype,      /* tree ptr for function decl */
                      rtx libname,      /* SYMBOL_REF of library name or 0 */
                      tree fndecl,
                      int caller)
{
  struct cgraph_node *local_info_node = NULL;
  struct cgraph_node *target = NULL;

  /* Set silent_p to false to raise an error for invalid calls when
     expanding function body.  */
  cfun->machine->silent_p = false;

  memset (cum, 0, sizeof (*cum));

  /* Determine the call ABI and the type to inspect for the
     preserve_none attribute, preferring the ultimate alias target
     of FNDECL when the cgraph knows it.  */
  tree preserve_none_type;
  if (fndecl)
    {
      target = cgraph_node::get (fndecl);
      if (target)
        {
          target = target->function_symbol ();
          local_info_node = cgraph_node::local_info_node (target->decl);
          cum->call_abi = ix86_function_abi (target->decl);
          preserve_none_type = TREE_TYPE (target->decl);
        }
      else
        {
          cum->call_abi = ix86_function_abi (fndecl);
          preserve_none_type = TREE_TYPE (fndecl);
        }
    }
  else
    {
      cum->call_abi = ix86_function_type_abi (fntype);
      preserve_none_type = fntype;
    }
  cum->preserve_none_abi
    = (preserve_none_type
       && (lookup_attribute ("preserve_none",
                             TYPE_ATTRIBUTES (preserve_none_type))
           != nullptr));

  cum->caller = caller;

  /* Set up the number of registers to use for passing arguments.  */
  cum->nregs = ix86_regparm;
  if (TARGET_64BIT)
    {
      /* 64-bit register counts depend on whether this call uses the
         SysV or the Microsoft ABI.  */
      cum->nregs = (cum->call_abi == SYSV_ABI
                   ? X86_64_REGPARM_MAX
                   : X86_64_MS_REGPARM_MAX);
    }
  if (TARGET_SSE)
    {
      cum->sse_nregs = SSE_REGPARM_MAX;
      if (TARGET_64BIT)
        {
          cum->sse_nregs = (cum->call_abi == SYSV_ABI
                           ? X86_64_SSE_REGPARM_MAX
                           : X86_64_MS_SSE_REGPARM_MAX);
        }
    }
  if (TARGET_MMX)
    cum->mmx_nregs = MMX_REGPARM_MAX;
  /* Arm the one-shot psABI warnings; classification clears them for
     varargs below.  */
  cum->warn_avx512f = true;
  cum->warn_avx = true;
  cum->warn_sse = true;
  cum->warn_mmx = true;

  /* Because type might mismatch in between caller and callee, we need to
     use actual type of function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive
     helping K&R code.
     FIXME: once typesytem is fixed, we won't need this code anymore.  */
  if (local_info_node && local_info_node->local
      && local_info_node->can_change_signature)
    fntype = TREE_TYPE (target->decl);
  cum->stdarg = stdarg_p (fntype);
  /* An unprototyped function (or a libcall with no libname) may take
     variable arguments.  */
  cum->maybe_vaarg = (fntype
                      ? (!prototype_p (fntype) || stdarg_p (fntype))
                      : !libname);

  cum->decl = fndecl;

  /* -Wabi: warn when an empty type is followed by a non-empty argument,
     since its passing changed between GCC releases.  */
  cum->warn_empty = !warn_abi || cum->stdarg;
  if (!cum->warn_empty && fntype)
    {
      function_args_iterator iter;
      tree argtype;
      bool seen_empty_type = false;
      FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
        {
          if (argtype == error_mark_node || VOID_TYPE_P (argtype))
            break;
          if (TYPE_EMPTY_P (argtype))
            seen_empty_type = true;
          else if (seen_empty_type)
            {
              cum->warn_empty = true;
              break;
            }
        }
    }

  if (!TARGET_64BIT)
    {
      /* If there are variable arguments, then we won't pass anything
         in registers in 32-bit mode. */
      if (stdarg_p (fntype))
        {
          cum->nregs = 0;
          /* Since in 32-bit, variable arguments are always passed on
             stack, there is scratch register available for indirect
             sibcall.  */
          cfun->machine->arg_reg_available = true;
          cum->sse_nregs = 0;
          cum->mmx_nregs = 0;
          cum->warn_avx512f = false;
          cum->warn_avx = false;
          cum->warn_sse = false;
          cum->warn_mmx = false;
          return;
        }

      /* Use ecx and edx registers if function has fastcall attribute,
         else look for regparm information.  */
      if (fntype)
        {
          unsigned int ccvt = ix86_get_callcvt (fntype);
          if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
            {
              cum->nregs = 1;
              cum->fastcall = 1; /* Same first register as in fastcall.  */
            }
          else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
            {
              cum->nregs = 2;
              cum->fastcall = 1;
            }
          else
            cum->nregs = ix86_function_regparm (fntype, fndecl);
        }

      /* Set up the number of SSE registers used for passing SFmode
         and DFmode arguments.  Warn for mismatching ABI.  */
      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
    }

  cfun->machine->arg_reg_available = (cum->nregs > 0);
}
    2018              : 
/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
   But in the case of vector types, it is some vector mode.

   When we have only some of our vector isa extensions enabled, then there
   are some modes for which vector_mode_supported_p is false.  For these
   modes, the generic vector support in gcc will choose some non-vector mode
   in order to implement the type.  By computing the natural mode, we'll
   select the proper ABI location for the operand and not depend on whatever
   the middle-end decides to do with these vector types.

   The midde-end can't deal with the vector types > 16 bytes.  In this
   case, we return the original mode and warn ABI change if CUM isn't
   NULL.

   If INT_RETURN is true, warn ABI change if the vector mode isn't
   available for function return value.  */

static machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
                   bool in_return)
{
  machine_mode mode = TYPE_MODE (type);

  /* Only vector types that the middle-end lowered to a non-vector mode
     need any work; everything else keeps TYPE_MODE.  */
  if (VECTOR_TYPE_P (type) && !VECTOR_MODE_P (mode))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if ((size == 8 || size == 16 || size == 32 || size == 64)
          /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
          && TYPE_VECTOR_SUBPARTS (type) > 1)
        {
          machine_mode innermode = TYPE_MODE (TREE_TYPE (type));

          /* There are no XFmode vector modes ...  */
          if (innermode == XFmode)
            return mode;

          /* ... and no decimal float vector modes.  */
          if (DECIMAL_FLOAT_MODE_P (innermode))
            return mode;

          /* Scan vector modes starting from the smallest candidate of
             the right (float vs integer) flavor.  */
          if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type)))
            mode = MIN_MODE_VECTOR_FLOAT;
          else
            mode = MIN_MODE_VECTOR_INT;

          /* Get the mode which has this inner mode and number of units.  */
          FOR_EACH_MODE_FROM (mode, mode)
            if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
                && GET_MODE_INNER (mode) == innermode)
              {
                /* For each "ISA not enabled" case, emit the matching
                   psABI warning at most once per compilation (separate
                   one-shot flags for argument vs return position), then
                   fall back to TYPE_MODE for the >16-byte cases.  */
                if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
                  {
                    static bool warnedavx512f;
                    static bool warnedavx512f_ret;

                    if (cum && cum->warn_avx512f && !warnedavx512f)
                      {
                        if (warning (OPT_Wpsabi, "AVX512F vector argument "
                                     "without AVX512F enabled changes the ABI"))
                          warnedavx512f = true;
                      }
                    else if (in_return && !warnedavx512f_ret)
                      {
                        if (warning (OPT_Wpsabi, "AVX512F vector return "
                                     "without AVX512F enabled changes the ABI"))
                          warnedavx512f_ret = true;
                      }

                    return TYPE_MODE (type);
                  }
                else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
                  {
                    static bool warnedavx;
                    static bool warnedavx_ret;

                    if (cum && cum->warn_avx && !warnedavx)
                      {
                        if (warning (OPT_Wpsabi, "AVX vector argument "
                                     "without AVX enabled changes the ABI"))
                          warnedavx = true;
                      }
                    else if (in_return && !warnedavx_ret)
                      {
                        if (warning (OPT_Wpsabi, "AVX vector return "
                                     "without AVX enabled changes the ABI"))
                          warnedavx_ret = true;
                      }

                    return TYPE_MODE (type);
                  }
                else if (((size == 8 && TARGET_64BIT) || size == 16)
                         && !TARGET_SSE
                         && !TARGET_IAMCU)
                  {
                    static bool warnedsse;
                    static bool warnedsse_ret;

                    if (cum && cum->warn_sse && !warnedsse)
                      {
                        if (warning (OPT_Wpsabi, "SSE vector argument "
                                     "without SSE enabled changes the ABI"))
                          warnedsse = true;
                      }
                    else if (!TARGET_64BIT && in_return && !warnedsse_ret)
                      {
                        if (warning (OPT_Wpsabi, "SSE vector return "
                                     "without SSE enabled changes the ABI"))
                          warnedsse_ret = true;
                      }
                  }
                else if ((size == 8 && !TARGET_64BIT)
                         && (!cfun
                             || cfun->machine->func_type == TYPE_NORMAL)
                         && !TARGET_MMX
                         && !TARGET_IAMCU)
                  {
                    static bool warnedmmx;
                    static bool warnedmmx_ret;

                    if (cum && cum->warn_mmx && !warnedmmx)
                      {
                        if (warning (OPT_Wpsabi, "MMX vector argument "
                                     "without MMX enabled changes the ABI"))
                          warnedmmx = true;
                      }
                    else if (in_return && !warnedmmx_ret)
                      {
                        if (warning (OPT_Wpsabi, "MMX vector return "
                                     "without MMX enabled changes the ABI"))
                          warnedmmx_ret = true;
                      }
                  }
                /* SSE/MMX warnings fall through here: the vector mode
                   found by the scan is still the natural mode.  */
                return mode;
              }

          /* The loop above must find a matching vector mode for the
             sizes admitted by the guard.  */
          gcc_unreachable ();
        }
    }

  return mode;
}
    2160              : 
    2161              : /* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
    2162              :    this may not agree with the mode that the type system has chosen for the
    2163              :    register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
    2164              :    go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
    2165              : 
    2166              : static rtx
    2167     36294137 : gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
    2168              :                      unsigned int regno)
    2169              : {
    2170     36294137 :   rtx tmp;
    2171              : 
    2172     36294137 :   if (orig_mode != BLKmode)
    2173     36294109 :     tmp = gen_rtx_REG (orig_mode, regno);
    2174              :   else
    2175              :     {
    2176           28 :       tmp = gen_rtx_REG (mode, regno);
    2177           28 :       tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
    2178           28 :       tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
    2179              :     }
    2180              : 
    2181     36294137 :   return tmp;
    2182              : }
    2183              : 
    2184              : /* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
    2185              :    of this code is to classify each 8bytes of incoming argument by the register
    2186              :    class and assign registers accordingly.  */
    2187              : 
    2188              : /* Return the union class of CLASS1 and CLASS2.
    2189              :    See the x86-64 PS ABI for details.  */
    2190              : 
    2191              : static enum x86_64_reg_class
    2192     62808408 : merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
    2193              : {
    2194              :   /* Rule #1: If both classes are equal, this is the resulting class.  */
    2195     61586067 :   if (class1 == class2)
    2196              :     return class1;
    2197              : 
    2198              :   /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
    2199              :      the other class.  */
    2200     54346915 :   if (class1 == X86_64_NO_CLASS)
    2201              :     return class2;
    2202     55152160 :   if (class2 == X86_64_NO_CLASS)
    2203              :     return class1;
    2204              : 
    2205              :   /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
    2206      2365353 :   if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    2207              :     return X86_64_MEMORY_CLASS;
    2208              : 
    2209              :   /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
    2210      2020313 :   if ((class1 == X86_64_INTEGERSI_CLASS
    2211       189412 :        && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS))
    2212      2019107 :       || (class2 == X86_64_INTEGERSI_CLASS
    2213       982284 :           && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS)))
    2214              :     return X86_64_INTEGERSI_CLASS;
    2215      2015291 :   if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
    2216       829324 :       || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
    2217              :     return X86_64_INTEGER_CLASS;
    2218              : 
    2219              :   /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
    2220              :      MEMORY is used.  */
    2221       140672 :   if (class1 == X86_64_X87_CLASS
    2222              :       || class1 == X86_64_X87UP_CLASS
    2223       140672 :       || class1 == X86_64_COMPLEX_X87_CLASS
    2224              :       || class2 == X86_64_X87_CLASS
    2225       139767 :       || class2 == X86_64_X87UP_CLASS
    2226        59748 :       || class2 == X86_64_COMPLEX_X87_CLASS)
    2227        80924 :     return X86_64_MEMORY_CLASS;
    2228              : 
    2229              :   /* Rule #6: Otherwise class SSE is used.  */
    2230              :   return X86_64_SSE_CLASS;
    2231              : }
    2232              : 
/* Classify the argument of type TYPE and mode MODE.
   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies offset
   of the offset in bits modulo 512 to avoid overflow cases.

   ZERO_WIDTH_BITFIELDS is an in/out flag used to diagnose the GCC 12.1
   ABI change for C zero-width bit-fields: on input, 2 requests the old
   (pre-12.1) behavior of classifying such fields instead of skipping
   them; on output, it is set to 1 when a zero-width bit-field was seen
   and skipped.  See the 4-argument wrapper below.

   See the x86-64 PS ABI for details.
*/

static int
classify_argument (machine_mode mode, const_tree type,
                   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset,
                   int &zero_width_bitfields)
{
  HOST_WIDE_INT bytes
    = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
  /* Number of 64-bit words occupied, counting the partial word implied
     by a sub-word BIT_OFFSET.  */
  int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);

  /* Variable sized entities are always passed/returned in memory
     (int_size_in_bytes returns a negative value for them).  */
  if (bytes < 0)
    return 0;

  if (mode != VOIDmode)
    {
      /* The value of "named" doesn't matter.  */
      function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
      if (targetm.calls.must_pass_in_stack (arg))
        return 0;
    }

  /* Aggregates, and _BitInt wider than one word, are classified
     field-by-field / word-by-word.  */
  if (type && (AGGREGATE_TYPE_P (type)
               || (TREE_CODE (type) == BITINT_TYPE && words > 1)))
    {
      int i;
      tree field;
      enum x86_64_reg_class subclasses[MAX_CLASSES];

      /* On x86-64 we pass structures larger than 64 bytes on the stack.  */
      if (bytes > 64)
        return 0;

      for (i = 0; i < words; i++)
        classes[i] = X86_64_NO_CLASS;

      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
         signal the memory class, so handle it as a special case.  */
      if (!words)
        {
          classes[0] = X86_64_NO_CLASS;
          return 1;
        }

      /* Classify each field of record and merge classes.  */
      switch (TREE_CODE (type))
        {
        case RECORD_TYPE:
          /* And now merge the fields of structure.  */
          for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
            {
              if (TREE_CODE (field) == FIELD_DECL)
                {
                  int num;

                  if (TREE_TYPE (field) == error_mark_node)
                    continue;

                  /* Bitfields are always classified as integer.  Handle them
                     early, since later code would consider them to be
                     misaligned integers.  */
                  if (DECL_BIT_FIELD (field))
                    {
                      if (integer_zerop (DECL_SIZE (field)))
                        {
                          /* C++ zero-width bit-fields are always ignored;
                             C ones are ignored too (GCC 12.1 behavior)
                             unless the caller asked for the old ABI by
                             passing ZERO_WIDTH_BITFIELDS == 2.  */
                          if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
                            continue;
                          if (zero_width_bitfields != 2)
                            {
                              zero_width_bitfields = 1;
                              continue;
                            }
                        }
                      /* Mark every 64-bit word the bit-field overlaps
                         as INTEGER.  */
                      for (i = (int_bit_position (field)
                                + (bit_offset % 64)) / 8 / 8;
                           i < ((int_bit_position (field) + (bit_offset % 64))
                                + tree_to_shwi (DECL_SIZE (field))
                                + 63) / 8 / 8; i++)
                        classes[i]
                          = merge_classes (X86_64_INTEGER_CLASS, classes[i]);
                    }
                  else
                    {
                      int pos;

                      /* Reuse TYPE for the field's type; the outer
                         aggregate type is not needed past this point.  */
                      type = TREE_TYPE (field);

                      /* Flexible array member is ignored.  */
                      if (TYPE_MODE (type) == BLKmode
                          && TREE_CODE (type) == ARRAY_TYPE
                          && TYPE_SIZE (type) == NULL_TREE
                          && TYPE_DOMAIN (type) != NULL_TREE
                          && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
                              == NULL_TREE))
                        {
                          static bool warned;

                          if (!warned && warn_psabi)
                            {
                              warned = true;
                              inform (input_location,
                                      "the ABI of passing struct with"
                                      " a flexible array member has"
                                      " changed in GCC 4.4");
                            }
                          continue;
                        }
                      /* Recursively classify the field and merge the
                         result into the words it occupies.  */
                      num = classify_argument (TYPE_MODE (type), type,
                                               subclasses,
                                               (int_bit_position (field)
                                                + bit_offset) % 512,
                                               zero_width_bitfields);
                      if (!num)
                        return 0;
                      pos = (int_bit_position (field)
                             + (bit_offset % 64)) / 8 / 8;
                      for (i = 0; i < num && (i + pos) < words; i++)
                        classes[i + pos]
                          = merge_classes (subclasses[i], classes[i + pos]);
                    }
                }
            }
          break;

        case ARRAY_TYPE:
          /* Arrays are handled as small records.  */
          {
            int num;
            num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
                                     TREE_TYPE (type), subclasses, bit_offset,
                                     zero_width_bitfields);
            if (!num)
              return 0;

            /* The partial classes are now full classes: a repeated
               element no longer occupies only part of its word.  */
            if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
              subclasses[0] = X86_64_SSE_CLASS;
            if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2)
              subclasses[0] = X86_64_SSE_CLASS;
            if (subclasses[0] == X86_64_INTEGERSI_CLASS
                && !((bit_offset % 64) == 0 && bytes == 4))
              subclasses[0] = X86_64_INTEGER_CLASS;

            /* Replicate the element classification across the array.  */
            for (i = 0; i < words; i++)
              classes[i] = subclasses[i % num];

            break;
          }
        case UNION_TYPE:
        case QUAL_UNION_TYPE:
          /* Unions are similar to RECORD_TYPE but offset is always 0.
             */
          for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
            {
              if (TREE_CODE (field) == FIELD_DECL)
                {
                  int num;

                  if (TREE_TYPE (field) == error_mark_node)
                    continue;

                  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
                                           TREE_TYPE (field), subclasses,
                                           bit_offset, zero_width_bitfields);
                  if (!num)
                    return 0;
                  for (i = 0; i < num && i < words; i++)
                    classes[i] = merge_classes (subclasses[i], classes[i]);
                }
            }
          break;

        case BITINT_TYPE:
          /* _BitInt(N) for N > 64 is passed as structure containing
             (N + 63) / 64 64-bit elements.  Only the two-word case can
             be passed in registers.  */
          if (words > 2)
            return 0;
          classes[0] = classes[1] = X86_64_INTEGER_CLASS;
          return 2;

        default:
          gcc_unreachable ();
        }

      if (words > 2)
        {
          /* When size > 16 bytes, if the first one isn't
             X86_64_SSE_CLASS or any other ones aren't
             X86_64_SSEUP_CLASS, everything should be passed in
             memory.  */
          if (classes[0] != X86_64_SSE_CLASS)
            return 0;

          for (i = 1; i < words; i++)
            if (classes[i] != X86_64_SSEUP_CLASS)
              return 0;
        }

      /* Final merger cleanup.  */
      for (i = 0; i < words; i++)
        {
          /* If one class is MEMORY, everything should be passed in
             memory.  */
          if (classes[i] == X86_64_MEMORY_CLASS)
            return 0;

          /* The X86_64_SSEUP_CLASS should be always preceded by
             X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
          if (classes[i] == X86_64_SSEUP_CLASS
              && classes[i - 1] != X86_64_SSE_CLASS
              && classes[i - 1] != X86_64_SSEUP_CLASS)
            {
              /* The first one should never be X86_64_SSEUP_CLASS.  */
              gcc_assert (i != 0);
              classes[i] = X86_64_SSE_CLASS;
            }

          /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
             everything should be passed in memory.  */
          if (classes[i] == X86_64_X87UP_CLASS
              && (classes[i - 1] != X86_64_X87_CLASS))
            {
              static bool warned;

              /* The first one should never be X86_64_X87UP_CLASS.  */
              gcc_assert (i != 0);
              if (!warned && warn_psabi)
                {
                  warned = true;
                  inform (input_location,
                          "the ABI of passing union with %<long double%>"
                          " has changed in GCC 4.4");
                }
              return 0;
            }
        }
      return words;
    }

  /* Scalar (non-aggregate) path follows.

     Compute alignment needed.  We align all types to natural boundaries with
     exception of XFmode that is aligned to 64bits.  */
  if (mode != VOIDmode && mode != BLKmode)
    {
      int mode_alignment = GET_MODE_BITSIZE (mode);

      if (mode == XFmode)
        mode_alignment = 128;
      else if (mode == XCmode)
        mode_alignment = 256;
      if (COMPLEX_MODE_P (mode))
        mode_alignment /= 2;
      /* Misaligned fields are always returned in memory.  */
      if (bit_offset % mode_alignment)
        return 0;
    }

  /* for V1xx modes, just use the base mode */
  if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
      && GET_MODE_UNIT_SIZE (mode) == bytes)
    mode = GET_MODE_INNER (mode);

  /* Classification of atomic types.  */
  switch (mode)
    {
    case E_SDmode:
    case E_DDmode:
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case E_TDmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
    case E_CSImode:
    case E_CHImode:
    case E_CQImode:
      {
        int size = bit_offset + (int) GET_MODE_BITSIZE (mode);

        /* Analyze last 128 bits only.  SIZE becomes the offset of the
           value's last bit within a 16-byte window.  */
        size = (size - 1) & 0x7f;

        if (size < 32)
          {
            classes[0] = X86_64_INTEGERSI_CLASS;
            return 1;
          }
        else if (size < 64)
          {
            classes[0] = X86_64_INTEGER_CLASS;
            return 1;
          }
        else if (size < 64+32)
          {
            classes[0] = X86_64_INTEGER_CLASS;
            classes[1] = X86_64_INTEGERSI_CLASS;
            return 2;
          }
        else if (size < 64+64)
          {
            classes[0] = classes[1] = X86_64_INTEGER_CLASS;
            return 2;
          }
        else
          gcc_unreachable ();
      }
    case E_CDImode:
    case E_TImode:
      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
      return 2;
    case E_COImode:
    case E_OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();
    case E_CTImode:
      return 0;
    case E_HFmode:
    case E_BFmode:
      if (!(bit_offset % 64))
        classes[0] = X86_64_SSEHF_CLASS;
      else
        classes[0] = X86_64_SSE_CLASS;
      return 1;
    case E_SFmode:
      if (!(bit_offset % 64))
        classes[0] = X86_64_SSESF_CLASS;
      else
        classes[0] = X86_64_SSE_CLASS;
      return 1;
    case E_DFmode:
      classes[0] = X86_64_SSEDF_CLASS;
      return 1;
    case E_XFmode:
      classes[0] = X86_64_X87_CLASS;
      classes[1] = X86_64_X87UP_CLASS;
      return 2;
    case E_TFmode:
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case E_HCmode:
    case E_BCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
        return 1;
      else
        {
          classes[1] = X86_64_SSEHF_CLASS;
          return 2;
        }
    case E_SCmode:
      classes[0] = X86_64_SSE_CLASS;
      if (!(bit_offset % 64))
        return 1;
      else
        {
          static bool warned;

          if (!warned && warn_psabi)
            {
              warned = true;
              inform (input_location,
                      "the ABI of passing structure with %<complex float%>"
                      " member has changed in GCC 4.4");
            }
          classes[1] = X86_64_SSESF_CLASS;
          return 2;
        }
    case E_DCmode:
      classes[0] = X86_64_SSEDF_CLASS;
      classes[1] = X86_64_SSEDF_CLASS;
      return 2;
    case E_XCmode:
      classes[0] = X86_64_COMPLEX_X87_CLASS;
      return 1;
    case E_TCmode:
      /* This mode is larger than 16 bytes.  */
      return 0;
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HFmode:
    case E_V16BFmode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
      /* 256-bit vectors: one SSE word followed by three SSEUP words.  */
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      return 4;
    case E_V8DFmode:
    case E_V16SFmode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V8DImode:
    case E_V16SImode:
    case E_V32HImode:
    case E_V64QImode:
      /* 512-bit vectors: one SSE word followed by seven SSEUP words.  */
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      classes[2] = X86_64_SSEUP_CLASS;
      classes[3] = X86_64_SSEUP_CLASS;
      classes[4] = X86_64_SSEUP_CLASS;
      classes[5] = X86_64_SSEUP_CLASS;
      classes[6] = X86_64_SSEUP_CLASS;
      classes[7] = X86_64_SSEUP_CLASS;
      return 8;
    case E_V4SFmode:
    case E_V4SImode:
    case E_V16QImode:
    case E_V8HImode:
    case E_V8HFmode:
    case E_V8BFmode:
    case E_V2DFmode:
    case E_V2DImode:
      /* 128-bit vectors.  */
      classes[0] = X86_64_SSE_CLASS;
      classes[1] = X86_64_SSEUP_CLASS;
      return 2;
    case E_V1TImode:
    case E_V1DImode:
    case E_V2SFmode:
    case E_V2SImode:
    case E_V4HImode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2HFmode:
    case E_V2BFmode:
    case E_V8QImode:
      /* 64-bit and smaller vectors fit in a single SSE word.  */
      classes[0] = X86_64_SSE_CLASS;
      return 1;
    case E_BLKmode:
    case E_VOIDmode:
      return 0;
    default:
      gcc_assert (VECTOR_MODE_P (mode));

      if (bytes > 16)
        return 0;

      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);

      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
        classes[0] = X86_64_INTEGERSI_CLASS;
      else
        classes[0] = X86_64_INTEGER_CLASS;
      /* classes[1] is only reported to the caller when bytes > 8.  */
      classes[1] = X86_64_INTEGER_CLASS;
      return 1 + (bytes > 8);
    }
}
    2697              : 
    2698              : /* Wrapper around classify_argument with the extra zero_width_bitfields
    2699              :    argument, to diagnose GCC 12.1 ABI differences for C.  */
    2700              : 
    2701              : static int
    2702    356310651 : classify_argument (machine_mode mode, const_tree type,
    2703              :                    enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
    2704              : {
    2705    356310651 :   int zero_width_bitfields = 0;
    2706    356310651 :   static bool warned = false;
    2707    356310651 :   int n = classify_argument (mode, type, classes, bit_offset,
    2708              :                              zero_width_bitfields);
    2709    356310651 :   if (!zero_width_bitfields || warned || !warn_psabi)
    2710              :     return n;
    2711          534 :   enum x86_64_reg_class alt_classes[MAX_CLASSES];
    2712          534 :   zero_width_bitfields = 2;
    2713          534 :   if (classify_argument (mode, type, alt_classes, bit_offset,
    2714              :                          zero_width_bitfields) != n)
    2715            0 :     zero_width_bitfields = 3;
    2716              :   else
    2717         1286 :     for (int i = 0; i < n; i++)
    2718          760 :       if (classes[i] != alt_classes[i])
    2719              :         {
    2720            8 :           zero_width_bitfields = 3;
    2721            8 :           break;
    2722              :         }
    2723          534 :   if (zero_width_bitfields == 3)
    2724              :     {
    2725            8 :       warned = true;
    2726            8 :       const char *url
    2727              :         = CHANGES_ROOT_URL "gcc-12/changes.html#zero_width_bitfields";
    2728              : 
    2729            8 :       inform (input_location,
    2730              :               "the ABI of passing C structures with zero-width bit-fields"
    2731              :               " has changed in GCC %{12.1%}", url);
    2732              :     }
    2733              :   return n;
    2734              : }
    2735              : 
    2736              : /* Examine the argument and return set number of register required in each
    2737              :    class.  Return true iff parameter should be passed in memory.  */
    2738              : 
    2739              : static bool
    2740    240184910 : examine_argument (machine_mode mode, const_tree type, int in_return,
    2741              :                   int *int_nregs, int *sse_nregs)
    2742              : {
    2743    240184910 :   enum x86_64_reg_class regclass[MAX_CLASSES];
    2744    240184910 :   int n = classify_argument (mode, type, regclass, 0);
    2745              : 
    2746    240184910 :   *int_nregs = 0;
    2747    240184910 :   *sse_nregs = 0;
    2748              : 
    2749    240184910 :   if (!n)
    2750              :     return true;
    2751    683768421 :   for (n--; n >= 0; n--)
    2752    450134806 :     switch (regclass[n])
    2753              :       {
    2754    162678014 :       case X86_64_INTEGER_CLASS:
    2755    162678014 :       case X86_64_INTEGERSI_CLASS:
    2756    162678014 :         (*int_nregs)++;
    2757    162678014 :         break;
    2758     74078869 :       case X86_64_SSE_CLASS:
    2759     74078869 :       case X86_64_SSEHF_CLASS:
    2760     74078869 :       case X86_64_SSESF_CLASS:
    2761     74078869 :       case X86_64_SSEDF_CLASS:
    2762     74078869 :         (*sse_nregs)++;
    2763     74078869 :         break;
    2764              :       case X86_64_NO_CLASS:
    2765              :       case X86_64_SSEUP_CLASS:
    2766              :         break;
    2767      9338405 :       case X86_64_X87_CLASS:
    2768      9338405 :       case X86_64_X87UP_CLASS:
    2769      9338405 :       case X86_64_COMPLEX_X87_CLASS:
    2770      9338405 :         if (!in_return)
    2771              :           return true;
    2772              :         break;
    2773            0 :       case X86_64_MEMORY_CLASS:
    2774            0 :         gcc_unreachable ();
    2775              :       }
    2776              : 
    2777              :   return false;
    2778              : }
    2779              : 
    2780              : /* Construct container for the argument used by GCC interface.  See
    2781              :    FUNCTION_ARG for the detailed description.  */
    2782              : 
    2783              : static rtx
    2784    116125741 : construct_container (machine_mode mode, machine_mode orig_mode,
    2785              :                      const_tree type, int in_return, int nintregs, int nsseregs,
    2786              :                      const int *intreg, int sse_regno)
    2787              : {
    2788              :   /* The following variables hold the static issued_error state.  */
    2789    116125741 :   static bool issued_sse_arg_error;
    2790    116125741 :   static bool issued_sse_ret_error;
    2791    116125741 :   static bool issued_x87_ret_error;
    2792              : 
    2793    116125741 :   machine_mode tmpmode;
    2794    116125741 :   int bytes
    2795    231581618 :     = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
    2796    116125741 :   enum x86_64_reg_class regclass[MAX_CLASSES];
    2797    116125741 :   int n;
    2798    116125741 :   int i;
    2799    116125741 :   int nexps = 0;
    2800    116125741 :   int needed_sseregs, needed_intregs;
    2801    116125741 :   rtx exp[MAX_CLASSES];
    2802    116125741 :   rtx ret;
    2803              : 
    2804    116125741 :   n = classify_argument (mode, type, regclass, 0);
    2805    116125741 :   if (!n)
    2806              :     return NULL;
    2807    115663163 :   if (examine_argument (mode, type, in_return, &needed_intregs,
    2808              :                         &needed_sseregs))
    2809              :     return NULL;
    2810    115613245 :   if (needed_intregs > nintregs || needed_sseregs > nsseregs)
    2811              :     return NULL;
    2812              : 
    2813              :   /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
    2814              :      some less clueful developer tries to use floating-point anyway.  */
    2815    114513243 :   if (needed_sseregs
    2816     36608108 :       && (!TARGET_SSE || (VALID_SSE2_TYPE_MODE (mode) && !TARGET_SSE2)))
    2817              :     {
    2818              :       /* Return early if we shouldn't raise an error for invalid
    2819              :          calls.  */
    2820           71 :       if (cfun != NULL && cfun->machine->silent_p)
    2821              :         return NULL;
    2822           39 :       if (in_return)
    2823              :         {
    2824           34 :           if (!issued_sse_ret_error)
    2825              :             {
    2826           16 :               if (VALID_SSE2_TYPE_MODE (mode))
    2827            5 :                 error ("SSE register return with SSE2 disabled");
    2828              :               else
    2829           11 :                 error ("SSE register return with SSE disabled");
    2830           16 :               issued_sse_ret_error = true;
    2831              :             }
    2832              :         }
    2833            5 :       else if (!issued_sse_arg_error)
    2834              :         {
    2835            5 :           if (VALID_SSE2_TYPE_MODE (mode))
    2836            0 :             error ("SSE register argument with SSE2 disabled");
    2837              :           else
    2838            5 :             error ("SSE register argument with SSE disabled");
    2839            5 :           issued_sse_arg_error = true;
    2840              :         }
    2841           39 :       return NULL;
    2842              :     }
    2843              : 
    2844              :   /* Likewise, error if the ABI requires us to return values in the
    2845              :      x87 registers and the user specified -mno-80387.  */
    2846    114513172 :   if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
    2847      1421778 :     for (i = 0; i < n; i++)
    2848       750500 :       if (regclass[i] == X86_64_X87_CLASS
    2849              :           || regclass[i] == X86_64_X87UP_CLASS
    2850       750500 :           || regclass[i] == X86_64_COMPLEX_X87_CLASS)
    2851              :         {
    2852              :           /* Return early if we shouldn't raise an error for invalid
    2853              :              calls.  */
    2854           16 :           if (cfun != NULL && cfun->machine->silent_p)
    2855              :             return NULL;
    2856           13 :           if (!issued_x87_ret_error)
    2857              :             {
    2858            8 :               error ("x87 register return with x87 disabled");
    2859            8 :               issued_x87_ret_error = true;
    2860              :             }
    2861           13 :           return NULL;
    2862              :         }
    2863              : 
    2864              :   /* First construct simple cases.  Avoid SCmode, since we want to use
    2865              :      single register to pass this type.  */
    2866    114513156 :   if (n == 1 && mode != SCmode && mode != HCmode)
    2867     77672318 :     switch (regclass[0])
    2868              :       {
    2869     71682597 :       case X86_64_INTEGER_CLASS:
    2870     71682597 :       case X86_64_INTEGERSI_CLASS:
    2871     71682597 :         return gen_rtx_REG (mode, intreg[0]);
    2872      5791262 :       case X86_64_SSE_CLASS:
    2873      5791262 :       case X86_64_SSEHF_CLASS:
    2874      5791262 :       case X86_64_SSESF_CLASS:
    2875      5791262 :       case X86_64_SSEDF_CLASS:
    2876      5791262 :         if (mode != BLKmode)
    2877     11581716 :           return gen_reg_or_parallel (mode, orig_mode,
    2878     11581716 :                                       GET_SSE_REGNO (sse_regno));
    2879              :         break;
    2880       170369 :       case X86_64_X87_CLASS:
    2881       170369 :       case X86_64_COMPLEX_X87_CLASS:
    2882       170369 :         return gen_rtx_REG (mode, FIRST_STACK_REG);
    2883              :       case X86_64_NO_CLASS:
    2884              :         /* Zero sized array, struct or class.  */
    2885              :         return NULL;
    2886            0 :       default:
    2887            0 :         gcc_unreachable ();
    2888              :       }
    2889     36841242 :   if (n == 2
    2890     19088707 :       && regclass[0] == X86_64_SSE_CLASS
    2891     12818130 :       && regclass[1] == X86_64_SSEUP_CLASS
    2892     12813220 :       && mode != BLKmode)
    2893     25626440 :     return gen_reg_or_parallel (mode, orig_mode,
    2894     25626440 :                                 GET_SSE_REGNO (sse_regno));
    2895     24028022 :   if (n == 4
    2896      8410452 :       && regclass[0] == X86_64_SSE_CLASS
    2897      8410452 :       && regclass[1] == X86_64_SSEUP_CLASS
    2898      8410452 :       && regclass[2] == X86_64_SSEUP_CLASS
    2899      8410452 :       && regclass[3] == X86_64_SSEUP_CLASS
    2900      8410452 :       && mode != BLKmode)
    2901     16817526 :     return gen_reg_or_parallel (mode, orig_mode,
    2902     16817526 :                                 GET_SSE_REGNO (sse_regno));
    2903     15619259 :   if (n == 8
    2904      9107113 :       && regclass[0] == X86_64_SSE_CLASS
    2905      9107113 :       && regclass[1] == X86_64_SSEUP_CLASS
    2906      9107113 :       && regclass[2] == X86_64_SSEUP_CLASS
    2907      9107113 :       && regclass[3] == X86_64_SSEUP_CLASS
    2908      9107113 :       && regclass[4] == X86_64_SSEUP_CLASS
    2909      9107113 :       && regclass[5] == X86_64_SSEUP_CLASS
    2910      9107113 :       && regclass[6] == X86_64_SSEUP_CLASS
    2911      9107113 :       && regclass[7] == X86_64_SSEUP_CLASS
    2912      9107113 :       && mode != BLKmode)
    2913     18209954 :     return gen_reg_or_parallel (mode, orig_mode,
    2914     18209954 :                                 GET_SSE_REGNO (sse_regno));
    2915      6514282 :   if (n == 2
    2916      6275487 :       && regclass[0] == X86_64_X87_CLASS
    2917      2229893 :       && regclass[1] == X86_64_X87UP_CLASS)
    2918      2229893 :     return gen_rtx_REG (XFmode, FIRST_STACK_REG);
    2919              : 
    2920      4284389 :   if (n == 2
    2921      4045594 :       && regclass[0] == X86_64_INTEGER_CLASS
    2922      3645100 :       && regclass[1] == X86_64_INTEGER_CLASS
    2923      3636814 :       && (mode == CDImode || mode == TImode || mode == BLKmode)
    2924      3636814 :       && intreg[0] + 1 == intreg[1])
    2925              :     {
    2926      3318453 :       if (mode == BLKmode)
    2927              :         {
    2928              :           /* Use TImode for BLKmode values in 2 integer registers.  */
    2929       496088 :           exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
    2930       248044 :                                       gen_rtx_REG (TImode, intreg[0]),
    2931              :                                       GEN_INT (0));
    2932       248044 :           ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
    2933       248044 :           XVECEXP (ret, 0, 0) = exp[0];
    2934       248044 :           return ret;
    2935              :         }
    2936              :       else
    2937      3070409 :         return gen_rtx_REG (mode, intreg[0]);
    2938              :     }
    2939              : 
    2940              :   /* Otherwise figure out the entries of the PARALLEL.  */
    2941      2659013 :   for (i = 0; i < n; i++)
    2942              :     {
    2943      1693077 :       int pos;
    2944              : 
    2945      1693077 :       switch (regclass[i])
    2946              :         {
    2947              :           case X86_64_NO_CLASS:
    2948              :             break;
    2949       955045 :           case X86_64_INTEGER_CLASS:
    2950       955045 :           case X86_64_INTEGERSI_CLASS:
    2951              :             /* Merge TImodes on aligned occasions here too.  */
    2952       955045 :             if (i * 8 + 8 > bytes)
    2953              :               {
    2954         3224 :                 unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
    2955         3224 :                 if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
    2956              :                   /* We've requested 24 bytes we
    2957              :                      don't have mode for.  Use DImode.  */
    2958          357 :                   tmpmode = DImode;
    2959              :               }
    2960       951821 :             else if (regclass[i] == X86_64_INTEGERSI_CLASS)
    2961              :               tmpmode = SImode;
    2962              :             else
    2963       798123 :               tmpmode = DImode;
    2964      1910090 :             exp [nexps++]
    2965       955045 :               = gen_rtx_EXPR_LIST (VOIDmode,
    2966       955045 :                                    gen_rtx_REG (tmpmode, *intreg),
    2967       955045 :                                    GEN_INT (i*8));
    2968       955045 :             intreg++;
    2969       955045 :             break;
    2970          592 :           case X86_64_SSEHF_CLASS:
    2971          592 :             tmpmode = (mode == BFmode ? BFmode : HFmode);
    2972         1184 :             exp [nexps++]
    2973         1184 :               = gen_rtx_EXPR_LIST (VOIDmode,
    2974              :                                    gen_rtx_REG (tmpmode,
    2975          592 :                                                 GET_SSE_REGNO (sse_regno)),
    2976          592 :                                    GEN_INT (i*8));
    2977          592 :             sse_regno++;
    2978          592 :             break;
    2979         2969 :           case X86_64_SSESF_CLASS:
    2980         5938 :             exp [nexps++]
    2981         5938 :               = gen_rtx_EXPR_LIST (VOIDmode,
    2982              :                                    gen_rtx_REG (SFmode,
    2983         2969 :                                                 GET_SSE_REGNO (sse_regno)),
    2984         2969 :                                    GEN_INT (i*8));
    2985         2969 :             sse_regno++;
    2986         2969 :             break;
    2987       478264 :           case X86_64_SSEDF_CLASS:
    2988       956528 :             exp [nexps++]
    2989       956528 :               = gen_rtx_EXPR_LIST (VOIDmode,
    2990              :                                    gen_rtx_REG (DFmode,
    2991       478264 :                                                 GET_SSE_REGNO (sse_regno)),
    2992       478264 :                                    GEN_INT (i*8));
    2993       478264 :             sse_regno++;
    2994       478264 :             break;
    2995       248245 :           case X86_64_SSE_CLASS:
    2996       248245 :             pos = i;
    2997       248245 :             switch (n)
    2998              :               {
    2999              :               case 1:
    3000              :                 tmpmode = DImode;
    3001              :                 break;
    3002         9664 :               case 2:
    3003         9664 :                 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
    3004              :                   {
    3005            0 :                     tmpmode = TImode;
    3006            0 :                     i++;
    3007              :                   }
    3008              :                 else
    3009              :                   tmpmode = DImode;
    3010              :                 break;
    3011         1689 :               case 4:
    3012         1689 :                 gcc_assert (i == 0
    3013              :                             && regclass[1] == X86_64_SSEUP_CLASS
    3014              :                             && regclass[2] == X86_64_SSEUP_CLASS
    3015              :                             && regclass[3] == X86_64_SSEUP_CLASS);
    3016              :                 tmpmode = OImode;
    3017              :                 i += 3;
    3018              :                 break;
    3019         2136 :               case 8:
    3020         2136 :                 gcc_assert (i == 0
    3021              :                             && regclass[1] == X86_64_SSEUP_CLASS
    3022              :                             && regclass[2] == X86_64_SSEUP_CLASS
    3023              :                             && regclass[3] == X86_64_SSEUP_CLASS
    3024              :                             && regclass[4] == X86_64_SSEUP_CLASS
    3025              :                             && regclass[5] == X86_64_SSEUP_CLASS
    3026              :                             && regclass[6] == X86_64_SSEUP_CLASS
    3027              :                             && regclass[7] == X86_64_SSEUP_CLASS);
    3028              :                 tmpmode = XImode;
    3029              :                 i += 7;
    3030              :                 break;
    3031            0 :               default:
    3032            0 :                 gcc_unreachable ();
    3033              :               }
    3034       496490 :             exp [nexps++]
    3035       496490 :               = gen_rtx_EXPR_LIST (VOIDmode,
    3036              :                                    gen_rtx_REG (tmpmode,
    3037       248245 :                                                 GET_SSE_REGNO (sse_regno)),
    3038       248245 :                                    GEN_INT (pos*8));
    3039       248245 :             sse_regno++;
    3040       248245 :             break;
    3041            0 :           default:
    3042            0 :             gcc_unreachable ();
    3043              :         }
    3044              :     }
    3045              : 
    3046              :   /* Empty aligned struct, union or class.  */
    3047       965936 :   if (nexps == 0)
    3048              :     return NULL;
    3049              : 
    3050       965681 :   ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
    3051      2650796 :   for (i = 0; i < nexps; i++)
    3052      1685115 :     XVECEXP (ret, 0, i) = exp [i];
    3053              :   return ret;
    3054              : }
    3055              : 
    3056              : /* Update the data in CUM to advance over an argument of mode MODE
    3057              :    and data type TYPE.  (TYPE is null for libcalls where that information
    3058              :    may not be available.)
    3059              : 
   Return a number of integer registers advanced over.  */
    3061              : 
static int
function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
                         const_tree type, HOST_WIDE_INT bytes,
                         HOST_WIDE_INT words)
{
  /* RES is the number of integer registers consumed; 0 means the
     argument went to the stack (or to SSE/MMX registers).  */
  int res = 0;
  bool error_p = false;

  if (TARGET_IAMCU)
    {
      /* Intel MCU psABI passes scalars and aggregates no larger than 8
         bytes in registers.  */
      if (!VECTOR_MODE_P (mode) && bytes <= 8)
        goto pass_in_reg;
      return res;
    }

  switch (mode)
    {
    default:
      break;

    case E_BLKmode:
      /* Variable-sized BLKmode values are never register-passed.  */
      if (bytes < 0)
        break;
      /* FALLTHRU */

    case E_DImode:
    case E_SImode:
    case E_HImode:
    case E_QImode:
pass_in_reg:
      /* Integer-like argument: consume WORDS register slots.  NREGS may
         go negative when the argument straddles the register/stack
         boundary; only a fully register-passed argument sets RES.  */
      cum->words += words;
      cum->nregs -= words;
      cum->regno += words;
      if (cum->nregs >= 0)
        res = words;
      if (cum->nregs <= 0)
        {
          /* Registers exhausted: everything further goes on the stack.  */
          cum->nregs = 0;
          cfun->machine->arg_reg_available = false;
          cum->regno = 0;
        }
      break;

    case E_OImode:
      /* OImode shouldn't be used directly.  */
      gcc_unreachable ();

    case E_DFmode:
      /* float_in_sse == -1 flags an SSE calling convention requested
         without SSE enabled; 2+ means DFmode goes in SSE registers.  */
      if (cum->float_in_sse == -1)
        error_p = true;
      if (cum->float_in_sse < 2)
        break;
      /* FALLTHRU */
    case E_SFmode:
      if (cum->float_in_sse == -1)
        error_p = true;
      if (cum->float_in_sse < 1)
        break;
      /* FALLTHRU */

    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V16SImode:
    case E_V8DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V8DFmode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_TImode:
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SImode:
    case E_V2DImode:
    case E_V8HFmode:
    case E_V8BFmode:
    case E_V4SFmode:
    case E_V2DFmode:
      /* SSE vector (and register-passed FP scalar) arguments: only
         non-aggregate values consume an SSE register.  */
      if (!type || !AGGREGATE_TYPE_P (type))
        {
          cum->sse_words += words;
          cum->sse_nregs -= 1;
          cum->sse_regno += 1;
          if (cum->sse_nregs <= 0)
            {
              cum->sse_nregs = 0;
              cum->sse_regno = 0;
            }
        }
      break;

    case E_V8QImode:
    case E_V4HImode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2SImode:
    case E_V2SFmode:
    case E_V1TImode:
    case E_V1DImode:
      /* 64-bit MMX vector arguments, same aggregate rule as above.  */
      if (!type || !AGGREGATE_TYPE_P (type))
        {
          cum->mmx_words += words;
          cum->mmx_nregs -= 1;
          cum->mmx_regno += 1;
          if (cum->mmx_nregs <= 0)
            {
              cum->mmx_nregs = 0;
              cum->mmx_regno = 0;
            }
        }
      break;
    }
  if (error_p)
    {
      cum->float_in_sse = 0;
      error ("calling %qD with SSE calling convention without "
             "SSE/SSE2 enabled", cum->decl);
      sorry ("this is a GCC bug that can be worked around by adding "
             "attribute used to function called");
    }

  return res;
}
    3194              : 
    3195              : static int
    3196     18908315 : function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
    3197              :                          const_tree type, HOST_WIDE_INT words, bool named)
    3198              : {
    3199     18908315 :   int int_nregs, sse_nregs;
    3200              : 
    3201              :   /* Unnamed 512 and 256bit vector mode parameters are passed on stack.  */
    3202     18908315 :   if (!named && (VALID_AVX512F_REG_MODE (mode)
    3203              :                  || VALID_AVX256_REG_MODE (mode)))
    3204              :     return 0;
    3205              : 
    3206     18907951 :   if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
    3207     18907951 :       && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
    3208              :     {
    3209     16638282 :       cum->nregs -= int_nregs;
    3210     16638282 :       cum->sse_nregs -= sse_nregs;
    3211     16638282 :       cum->regno += int_nregs;
    3212     16638282 :       cum->sse_regno += sse_nregs;
    3213     16638282 :       return int_nregs;
    3214              :     }
    3215              :   else
    3216              :     {
    3217      2269669 :       int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
    3218      2269669 :       cum->words = ROUND_UP (cum->words, align);
    3219      2269669 :       cum->words += words;
    3220      2269669 :       return 0;
    3221              :     }
    3222              : }
    3223              : 
    3224              : static int
    3225       446989 : function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
    3226              :                             HOST_WIDE_INT words)
    3227              : {
    3228              :   /* Otherwise, this should be passed indirect.  */
    3229       446989 :   gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
    3230              : 
    3231       446989 :   cum->words += words;
    3232       446989 :   if (cum->nregs > 0)
    3233              :     {
    3234       289355 :       cum->nregs -= 1;
    3235       289355 :       cum->regno += 1;
    3236       289355 :       return 1;
    3237              :     }
    3238              :   return 0;
    3239              : }
    3240              : 
    3241              : /* Update the data in CUM to advance over argument ARG.  */
    3242              : 
    3243              : static void
    3244     21476000 : ix86_function_arg_advance (cumulative_args_t cum_v,
    3245              :                            const function_arg_info &arg)
    3246              : {
    3247     21476000 :   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
    3248     21476000 :   machine_mode mode = arg.mode;
    3249     21476000 :   HOST_WIDE_INT bytes, words;
    3250     21476000 :   int nregs;
    3251              : 
    3252              :   /* The argument of interrupt handler is a special case and is
    3253              :      handled in ix86_function_arg.  */
    3254     21476000 :   if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
    3255              :     return;
    3256              : 
    3257     21475633 :   bytes = arg.promoted_size_in_bytes ();
    3258     21475633 :   words = CEIL (bytes, UNITS_PER_WORD);
    3259              : 
    3260     21475633 :   if (arg.type)
    3261     21159166 :     mode = type_natural_mode (arg.type, NULL, false);
    3262              : 
    3263     21475633 :   if (TARGET_64BIT)
    3264              :     {
    3265     19355304 :       enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;
    3266              : 
    3267     19355304 :       if (call_abi == MS_ABI)
    3268       446989 :         nregs = function_arg_advance_ms_64 (cum, bytes, words);
    3269              :       else
    3270     18908315 :         nregs = function_arg_advance_64 (cum, mode, arg.type, words,
    3271     18908315 :                                          arg.named);
    3272              :     }
    3273              :   else
    3274      2120329 :     nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words);
    3275              : 
    3276     21475633 :   if (!nregs)
    3277              :     {
    3278              :       /* Track if there are outgoing arguments on stack.  */
    3279      5687481 :       if (cum->caller)
    3280      2710913 :         cfun->machine->outgoing_args_on_stack = true;
    3281              :     }
    3282              : }
    3283              : 
    3284              : /* Define where to put the arguments to a function.
    3285              :    Value is zero to push the argument on the stack,
    3286              :    or a hard register in which to store the argument.
    3287              : 
    3288              :    MODE is the argument's machine mode.
    3289              :    TYPE is the data type of the argument (as a tree).
    3290              :     This is null for libcalls where that information may
    3291              :     not be available.
    3292              :    CUM is a variable of type CUMULATIVE_ARGS which gives info about
    3293              :     the preceding args and about the function being called.
    3294              :    NAMED is nonzero if this argument is a named parameter
    3295              :     (otherwise it is an extra parameter matching an ellipsis).  */
    3296              : 
    3297              : static rtx
    3298      2549165 : function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
    3299              :                  machine_mode orig_mode, const_tree type,
    3300              :                  HOST_WIDE_INT bytes, HOST_WIDE_INT words)
    3301              : {
    3302      2549165 :   bool error_p = false;
    3303              : 
    3304              :   /* Avoid the AL settings for the Unix64 ABI.  */
    3305      2549165 :   if (mode == VOIDmode)
    3306       739979 :     return constm1_rtx;
    3307              : 
    3308      1809186 :   if (TARGET_IAMCU)
    3309              :     {
    3310              :       /* Intel MCU psABI passes scalars and aggregates no larger than 8
    3311              :          bytes in registers.  */
    3312            0 :       if (!VECTOR_MODE_P (mode) && bytes <= 8)
    3313            0 :         goto pass_in_reg;
    3314              :       return NULL_RTX;
    3315              :     }
    3316              : 
    3317      1809186 :   switch (mode)
    3318              :     {
    3319              :     default:
    3320              :       break;
    3321              : 
    3322        77846 :     case E_BLKmode:
    3323        77846 :       if (bytes < 0)
    3324              :         break;
    3325              :       /* FALLTHRU */
    3326      1775830 :     case E_DImode:
    3327      1775830 :     case E_SImode:
    3328      1775830 :     case E_HImode:
    3329      1775830 :     case E_QImode:
    3330        77846 : pass_in_reg:
    3331      1775830 :       if (words <= cum->nregs)
    3332              :         {
    3333        44945 :           int regno = cum->regno;
    3334              : 
    3335              :           /* Fastcall allocates the first two DWORD (SImode) or
    3336              :             smaller arguments to ECX and EDX if it isn't an
    3337              :             aggregate type .  */
    3338        44945 :           if (cum->fastcall)
    3339              :             {
    3340            6 :               if (mode == BLKmode
    3341            6 :                   || mode == DImode
    3342            6 :                   || (type && AGGREGATE_TYPE_P (type)))
    3343              :                 break;
    3344              : 
    3345              :               /* ECX not EAX is the first allocated register.  */
    3346            6 :               if (regno == AX_REG)
    3347        44945 :                 regno = CX_REG;
    3348              :             }
    3349        44945 :           return gen_rtx_REG (mode, regno);
    3350              :         }
    3351              :       break;
    3352              : 
    3353         3322 :     case E_DFmode:
    3354         3322 :       if (cum->float_in_sse == -1)
    3355            0 :         error_p = true;
    3356         3322 :       if (cum->float_in_sse < 2)
    3357              :         break;
    3358              :       /* FALLTHRU */
    3359          918 :     case E_SFmode:
    3360          918 :       if (cum->float_in_sse == -1)
    3361            0 :         error_p = true;
    3362          918 :       if (cum->float_in_sse < 1)
    3363              :         break;
    3364              :       /* FALLTHRU */
    3365           12 :     case E_TImode:
    3366              :       /* In 32bit, we pass TImode in xmm registers.  */
    3367           12 :     case E_V16QImode:
    3368           12 :     case E_V8HImode:
    3369           12 :     case E_V4SImode:
    3370           12 :     case E_V2DImode:
    3371           12 :     case E_V8HFmode:
    3372           12 :     case E_V8BFmode:
    3373           12 :     case E_V4SFmode:
    3374           12 :     case E_V2DFmode:
    3375           12 :       if (!type || !AGGREGATE_TYPE_P (type))
    3376              :         {
    3377           12 :           if (cum->sse_nregs)
    3378           12 :             return gen_reg_or_parallel (mode, orig_mode,
    3379           12 :                                         cum->sse_regno + FIRST_SSE_REG);
    3380              :         }
    3381              :       break;
    3382              : 
    3383            0 :     case E_OImode:
    3384            0 :     case E_XImode:
    3385              :       /* OImode and XImode shouldn't be used directly.  */
    3386            0 :       gcc_unreachable ();
    3387              : 
    3388            9 :     case E_V64QImode:
    3389            9 :     case E_V32HImode:
    3390            9 :     case E_V16SImode:
    3391            9 :     case E_V8DImode:
    3392            9 :     case E_V32HFmode:
    3393            9 :     case E_V32BFmode:
    3394            9 :     case E_V16SFmode:
    3395            9 :     case E_V8DFmode:
    3396            9 :     case E_V16HFmode:
    3397            9 :     case E_V16BFmode:
    3398            9 :     case E_V8SFmode:
    3399            9 :     case E_V8SImode:
    3400            9 :     case E_V32QImode:
    3401            9 :     case E_V16HImode:
    3402            9 :     case E_V4DFmode:
    3403            9 :     case E_V4DImode:
    3404            9 :       if (!type || !AGGREGATE_TYPE_P (type))
    3405              :         {
    3406            9 :           if (cum->sse_nregs)
    3407            9 :             return gen_reg_or_parallel (mode, orig_mode,
    3408            9 :                                         cum->sse_regno + FIRST_SSE_REG);
    3409              :         }
    3410              :       break;
    3411              : 
    3412            8 :     case E_V8QImode:
    3413            8 :     case E_V4HImode:
    3414            8 :     case E_V4HFmode:
    3415            8 :     case E_V4BFmode:
    3416            8 :     case E_V2SImode:
    3417            8 :     case E_V2SFmode:
    3418            8 :     case E_V1TImode:
    3419            8 :     case E_V1DImode:
    3420            8 :       if (!type || !AGGREGATE_TYPE_P (type))
    3421              :         {
    3422            8 :           if (cum->mmx_nregs)
    3423            8 :             return gen_reg_or_parallel (mode, orig_mode,
    3424            8 :                                         cum->mmx_regno + FIRST_MMX_REG);
    3425              :         }
    3426              :       break;
    3427              :     }
    3428         4240 :   if (error_p)
    3429              :     {
    3430            0 :       cum->float_in_sse = 0;
    3431            0 :       error ("calling %qD with SSE calling convention without "
    3432              :              "SSE/SSE2 enabled", cum->decl);
    3433            0 :       sorry ("this is a GCC bug that can be worked around by adding "
    3434              :              "attribute used to function called");
    3435              :     }
    3436              : 
    3437              :   return NULL_RTX;
    3438              : }
    3439              : 
/* Return where a SysV x86-64 ABI argument of MODE and TYPE is passed:
   a REG or PARALLEL rtx, or NULL_RTX to pass it in memory.  ORIG_MODE
   is the argument's original mode (the caller may have promoted MODE to
   a vector mode for vector types); NAMED is false for trailing variadic
   arguments.  CUM tracks the registers consumed so far.  */

static rtx
function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
                 machine_mode orig_mode, const_tree type, bool named)
{
  /* Handle a hidden AL argument containing number of registers
     for varargs x86-64 functions.  */
  if (mode == VOIDmode)
    return GEN_INT (cum->maybe_vaarg
                    ? (cum->sse_nregs < 0
                       ? X86_64_SSE_REGPARM_MAX
                       : cum->sse_regno)
                    : -1);

  switch (mode)
    {
    default:
      break;

    case E_V16HFmode:
    case E_V16BFmode:
    case E_V8SFmode:
    case E_V8SImode:
    case E_V32QImode:
    case E_V16HImode:
    case E_V4DFmode:
    case E_V4DImode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16SFmode:
    case E_V16SImode:
    case E_V64QImode:
    case E_V32HImode:
    case E_V8DFmode:
    case E_V8DImode:
      /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
      if (!named)
        return NULL;
      break;
    }

  /* The preserve_none calling convention uses its own ordering of the
     integer argument registers.  */
  const int *parm_regs;
  if (cum->preserve_none_abi)
    parm_regs = x86_64_preserve_none_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;

  return construct_container (mode, orig_mode, type, 0, cum->nregs,
                              cum->sse_nregs,
                              &parm_regs[cum->regno],
                              cum->sse_regno);
}
    3491              : 
/* Return where a Microsoft x64 ABI argument of MODE and TYPE is passed:
   a REG or PARALLEL rtx, or NULL_RTX to pass it on the stack.
   ORIG_MODE is the argument's pre-promotion mode; NAMED is false for
   trailing variadic arguments; BYTES is the argument's size in bytes
   (used for BLKmode aggregates).  */

static rtx
function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
                    machine_mode orig_mode, bool named, const_tree type,
                    HOST_WIDE_INT bytes)
{
  unsigned int regno;

  /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
     We use value of -2 to specify that current function call is MSABI.  */
  if (mode == VOIDmode)
    return GEN_INT (-2);

  /* If we've run out of registers, it goes on the stack.  */
  if (cum->nregs == 0)
    return NULL_RTX;

  /* Default: the integer register for this argument slot.  */
  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];

  /* Only scalar floating point modes of at most 64 bits (HF/SF/DF) are
     passed in anything but integer regs.  Larger floating point types
     are excluded as the Windows ABI requires vreg args can be shadowed
     in GPRs (for red zone / varargs).  */
  if (TARGET_SSE && (mode == HFmode || mode == SFmode || mode == DFmode))
    {
      if (named)
        {
          /* Named scalar FP goes in the SSE register of the same slot,
             unless the type is an aggregate.  */
          if (type == NULL_TREE || !AGGREGATE_TYPE_P (type))
            regno = cum->regno + FIRST_SSE_REG;
        }
      else
        {
          rtx t1, t2;

          /* Unnamed floating parameters are passed in both the
             SSE and integer registers.  */
          t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
          t2 = gen_rtx_REG (mode, regno);
          t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
          t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
          return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
        }
    }
  /* Handle aggregated types passed in register.  */
  if (orig_mode == BLKmode)
    {
      /* Small aggregates are passed in a GPR of their rounded-up size.  */
      if (bytes > 0 && bytes <= 8)
        mode = (bytes > 4 ? DImode : SImode);
      if (mode == BLKmode)
        mode = DImode;
    }

  return gen_reg_or_parallel (mode, orig_mode, regno);
}
    3544              : 
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register in
   which to store the argument.

   ARG describes the argument while CUM gives information about the
   preceding args and about the function being called.  Implements the
   TARGET_FUNCTION_ARG hook.  */

static rtx
ix86_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  machine_mode mode = arg.mode;
  HOST_WIDE_INT bytes, words;
  rtx reg;

  /* Interrupt/exception handlers receive their arguments implicitly on
     the stack frame rather than through the normal calling convention;
     this only applies when compiling the handler itself (!cum->caller).  */
  if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL)
    {
      gcc_assert (arg.type != NULL_TREE);
      if (POINTER_TYPE_P (arg.type))
        {
          /* This is the pointer argument.  */
          gcc_assert (TYPE_MODE (arg.type) == ptr_mode);
          /* It is at -WORD(AP) in the current frame in interrupt and
             exception handlers.  */
          reg = plus_constant (Pmode, arg_pointer_rtx, -UNITS_PER_WORD);
        }
      else
        {
          gcc_assert (cfun->machine->func_type == TYPE_EXCEPTION
                      && TREE_CODE (arg.type) == INTEGER_TYPE
                      && TYPE_MODE (arg.type) == word_mode);
          /* The error code is the word-mode integer argument at
             -2 * WORD(AP) in the current frame of the exception
             handler.  */
          reg = gen_rtx_MEM (word_mode,
                             plus_constant (Pmode,
                                            arg_pointer_rtx,
                                            -2 * UNITS_PER_WORD));
        }
      return reg;
    }

  bytes = arg.promoted_size_in_bytes ();
  words = CEIL (bytes, UNITS_PER_WORD);

  /* To simplify the code below, represent vector types with a vector mode
     even if MMX/SSE are not active.  */
  if (arg.type && VECTOR_TYPE_P (arg.type))
    mode = type_natural_mode (arg.type, cum, false);

  /* Dispatch on the ABI in effect for this call.  */
  if (TARGET_64BIT)
    {
      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;

      if (call_abi == MS_ABI)
        reg = function_arg_ms_64 (cum, mode, arg.mode, arg.named,
                                  arg.type, bytes);
      else
        reg = function_arg_64 (cum, mode, arg.mode, arg.type, arg.named);
    }
  else
    reg = function_arg_32 (cum, mode, arg.mode, arg.type, bytes, words);

  /* Track if there are outgoing arguments on stack.  */
  if (reg == NULL_RTX && cum->caller)
    cfun->machine->outgoing_args_on_stack = true;

  return reg;
}
    3613              : 
/* A C expression that indicates when an argument must be passed by
   reference.  If nonzero for an argument, a copy of that argument is
   made in memory and a pointer to the argument is passed instead of
   the argument itself.  The pointer is passed in whatever way is
   appropriate for passing a pointer to that type.  Implements the
   TARGET_PASS_BY_REFERENCE hook.  */

static bool
ix86_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (TARGET_64BIT)
    {
      enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi;

      /* See Windows x64 Software Convention.  */
      if (call_abi == MS_ABI)
        {
          HOST_WIDE_INT msize = GET_MODE_SIZE (arg.mode);

          if (tree type = arg.type)
            {
              /* Arrays are passed by reference.  */
              if (TREE_CODE (type) == ARRAY_TYPE)
                return true;

              if (RECORD_OR_UNION_TYPE_P (type))
                {
                  /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
                     are passed by reference.  */
                  msize = int_size_in_bytes (type);
                }
            }

          /* __m128 is passed by reference.  */
          return msize != 1 && msize != 2 && msize != 4 && msize != 8;
        }
      /* SysV: variable-sized types (int_size_in_bytes == -1) go by
         reference; everything else is passed by value.  */
      else if (arg.type && int_size_in_bytes (arg.type) == -1)
        return true;
    }

  /* 32-bit ABI passes everything by value (possibly in memory).  */
  return false;
}
    3650              :         }
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  XXX: This function is obsolete and is only used for
   checking psABI compatibility with previous versions of GCC.  */

static bool
ix86_compat_aligned_value_p (const_tree type)
{
  machine_mode mode = TYPE_MODE (type);
  /* SSE modes and 128-bit decimal/binary FP modes are 128-bit aligned
     unless the user forced a smaller alignment via an attribute.  */
  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
       || mode == TDmode
       || mode == TFmode
       || mode == TCmode)
      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
    return true;
  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
        {
        case RECORD_TYPE:
        case UNION_TYPE:
        case QUAL_UNION_TYPE:
          {
            tree field;

            /* Walk all the structure fields.  */
            for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
              {
                if (TREE_CODE (field) == FIELD_DECL
                    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
                  return true;
              }
            break;
          }

        case ARRAY_TYPE:
          /* Just for use if some languages passes arrays by value.  */
          if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
            return true;
          break;

        default:
          gcc_unreachable ();
        }
    }
  return false;
}
    3708              : 
    3709              : /* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
    3710              :    XXX: This function is obsolete and is only used for checking psABI
    3711              :    compatibility with previous versions of GCC.  */
    3712              : 
    3713              : static unsigned int
    3714      5529135 : ix86_compat_function_arg_boundary (machine_mode mode,
    3715              :                                    const_tree type, unsigned int align)
    3716              : {
    3717              :   /* In 32bit, only _Decimal128 and __float128 are aligned to their
    3718              :      natural boundaries.  */
    3719      5529135 :   if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
    3720              :     {
    3721              :       /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
    3722              :          make an exception for SSE modes since these require 128bit
    3723              :          alignment.
    3724              : 
    3725              :          The handling here differs from field_alignment.  ICC aligns MMX
    3726              :          arguments to 4 byte boundaries, while structure fields are aligned
    3727              :          to 8 byte boundaries.  */
    3728      1981390 :       if (!type)
    3729              :         {
    3730        11910 :           if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
    3731      1981178 :             align = PARM_BOUNDARY;
    3732              :         }
    3733              :       else
    3734              :         {
    3735      1969480 :           if (!ix86_compat_aligned_value_p (type))
    3736      1981178 :             align = PARM_BOUNDARY;
    3737              :         }
    3738              :     }
    3739     10658839 :   if (align > BIGGEST_ALIGNMENT)
    3740           86 :     align = BIGGEST_ALIGNMENT;
    3741      5529135 :   return align;
    3742              : }
    3743              : 
/* Return true when TYPE should be 128bit aligned for 32bit argument
   passing ABI.  */

static bool
ix86_contains_aligned_value_p (const_tree type)
{
  machine_mode mode = TYPE_MODE (type);

  /* The i386 ABI keeps XFmode arguments at 4-byte alignment.  */
  if (mode == XFmode || mode == XCmode)
    return false;

  if (TYPE_ALIGN (type) < 128)
    return false;

  if (AGGREGATE_TYPE_P (type))
    {
      /* Walk the aggregates recursively.  */
      switch (TREE_CODE (type))
        {
        case RECORD_TYPE:
        case UNION_TYPE:
        case QUAL_UNION_TYPE:
          {
            tree field;

            /* Walk all the structure fields.  */
            for (field = TYPE_FIELDS (type);
                 field;
                 field = DECL_CHAIN (field))
              {
                if (TREE_CODE (field) == FIELD_DECL
                    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
                  return true;
              }
            break;
          }

        case ARRAY_TYPE:
          /* Just for use if some languages passes arrays by value.  */
          if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
            return true;
          break;

        default:
          gcc_unreachable ();
        }
    }
  else
    /* NOTE(review): always true here, since TYPE_ALIGN < 128 already
       returned false above; kept for symmetry with the aggregate path.  */
    return TYPE_ALIGN (type) >= 128;

  return false;
}
    3796              : 
/* Gives the alignment boundary, in bits, of an argument with the
   specified mode and type.  Implements the
   TARGET_FUNCTION_ARG_BOUNDARY hook.  */

static unsigned int
ix86_function_arg_boundary (machine_mode mode, const_tree type)
{
  unsigned int align;
  if (type)
    {
      /* Since the main variant type is used for call, we convert it to
         the main variant type.  */
      type = TYPE_MAIN_VARIANT (type);
      align = TYPE_ALIGN (type);
      /* Empty types occupy no argument slot; use the minimum boundary.  */
      if (TYPE_EMPTY_P (type))
        return PARM_BOUNDARY;
    }
  else
    align = GET_MODE_ALIGNMENT (mode);
  if (align < PARM_BOUNDARY)
    align = PARM_BOUNDARY;
  else
    {
      static bool warned;
      unsigned int saved_align = align;

      if (!TARGET_64BIT)
        {
          /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
          if (!type)
            {
              if (mode == XFmode || mode == XCmode)
                align = PARM_BOUNDARY;
            }
          else if (!ix86_contains_aligned_value_p (type))
            align = PARM_BOUNDARY;

          if (align < 128)
            align = PARM_BOUNDARY;
        }

      /* Warn (once per compilation) when the boundary computed here
         differs from what GCC versions before 4.6 used.  */
      if (warn_psabi
          && !warned
          && align != ix86_compat_function_arg_boundary (mode, type,
                                                         saved_align))
        {
          warned = true;
          inform (input_location,
                  "the ABI for passing parameters with %d-byte"
                  " alignment has changed in GCC 4.6",
                  align / BITS_PER_UNIT);
        }
    }

  return align;
}
    3852              : 
    3853              : /* Return true if N is a possible register number of function value.  */
    3854              : 
    3855              : static bool
    3856      4658569 : ix86_function_value_regno_p (const unsigned int regno)
    3857              : {
    3858      4658569 :   switch (regno)
    3859              :     {
    3860              :     case AX_REG:
    3861              :       return true;
    3862       105987 :     case DX_REG:
    3863       105987 :       return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
    3864        99341 :     case DI_REG:
    3865        99341 :     case SI_REG:
    3866        99341 :       return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
    3867              : 
    3868              :       /* Complex values are returned in %st(0)/%st(1) pair.  */
    3869        24145 :     case ST0_REG:
    3870        24145 :     case ST1_REG:
    3871              :       /* TODO: The function should depend on current function ABI but
    3872              :        builtins.cc would need updating then. Therefore we use the
    3873              :        default ABI.  */
    3874        24145 :       if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
    3875              :         return false;
    3876        24145 :       return TARGET_FLOAT_RETURNS_IN_80387;
    3877              : 
    3878              :       /* Complex values are returned in %xmm0/%xmm1 pair.  */
    3879      1290026 :     case XMM0_REG:
    3880      1290026 :     case XMM1_REG:
    3881      1290026 :       return TARGET_SSE;
    3882              : 
    3883         9464 :     case MM0_REG:
    3884         9464 :       if (TARGET_MACHO || TARGET_64BIT)
    3885              :         return false;
    3886         2472 :       return TARGET_MMX;
    3887              :     }
    3888              : 
    3889              :   return false;
    3890              : }
    3891              : 
    3892              : /* Check whether the register REGNO should be zeroed on X86.
    3893              :    When ALL_SSE_ZEROED is true, all SSE registers have been zeroed
    3894              :    together, no need to zero it again.
    3895              :    When NEED_ZERO_MMX is true, MMX registers should be cleared.  */
    3896              : 
    3897              : static bool
    3898         1296 : zero_call_used_regno_p (const unsigned int regno,
    3899              :                         bool all_sse_zeroed,
    3900              :                         bool need_zero_mmx)
    3901              : {
    3902          763 :   return GENERAL_REGNO_P (regno)
    3903          763 :          || (!all_sse_zeroed && SSE_REGNO_P (regno))
    3904          383 :          || MASK_REGNO_P (regno)
    3905         1671 :          || (need_zero_mmx && MMX_REGNO_P (regno));
    3906              : }
    3907              : 
    3908              : /* Return the machine_mode that is used to zero register REGNO.  */
    3909              : 
    3910              : static machine_mode
    3911          921 : zero_call_used_regno_mode (const unsigned int regno)
    3912              : {
    3913              :   /* NB: We only need to zero the lower 32 bits for integer registers
    3914              :      and the lower 128 bits for vector registers since destination are
    3915              :      zero-extended to the full register width.  */
    3916          921 :   if (GENERAL_REGNO_P (regno))
    3917              :     return SImode;
    3918              :   else if (SSE_REGNO_P (regno))
    3919          380 :     return V4SFmode;
    3920              :   else if (MASK_REGNO_P (regno))
    3921              :     return HImode;
    3922              :   else if (MMX_REGNO_P (regno))
    3923            0 :     return V2SImode;
    3924              :   else
    3925            0 :     gcc_unreachable ();
    3926              : }
    3927              : 
    3928              : /* Generate a rtx to zero all vector registers together if possible,
    3929              :    otherwise, return NULL.  */
    3930              : 
    3931              : static rtx
    3932          130 : zero_all_vector_registers (HARD_REG_SET need_zeroed_hardregs)
    3933              : {
    3934          130 :   if (!TARGET_AVX)
    3935              :     return NULL;
    3936              : 
    3937          279 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    3938          276 :     if ((LEGACY_SSE_REGNO_P (regno)
    3939          252 :          || (TARGET_64BIT
    3940          252 :              && (REX_SSE_REGNO_P (regno)
    3941          228 :                  || (TARGET_AVX512F && EXT_REX_SSE_REGNO_P (regno)))))
    3942          316 :         && !TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
    3943              :       return NULL;
    3944              : 
    3945            3 :   return gen_avx_vzeroall ();
    3946              : }
    3947              : 
    3948              : /* Generate insns to zero all st registers together.
    3949              :    Return true when zeroing instructions are generated.
    3950              :    Assume the number of st registers that are zeroed is num_of_st,
    3951              :    we will emit the following sequence to zero them together:
    3952              :                   fldz;         \
    3953              :                   fldz;         \
    3954              :                   ...
    3955              :                   fldz;         \
    3956              :                   fstp %%st(0); \
    3957              :                   fstp %%st(0); \
    3958              :                   ...
    3959              :                   fstp %%st(0);
    3960              :    i.e., num_of_st fldz followed by num_of_st fstp to clear the stack
    3961              :    mark stack slots empty.
    3962              : 
    3963              :    How to compute the num_of_st:
    3964              :    There is no direct mapping from stack registers to hard register
    3965              :    numbers.  If one stack register needs to be cleared, we don't know
    3966              :    where in the stack the value remains.  So, if any stack register
    3967              :    needs to be cleared, the whole stack should be cleared.  However,
    3968              :    x87 stack registers that hold the return value should be excluded.
    3969              :    x87 returns in the top (two for complex values) register, so
    3970              :    num_of_st should be 7/6 when x87 returns, otherwise it will be 8.
    3971              :    return the value of num_of_st.  */
    3972              : 
    3973              : 
static int
zero_all_st_registers (HARD_REG_SET need_zeroed_hardregs)
{

  /* If the FPU is disabled, no need to zero all st registers.  */
  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
    return 0;

  /* The x87 stack is not addressable by hard register number, so a
     request to clear any stack (or aliased MMX) register forces the
     whole stack to be cleared.  One hit is enough to decide.  */
  unsigned int num_of_st = 0;
  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if ((STACK_REGNO_P (regno) || MMX_REGNO_P (regno))
        && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
      {
        num_of_st++;
        break;
      }

  if (num_of_st == 0)
    return 0;

  /* An x87 return value occupies the top stack register (two registers
     for a complex value); those slots must be preserved.  */
  bool return_with_x87 = false;
  return_with_x87 = (crtl->return_rtx
                     && (STACK_REG_P (crtl->return_rtx)));

  bool complex_return = false;
  complex_return = (crtl->return_rtx
                    && COMPLEX_MODE_P (GET_MODE (crtl->return_rtx)));

  /* Clear 8 slots normally, 7 when one slot holds the return value,
     6 when two slots hold a complex return value.  */
  if (return_with_x87)
    if (complex_return)
      num_of_st = 6;
    else
      num_of_st = 7;
  else
    num_of_st = 8;

  /* Emit num_of_st loads of zero into %st ...  */
  rtx st_reg = gen_rtx_REG (XFmode, FIRST_STACK_REG);
  for (unsigned int i = 0; i < num_of_st; i++)
    emit_insn (gen_rtx_SET (st_reg, CONST0_RTX (XFmode)));

  /* ... followed by num_of_st self-copies carrying REG_DEAD notes,
     which pop the stack and mark the slots empty again (the fldz/fstp
     pairs described in the comment above).  */
  for (unsigned int i = 0; i < num_of_st; i++)
    {
      rtx insn;
      insn = emit_insn (gen_rtx_SET (st_reg, st_reg));
      add_reg_note (insn, REG_DEAD, st_reg);
    }
  return num_of_st;
}
    4022              : 
    4023              : 
/* When the routine exits in MMX mode, if any ST register needs
   to be zeroed, we should clear all MMX registers except the
   RET_MMX_REGNO that holds the return value.  */
    4027              : static bool
    4028            0 : zero_all_mm_registers (HARD_REG_SET need_zeroed_hardregs,
    4029              :                        unsigned int ret_mmx_regno)
    4030              : {
    4031            0 :   bool need_zero_all_mm = false;
    4032            0 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    4033            0 :     if (STACK_REGNO_P (regno)
    4034            0 :         && TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
    4035              :       {
    4036              :         need_zero_all_mm = true;
    4037              :         break;
    4038              :       }
    4039              : 
    4040            0 :   if (!need_zero_all_mm)
    4041              :     return false;
    4042              : 
    4043              :   machine_mode mode = V2SImode;
    4044            0 :   for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
    4045            0 :     if (regno != ret_mmx_regno)
    4046              :       {
    4047            0 :         rtx reg = gen_rtx_REG (mode, regno);
    4048            0 :         emit_insn (gen_rtx_SET (reg, CONST0_RTX (mode)));
    4049              :       }
    4050              :   return true;
    4051              : }
    4052              : 
    4053              : /* TARGET_ZERO_CALL_USED_REGS.  */
    4054              : /* Generate a sequence of instructions that zero registers specified by
    4055              :    NEED_ZEROED_HARDREGS.  Return the ZEROED_HARDREGS that are actually
    4056              :    zeroed.  */
static HARD_REG_SET
ix86_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
{
  HARD_REG_SET zeroed_hardregs;
  bool all_sse_zeroed = false;
  int all_st_zeroed_num = 0;
  bool all_mm_zeroed = false;

  CLEAR_HARD_REG_SET (zeroed_hardregs);

  /* First, let's see whether we can zero all vector registers together.  */
  rtx zero_all_vec_insn = zero_all_vector_registers (need_zeroed_hardregs);
  if (zero_all_vec_insn)
    {
      emit_insn (zero_all_vec_insn);
      all_sse_zeroed = true;
    }

  /* mm/st registers are a shared register set; we should follow the
     following rules to clear them:
                        MMX exit mode         x87 exit mode
        -------------|----------------------|---------------
        uses x87 reg | clear all MMX        | clear all x87
        uses MMX reg | clear individual MMX | clear all x87
        x87 + MMX    | clear all MMX        | clear all x87

     First, we should decide which mode (MMX mode or x87 mode) the function
     exits with.  */

  bool exit_with_mmx_mode = (crtl->return_rtx
                             && (MMX_REG_P (crtl->return_rtx)));

  if (!exit_with_mmx_mode)
    /* x87 exit mode, we should zero all st registers together.  */
    {
      all_st_zeroed_num = zero_all_st_registers (need_zeroed_hardregs);

      /* Record which stack registers were actually cleared.  */
      if (all_st_zeroed_num > 0)
        for (unsigned int regno = FIRST_STACK_REG; regno <= LAST_STACK_REG; regno++)
          /* x87 stack registers that hold the return value should be excluded.
             x87 returns in the top (two for complex values) register.  */
          if (all_st_zeroed_num == 8
              || !((all_st_zeroed_num >= 6 && regno == REGNO (crtl->return_rtx))
                   || (all_st_zeroed_num == 6
                       && (regno == (REGNO (crtl->return_rtx) + 1)))))
            SET_HARD_REG_BIT (zeroed_hardregs, regno);
    }
  else
    /* MMX exit mode, check whether we can zero all mm registers.  */
    {
      unsigned int exit_mmx_regno = REGNO (crtl->return_rtx);
      all_mm_zeroed = zero_all_mm_registers (need_zeroed_hardregs,
                                             exit_mmx_regno);
      if (all_mm_zeroed)
        for (unsigned int regno = FIRST_MMX_REG; regno <= LAST_MMX_REG; regno++)
          if (regno != exit_mmx_regno)
            SET_HARD_REG_BIT (zeroed_hardregs, regno);
    }

  /* Now, generate instructions to zero all the other registers.  */

  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    {
      if (!TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
        continue;
      /* Skip registers already handled above or that this target cannot
         zero individually (individual MMX clearing only in MMX exit
         mode when the bulk clear did not happen).  */
      if (!zero_call_used_regno_p (regno, all_sse_zeroed,
                                   exit_with_mmx_mode && !all_mm_zeroed))
        continue;

      SET_HARD_REG_BIT (zeroed_hardregs, regno);

      machine_mode mode = zero_call_used_regno_mode (regno);

      rtx reg = gen_rtx_REG (mode, regno);
      rtx tmp = gen_rtx_SET (reg, CONST0_RTX (mode));

      switch (mode)
        {
        case E_SImode:
          if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
            {
              /* Attach a flags clobber so the SImode zero can match the
                 xor-style move-zero pattern.  */
              rtx clob = gen_rtx_CLOBBER (VOIDmode,
                                          gen_rtx_REG (CCmode,
                                                       FLAGS_REG));
              tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
                                                           tmp,
                                                           clob));
            }
          /* FALLTHRU.  */

        case E_V4SFmode:
        case E_HImode:
        case E_V2SImode:
          emit_insn (tmp);
          break;

        default:
          gcc_unreachable ();
        }
    }
  return zeroed_hardregs;
}
    4159              : 
    4160              : /* Define how to find the value returned by a function.
    4161              :    VALTYPE is the data type of the value (as a tree).
    4162              :    If the precise function being called is known, FUNC is its FUNCTION_DECL;
    4163              :    otherwise, FUNC is 0.  */
    4164              : 
static rtx
function_value_32 (machine_mode orig_mode, machine_mode mode,
                   const_tree fntype, const_tree fn)
{
  unsigned int regno;

  /* 8-byte vector modes in %mm0.  See ix86_return_in_memory for where
     we normally prevent this case when mmx is not available.  However
     some ABIs may require the result to be returned like DImode.  */
  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
    regno = FIRST_MMX_REG;

  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
     we prevent this case when sse is not available.  However some ABIs
     may require the result to be returned like integer TImode.  */
  else if (mode == TImode
           || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
    regno = FIRST_SSE_REG;

  /* 32-byte vector modes in %ymm0.   */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
    regno = FIRST_SSE_REG;

  /* 64-byte vector modes in %zmm0.   */
  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
    regno = FIRST_SSE_REG;

  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
    regno = FIRST_FLOAT_REG;
  else
    /* Most things go in %eax.  */
    regno = AX_REG;

  /* Return __bf16/_Float16/_Complex _Float16 in an SSE register.  */
  if (mode == HFmode || mode == BFmode)
    {
      if (!TARGET_SSE2)
        {
          error ("SSE register return with SSE2 disabled");
          regno = AX_REG;
        }
      else
        regno = FIRST_SSE_REG;
    }

  if (mode == HCmode)
    {
      if (!TARGET_SSE2)
        error ("SSE register return with SSE2 disabled");

      /* Wrap the SImode-sized piece at offset 0 in a one-element
         PARALLEL.  */
      rtx ret = gen_rtx_PARALLEL (mode, rtvec_alloc(1));
      XVECEXP (ret, 0, 0)
        = gen_rtx_EXPR_LIST (VOIDmode,
                             gen_rtx_REG (SImode,
                                          TARGET_SSE2 ? FIRST_SSE_REG : AX_REG),
                             GEN_INT (0));
      return ret;
    }

  /* Override FP return register with %xmm0 for local functions when
     SSE math is enabled or for functions with sseregparm attribute.  */
  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
    {
      int sse_level = ix86_function_sseregparm (fntype, fn, false);
      if (sse_level == -1)
        {
          error ("calling %qD with SSE calling convention without "
                 "SSE/SSE2 enabled", fn);
          sorry ("this is a GCC bug that can be worked around by adding "
                 "attribute used to function called");
        }
      else if ((sse_level >= 1 && mode == SFmode)
               || (sse_level == 2 && mode == DFmode))
        regno = FIRST_SSE_REG;
    }

  /* OImode shouldn't be used directly.  */
  gcc_assert (mode != OImode);

  return gen_rtx_REG (orig_mode, regno);
}
    4247              : 
static rtx
function_value_64 (machine_mode orig_mode, machine_mode mode,
                   const_tree valtype)
{
  rtx ret;

  /* Handle libcalls, which don't provide a type node.  */
  if (valtype == NULL)
    {
      unsigned int regno;

      switch (mode)
        {
        /* Scalar/complex binary and decimal FP modes in %xmm0.  */
        case E_BFmode:
        case E_HFmode:
        case E_HCmode:
        case E_SFmode:
        case E_SCmode:
        case E_DFmode:
        case E_DCmode:
        case E_TFmode:
        case E_SDmode:
        case E_DDmode:
        case E_TDmode:
          regno = FIRST_SSE_REG;
          break;
        /* long double in the x87 stack top.  */
        case E_XFmode:
        case E_XCmode:
          regno = FIRST_FLOAT_REG;
          break;
        /* No single register holds a TCmode value.  */
        case E_TCmode:
          return NULL;
        default:
          regno = AX_REG;
        }

      return gen_rtx_REG (mode, regno);
    }
  else if (POINTER_TYPE_P (valtype))
    {
      /* Pointers are always returned in word_mode.  */
      mode = word_mode;
    }

  /* Otherwise let construct_container classify the return value.  */
  ret = construct_container (mode, orig_mode, valtype, 1,
                             X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
                             x86_64_int_return_registers, 0);

  /* For zero sized structures, construct_container returns NULL, but we
     need to keep rest of compiler happy by returning meaningful value.  */
  if (!ret)
    ret = gen_rtx_REG (orig_mode, AX_REG);

  return ret;
}
    4303              : 
    4304              : static rtx
    4305            0 : function_value_ms_32 (machine_mode orig_mode, machine_mode mode,
    4306              :                       const_tree fntype, const_tree fn, const_tree valtype)
    4307              : {
    4308            0 :   unsigned int regno;
    4309              : 
    4310              :   /* Floating point return values in %st(0)
    4311              :      (unless -mno-fp-ret-in-387 or aggregate type of up to 8 bytes).  */
    4312            0 :   if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387
    4313            0 :            && (GET_MODE_SIZE (mode) > 8
    4314            0 :                || valtype == NULL_TREE || !AGGREGATE_TYPE_P (valtype)))
    4315              :   {
    4316            0 :     regno = FIRST_FLOAT_REG;
    4317            0 :     return gen_rtx_REG (orig_mode, regno);
    4318              :   }
    4319              :   else
    4320            0 :     return function_value_32(orig_mode, mode, fntype,fn);
    4321              : }
    4322              : 
/* Return-value register selection for the Microsoft 64-bit ABI:
   %rax by default, %xmm0 for scalar FP and qualifying 16-byte
   integer/vector values when SSE is enabled.  */
static rtx
function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
                      const_tree valtype)
{
  unsigned int regno = AX_REG;

  if (TARGET_SSE)
    {
      switch (GET_MODE_SIZE (mode))
        {
        case 16:
          /* 16-byte values qualify for %xmm0 only when the type is an
             integral or vector type and the mode is a non-complex
             scalar-int or vector mode.  */
          if (valtype != NULL_TREE
              && !VECTOR_INTEGER_TYPE_P (valtype)
              && !INTEGRAL_TYPE_P (valtype)
              && !VECTOR_FLOAT_TYPE_P (valtype))
            break;
          if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
              && !COMPLEX_MODE_P (mode))
            regno = FIRST_SSE_REG;
          break;
        case 8:
        case 4:
        case 2:
          /* Small aggregates stay in %rax; bare FP scalars use %xmm0.  */
          if (valtype != NULL_TREE && AGGREGATE_TYPE_P (valtype))
            break;
          if (mode == HFmode || mode == SFmode || mode == DFmode)
            regno = FIRST_SSE_REG;
          break;
        default:
          break;
        }
    }
  return gen_rtx_REG (orig_mode, regno);
}
    4357              : 
    4358              : static rtx
    4359    107408134 : ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
    4360              :                        machine_mode orig_mode, machine_mode mode)
    4361              : {
    4362    107408134 :   const_tree fn, fntype;
    4363              : 
    4364    107408134 :   fn = NULL_TREE;
    4365    107408134 :   if (fntype_or_decl && DECL_P (fntype_or_decl))
    4366      3525315 :     fn = fntype_or_decl;
    4367      3525315 :   fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
    4368              : 
    4369    107408134 :   if (ix86_function_type_abi (fntype) == MS_ABI)
    4370              :     {
    4371       767095 :       if (TARGET_64BIT)
    4372       767095 :         return function_value_ms_64 (orig_mode, mode, valtype);
    4373              :       else
    4374            0 :         return function_value_ms_32 (orig_mode, mode, fntype, fn, valtype);
    4375              :     }
    4376    106641039 :   else if (TARGET_64BIT)
    4377    102732627 :     return function_value_64 (orig_mode, mode, valtype);
    4378              :   else
    4379      3908412 :     return function_value_32 (orig_mode, mode, fntype, fn);
    4380              : }
    4381              : 
    4382              : static rtx
    4383    107302432 : ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
    4384              : {
    4385    107302432 :   machine_mode mode, orig_mode;
    4386              : 
    4387    107302432 :   orig_mode = TYPE_MODE (valtype);
    4388    107302432 :   mode = type_natural_mode (valtype, NULL, true);
    4389    107302432 :   return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
    4390              : }
    4391              : 
    4392              : /* Pointer function arguments and return values are promoted to
    4393              :    word_mode for normal functions.  */
    4394              : 
    4395              : static machine_mode
    4396     31955484 : ix86_promote_function_mode (const_tree type, machine_mode mode,
    4397              :                             int *punsignedp, const_tree fntype,
    4398              :                             int for_return)
    4399              : {
    4400     31955484 :   if (cfun->machine->func_type == TYPE_NORMAL
    4401     31954461 :       && type != NULL_TREE
    4402     31920624 :       && POINTER_TYPE_P (type))
    4403              :     {
    4404     15958595 :       *punsignedp = POINTERS_EXTEND_UNSIGNED;
    4405     15958595 :       return word_mode;
    4406              :     }
    4407     15996889 :   return default_promote_function_mode (type, mode, punsignedp, fntype,
    4408     15996889 :                                         for_return);
    4409              : }
    4410              : 
    4411              : /* Return true if a structure, union or array with MODE containing FIELD
    4412              :    should be accessed using BLKmode.  */
    4413              : 
    4414              : static bool
    4415    149179411 : ix86_member_type_forces_blk (const_tree field, machine_mode mode)
    4416              : {
    4417              :   /* Union with XFmode must be in BLKmode.  */
    4418    149179411 :   return (mode == XFmode
    4419    149348864 :           && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
    4420       129585 :               || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
    4421              : }
    4422              : 
rtx
ix86_libcall_value (machine_mode mode)
{
  /* Libcalls carry no type information, so classify purely by MODE.  */
  return ix86_function_value_1 (NULL, NULL, mode, mode);
}
    4428              : 
    4429              : /* Return true iff type is returned in memory.  */
    4430              : 
static bool
ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  const machine_mode mode = type_natural_mode (type, NULL, true);
  HOST_WIDE_INT size;

  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (fntype) == MS_ABI)
        {
          size = int_size_in_bytes (type);

          /* __m128 is returned in xmm0.  */
          if ((!type || VECTOR_INTEGER_TYPE_P (type)
               || INTEGRAL_TYPE_P (type)
               || VECTOR_FLOAT_TYPE_P (type))
              && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
              && !COMPLEX_MODE_P (mode)
              && (GET_MODE_SIZE (mode) == 16 || size == 16))
            return false;

          /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes to
             be returned in a register.  */
          return size != 1 && size != 2 && size != 4 && size != 8;
        }
      else
        {
          /* SysV AMD64: let the classification machinery decide.  */
          int needed_intregs, needed_sseregs;

          return examine_argument (mode, type, 1,
                                   &needed_intregs, &needed_sseregs);
        }
    }
  else
    {
      size = int_size_in_bytes (type);

      /* Intel MCU psABI returns scalars and aggregates no larger than 8
         bytes in registers.  */
      if (TARGET_IAMCU)
        return VECTOR_MODE_P (mode) || size < 0 || size > 8;

      if (mode == BLKmode)
        return true;

      if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
        return false;

      if (VECTOR_MODE_P (mode) || mode == TImode)
        {
          /* User-created vectors small enough to fit in EAX.  */
          if (size < 8)
            return false;

          /* Unless ABI prescribes otherwise,
             MMX/3dNow values are returned in MM0 if available.  */

          if (size == 8)
            return TARGET_VECT8_RETURNS || !TARGET_MMX;

          /* SSE values are returned in XMM0 if available.  */
          if (size == 16)
            return !TARGET_SSE;

          /* AVX values are returned in YMM0 if available.  */
          if (size == 32)
            return !TARGET_AVX;

          /* AVX512F values are returned in ZMM0 if available.  */
          if (size == 64)
            return !TARGET_AVX512F;
        }

      if (mode == XFmode)
        return false;

      if (size > 12)
        return true;

      /* OImode shouldn't be used directly.  */
      gcc_assert (mode != OImode);

      return false;
    }
}
    4515              : 
    4516              : /* Implement TARGET_PUSH_ARGUMENT.  */
    4517              : 
    4518              : static bool
    4519      9315521 : ix86_push_argument (unsigned int npush)
    4520              : {
    4521              :   /* If SSE2 is available, use vector move to put large argument onto
    4522              :      stack.  NB:  In 32-bit mode, use 8-byte vector move.  */
    4523     11733868 :   return ((!TARGET_SSE2 || npush < (TARGET_64BIT ? 16 : 8))
    4524      9051130 :           && TARGET_PUSH_ARGS
    4525     18366553 :           && !ACCUMULATE_OUTGOING_ARGS);
    4526              : }
    4527              : 
    4528              : 
    4529              : /* Create the va_list data type.  */
    4530              : 
static tree
ix86_build_builtin_va_list_64 (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  /* Build the __va_list_tag RECORD_TYPE and its TYPE_DECL.  */
  record = lang_hooks.types.make_type (RECORD_TYPE);
  type_decl = build_decl (BUILTINS_LOCATION,
                          TYPE_DECL, get_identifier ("__va_list_tag"), record);

  /* The four fields: offsets into the register save area for general
     and FP registers, the overflow (stack) area pointer, and the
     register save area base pointer.  */
  f_gpr = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("gp_offset"),
                      unsigned_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("fp_offset"),
                      unsigned_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("overflow_arg_area"),
                      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("reg_save_area"),
                      ptr_type_node);

  /* Remember the counter fields for va_list bookkeeping elsewhere.  */
  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  /* Chain the fields into the record in declaration order.  */
  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* Tag the record so canonical_va_list_type can identify it even after
     LTO type merging (see ix86_build_builtin_va_list).  */
  TYPE_ATTRIBUTES (record) = tree_cons (get_identifier ("sysv_abi va_list"),
                                        NULL_TREE, TYPE_ATTRIBUTES (record));

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
    4576              : 
    4577              : /* Setup the builtin va_list data type and for 64-bit the additional
    4578              :    calling convention specific va_list data types.  */
    4579              : 
    4580              : static tree
    4581       285422 : ix86_build_builtin_va_list (void)
    4582              : {
    4583       285422 :   if (TARGET_64BIT)
    4584              :     {
    4585              :       /* Initialize ABI specific va_list builtin types.
    4586              : 
    4587              :          In lto1, we can encounter two va_list types:
    4588              :          - one as a result of the type-merge across TUs, and
    4589              :          - the one constructed here.
    4590              :          These two types will not have the same TYPE_MAIN_VARIANT, and therefore
    4591              :          a type identity check in canonical_va_list_type based on
    4592              :          TYPE_MAIN_VARIANT (which we used to have) will not work.
    4593              :          Instead, we tag each va_list_type_node with its unique attribute, and
    4594              :          look for the attribute in the type identity check in
    4595              :          canonical_va_list_type.
    4596              : 
    4597              :          Tagging sysv_va_list_type_node directly with the attribute is
    4598              :          problematic since it's a array of one record, which will degrade into a
    4599              :          pointer to record when used as parameter (see build_va_arg comments for
    4600              :          an example), dropping the attribute in the process.  So we tag the
    4601              :          record instead.  */
    4602              : 
    4603              :       /* For SYSV_ABI we use an array of one record.  */
    4604       278262 :       sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
    4605              : 
    4606              :       /* For MS_ABI we use plain pointer to argument area.  */
    4607       278262 :       tree char_ptr_type = build_pointer_type (char_type_node);
    4608       278262 :       tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
    4609       278262 :                              TYPE_ATTRIBUTES (char_ptr_type));
    4610       278262 :       ms_va_list_type_node = build_type_attribute_variant (char_ptr_type, attr);
    4611              : 
    4612       278262 :       return ((ix86_abi == MS_ABI)
    4613       278262 :               ? ms_va_list_type_node
    4614       278262 :               : sysv_va_list_type_node);
    4615              :     }
    4616              :   else
    4617              :     {
    4618              :       /* For i386 we use plain pointer to argument area.  */
    4619         7160 :       return build_pointer_type (char_type_node);
    4620              :     }
    4621              : }
    4622              : 
/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */

/* Emit prologue code that dumps the unnamed-argument registers of a SysV
   64-bit varargs function into the register save area, from which va_arg
   later pulls them.  CUM describes how many named GPR/SSE argument
   registers are already consumed; only the remaining ones are spilled.
   Also records the save-area sizes in ix86_varargs_{gpr,fpr}_size for use
   by ix86_va_start.  */

static void
setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
{
  rtx save_area, mem;
  alias_set_type set;
  int i, max;

  /* GPR size of varargs save area.  */
  if (cfun->va_list_gpr_size)
    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
  else
    ix86_varargs_gpr_size = 0;

  /* FPR size of varargs save area.  We don't need it if we don't pass
     anything in SSE registers.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
  else
    ix86_varargs_fpr_size = 0;

  /* Nothing to save: no registers need spilling at all.  */
  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
    return;

  /* The save area lives right above the stack frame; the prologue
     addresses it via the frame pointer.  */
  save_area = frame_pointer_rtx;
  set = get_varargs_alias_set ();

  /* Spill only the GPRs not consumed by named arguments, and no more
     than the va_list tracking actually needs.  */
  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
  if (max > X86_64_REGPARM_MAX)
    max = X86_64_REGPARM_MAX;

  /* preserve_none functions use a different integer parameter-register
     ordering than the standard SysV sequence.  */
  const int *parm_regs;
  if (cum->preserve_none_abi)
    parm_regs = x86_64_preserve_none_int_parameter_registers;
  else
    parm_regs = x86_64_int_parameter_registers;

  for (i = cum->regno; i < max; i++)
    {
      mem = gen_rtx_MEM (word_mode,
                         plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      emit_move_insn (mem,
                      gen_rtx_REG (word_mode, parm_regs[i]));
    }

  if (ix86_varargs_fpr_size)
    {
      machine_mode smode;
      rtx_code_label *label;
      rtx test;

      /* Now emit code to save SSE registers.  The AX parameter contains number
         of SSE parameter registers used to call this function, though all we
         actually check here is the zero/non-zero status.  */

      /* Skip the SSE spills entirely when the caller passed no SSE args
         (AL == 0) — the SSE unit may not even be usable then.  */
      label = gen_label_rtx ();
      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
                                      label));

      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
         we used movdqa (i.e. TImode) instead?  Perhaps even better would
         be if we could determine the real mode of the data, via a hook
         into pass_stdarg.  Ignore all that for now.  */
      smode = V4SFmode;
      /* The 16-byte SSE stores require the save area (hence the frame)
         to be at least 128-bit aligned.  */
      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
        crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);

      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
      if (max > X86_64_SSE_REGPARM_MAX)
        max = X86_64_SSE_REGPARM_MAX;

      /* SSE slots follow the GPR slots; each takes a full 16 bytes.  */
      for (i = cum->sse_regno; i < max; ++i)
        {
          mem = plus_constant (Pmode, save_area,
                               i * 16 + ix86_varargs_gpr_size);
          mem = gen_rtx_MEM (smode, mem);
          MEM_NOTRAP_P (mem) = 1;
          set_mem_alias_set (mem, set);
          set_mem_align (mem, GET_MODE_ALIGNMENT (smode));

          emit_move_insn (mem, gen_rtx_REG (smode, GET_SSE_REGNO (i)));
        }

      emit_label (label);
    }
}
    4713              : 
    4714              : static void
    4715         5652 : setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
    4716              : {
    4717         5652 :   alias_set_type set = get_varargs_alias_set ();
    4718         5652 :   int i;
    4719              : 
    4720              :   /* Reset to zero, as there might be a sysv vaarg used
    4721              :      before.  */
    4722         5652 :   ix86_varargs_gpr_size = 0;
    4723         5652 :   ix86_varargs_fpr_size = 0;
    4724              : 
    4725        14154 :   for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
    4726              :     {
    4727         8502 :       rtx reg, mem;
    4728              : 
    4729         8502 :       mem = gen_rtx_MEM (Pmode,
    4730         8502 :                          plus_constant (Pmode, virtual_incoming_args_rtx,
    4731         8502 :                                         i * UNITS_PER_WORD));
    4732         8502 :       MEM_NOTRAP_P (mem) = 1;
    4733         8502 :       set_mem_alias_set (mem, set);
    4734              : 
    4735         8502 :       reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
    4736         8502 :       emit_move_insn (mem, reg);
    4737              :     }
    4738         5652 : }
    4739              : 
    4740              : static void
    4741        21512 : ix86_setup_incoming_varargs (cumulative_args_t cum_v,
    4742              :                              const function_arg_info &arg,
    4743              :                              int *, int no_rtl)
    4744              : {
    4745        21512 :   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
    4746        21512 :   CUMULATIVE_ARGS next_cum;
    4747        21512 :   tree fntype;
    4748              : 
    4749              :   /* This argument doesn't appear to be used anymore.  Which is good,
    4750              :      because the old code here didn't suppress rtl generation.  */
    4751        21512 :   gcc_assert (!no_rtl);
    4752              : 
    4753        21512 :   if (!TARGET_64BIT)
    4754          154 :     return;
    4755              : 
    4756        21358 :   fntype = TREE_TYPE (current_function_decl);
    4757              : 
    4758              :   /* For varargs, we do not want to skip the dummy va_dcl argument.
    4759              :      For stdargs, we do want to skip the last named argument.  */
    4760        21358 :   next_cum = *cum;
    4761        21358 :   if ((!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))
    4762          121 :        || arg.type != NULL_TREE)
    4763        21383 :       && stdarg_p (fntype))
    4764        21262 :     ix86_function_arg_advance (pack_cumulative_args (&next_cum), arg);
    4765              : 
    4766        21358 :   if (cum->call_abi == MS_ABI)
    4767         5652 :     setup_incoming_varargs_ms_64 (&next_cum);
    4768              :   else
    4769        15706 :     setup_incoming_varargs_64 (&next_cum);
    4770              : }
    4771              : 
    4772              : /* Checks if TYPE is of kind va_list char *.  */
    4773              : 
    4774              : static bool
    4775        73096 : is_va_list_char_pointer (tree type)
    4776              : {
    4777        73096 :   tree canonic;
    4778              : 
    4779              :   /* For 32-bit it is always true.  */
    4780        73096 :   if (!TARGET_64BIT)
    4781              :     return true;
    4782        72934 :   canonic = ix86_canonical_va_list_type (type);
    4783        72934 :   return (canonic == ms_va_list_type_node
    4784        72934 :           || (ix86_abi == MS_ABI && canonic == va_list_type_node));
    4785              : }
    4786              : 
/* Implement va_start.  */

/* Expand va_start (VALIST, <last named arg>) for this target.  NEXTARG
   is the address of the first unnamed stack argument, used only by the
   generic char* expansion.  For the SysV 64-bit va_list record this
   initializes all four fields: gp_offset, fp_offset, overflow_arg_area
   and reg_save_area.  */

static void
ix86_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;
  tree type;
  rtx ovf_rtx;

  if (flag_split_stack
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      unsigned int scratch_regno;

      /* When we are splitting the stack, we can't refer to the stack
         arguments using internal_arg_pointer, because they may be on
         the old stack.  The split stack prologue will arrange to
         leave a pointer to the old stack arguments in a scratch
         register, which we here copy to a pseudo-register.  The split
         stack prologue can't set the pseudo-register directly because
         it (the prologue) runs before any registers have been saved.  */

      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno != INVALID_REGNUM)
        {
          rtx reg;
          rtx_insn *seq;

          reg = gen_reg_rtx (Pmode);
          cfun->machine->split_stack_varargs_pointer = reg;

          /* Build the copy in a detached sequence, then place it at the
             very start of the function, after the prologue's scratch
             register has been set.  */
          start_sequence ();
          emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
          seq = end_sequence ();

          push_topmost_sequence ();
          emit_insn_after (seq, entry_of_function ());
          pop_topmost_sequence ();
        }
    }

  /* Only 64bit target needs something special.  */
  if (is_va_list_char_pointer (TREE_TYPE (valist)))
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
        std_expand_builtin_va_start (valist, nextarg);
      else
        {
          rtx va_r, next;

          /* Split-stack variant of the generic expansion: the argument
             area is reached through the saved scratch pointer plus the
             offset of the first unnamed argument.  */
          va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
          next = expand_binop (ptr_mode, add_optab,
                               cfun->machine->split_stack_varargs_pointer,
                               crtl->args.arg_offset_rtx,
                               NULL_RTX, 0, OPTAB_LIB_WIDEN);
          convert_move (va_r, next, 0);
        }
      return;
    }

  /* SysV 64-bit record va_list: grab the four FIELD_DECLs in the order
     laid down by ix86_build_builtin_va_list_64.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
  /* The following should be folded into the MEM_REF offset.  */
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
                f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
                f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
                f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
                f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = crtl->args.info.regno;
  n_fpr = crtl->args.info.sse_regno;

  /* gp_offset: byte offset of the first unnamed GPR in the save area
     (8 bytes per integer register slot).  */
  if (cfun->va_list_gpr_size)
    {
      type = TREE_TYPE (gpr);
      t = build2 (MODIFY_EXPR, type,
                  gpr, build_int_cst (type, n_gpr * 8));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* fp_offset: SSE slots are 16 bytes each and follow all the GPR
     slots, hence the 8*X86_64_REGPARM_MAX base.  */
  if (TARGET_SSE && cfun->va_list_fpr_size)
    {
      type = TREE_TYPE (fpr);
      t = build2 (MODIFY_EXPR, type, fpr,
                  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the overflow area.  */
  type = TREE_TYPE (ovf);
  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    ovf_rtx = crtl->args.internal_arg_pointer;
  else
    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
  t = make_tree (type, ovf_rtx);
  /* Skip over any named arguments already passed on the stack.  */
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);

  t = build2 (MODIFY_EXPR, type, ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
    {
      /* Find the register save area.
         Prologue of the function save it right above stack frame.  */
      type = TREE_TYPE (sav);
      t = make_tree (type, frame_pointer_rtx);
      /* With no GPR slots spilled, bias the pointer back so that the
         fp_offset (which still assumes a full GPR area) lands on the
         first SSE slot.  */
      if (!ix86_varargs_gpr_size)
        t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);

      t = build2 (MODIFY_EXPR, type, sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
    4917              : 
    4918              : /* Implement va_arg.  */
    4919              : 
    4920              : static tree
    4921        52093 : ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
    4922              :                       gimple_seq *post_p)
    4923              : {
    4924        52093 :   static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
    4925        52093 :   tree f_gpr, f_fpr, f_ovf, f_sav;
    4926        52093 :   tree gpr, fpr, ovf, sav, t;
    4927        52093 :   int size, rsize;
    4928        52093 :   tree lab_false, lab_over = NULL_TREE;
    4929        52093 :   tree addr, t2;
    4930        52093 :   rtx container;
    4931        52093 :   int indirect_p = 0;
    4932        52093 :   tree ptrtype;
    4933        52093 :   machine_mode nat_mode;
    4934        52093 :   unsigned int arg_boundary;
    4935        52093 :   unsigned int type_align;
    4936              : 
    4937              :   /* Only 64bit target needs something special.  */
    4938        52093 :   if (is_va_list_char_pointer (TREE_TYPE (valist)))
    4939          260 :     return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
    4940              : 
    4941        51833 :   f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
    4942        51833 :   f_fpr = DECL_CHAIN (f_gpr);
    4943        51833 :   f_ovf = DECL_CHAIN (f_fpr);
    4944        51833 :   f_sav = DECL_CHAIN (f_ovf);
    4945              : 
    4946        51833 :   gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
    4947              :                 valist, f_gpr, NULL_TREE);
    4948              : 
    4949        51833 :   fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
    4950        51833 :   ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
    4951        51833 :   sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
    4952              : 
    4953        51833 :   indirect_p = pass_va_arg_by_reference (type);
    4954        51833 :   if (indirect_p)
    4955          103 :     type = build_pointer_type (type);
    4956        51833 :   size = arg_int_size_in_bytes (type);
    4957        51833 :   rsize = CEIL (size, UNITS_PER_WORD);
    4958              : 
    4959        51833 :   nat_mode = type_natural_mode (type, NULL, false);
    4960        51833 :   switch (nat_mode)
    4961              :     {
    4962           28 :     case E_V16HFmode:
    4963           28 :     case E_V16BFmode:
    4964           28 :     case E_V8SFmode:
    4965           28 :     case E_V8SImode:
    4966           28 :     case E_V32QImode:
    4967           28 :     case E_V16HImode:
    4968           28 :     case E_V4DFmode:
    4969           28 :     case E_V4DImode:
    4970           28 :     case E_V32HFmode:
    4971           28 :     case E_V32BFmode:
    4972           28 :     case E_V16SFmode:
    4973           28 :     case E_V16SImode:
    4974           28 :     case E_V64QImode:
    4975           28 :     case E_V32HImode:
    4976           28 :     case E_V8DFmode:
    4977           28 :     case E_V8DImode:
    4978              :       /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
    4979           28 :       if (!TARGET_64BIT_MS_ABI)
    4980              :         {
    4981              :           container = NULL;
    4982              :           break;
    4983              :         }
    4984              :       /* FALLTHRU */
    4985              : 
    4986        51805 :     default:
    4987        51805 :       container = construct_container (nat_mode, TYPE_MODE (type),
    4988              :                                        type, 0, X86_64_REGPARM_MAX,
    4989              :                                        X86_64_SSE_REGPARM_MAX, intreg,
    4990              :                                        0);
    4991        51805 :       break;
    4992              :     }
    4993              : 
    4994              :   /* Pull the value out of the saved registers.  */
    4995              : 
    4996        51833 :   addr = create_tmp_var (ptr_type_node, "addr");
    4997        51833 :   type_align = TYPE_ALIGN (type);
    4998              : 
    4999        51833 :   if (container)
    5000              :     {
    5001        28742 :       int needed_intregs, needed_sseregs;
    5002        28742 :       bool need_temp;
    5003        28742 :       tree int_addr, sse_addr;
    5004              : 
    5005        28742 :       lab_false = create_artificial_label (UNKNOWN_LOCATION);
    5006        28742 :       lab_over = create_artificial_label (UNKNOWN_LOCATION);
    5007              : 
    5008        28742 :       examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
    5009              : 
    5010        28742 :       bool container_in_reg = false;
    5011        28742 :       if (REG_P (container))
    5012              :         container_in_reg = true;
    5013         1641 :       else if (GET_CODE (container) == PARALLEL
    5014         1641 :                && GET_MODE (container) == BLKmode
    5015          580 :                && XVECLEN (container, 0) == 1)
    5016              :         {
    5017              :           /* Check if it is a PARALLEL BLKmode container of an EXPR_LIST
    5018              :              expression in a TImode register.  In this case, temp isn't
    5019              :              needed.  Otherwise, the TImode variable will be put in the
    5020              :              GPR save area which guarantees only 8-byte alignment.   */
    5021          509 :           rtx x = XVECEXP (container, 0, 0);
    5022          509 :           if (GET_CODE (x) == EXPR_LIST
    5023          509 :               && REG_P (XEXP (x, 0))
    5024          509 :               && XEXP (x, 1) == const0_rtx)
    5025              :             container_in_reg = true;
    5026              :         }
    5027              : 
    5028          680 :       need_temp = (!container_in_reg
    5029         1150 :                    && ((needed_intregs && TYPE_ALIGN (type) > 64)
    5030          680 :                        || TYPE_ALIGN (type) > 128));
    5031              : 
    5032              :       /* In case we are passing structure, verify that it is consecutive block
    5033              :          on the register save area.  If not we need to do moves.  */
    5034          680 :       if (!need_temp && !container_in_reg)
    5035              :         {
    5036              :           /* Verify that all registers are strictly consecutive  */
    5037          966 :           if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
    5038              :             {
    5039              :               int i;
    5040              : 
    5041          815 :               for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
    5042              :                 {
    5043          529 :                   rtx slot = XVECEXP (container, 0, i);
    5044          529 :                   if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
    5045          529 :                       || INTVAL (XEXP (slot, 1)) != i * 16)
    5046              :                     need_temp = true;
    5047              :                 }
    5048              :             }
    5049              :           else
    5050              :             {
    5051              :               int i;
    5052              : 
    5053         1120 :               for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
    5054              :                 {
    5055          726 :                   rtx slot = XVECEXP (container, 0, i);
    5056          726 :                   if (REGNO (XEXP (slot, 0)) != (unsigned int) i
    5057          726 :                       || INTVAL (XEXP (slot, 1)) != i * 8)
    5058              :                     need_temp = true;
    5059              :                 }
    5060              :             }
    5061              :         }
    5062        28742 :       if (!need_temp)
    5063              :         {
    5064              :           int_addr = addr;
    5065              :           sse_addr = addr;
    5066              :         }
    5067              :       else
    5068              :         {
    5069          877 :           int_addr = create_tmp_var (ptr_type_node, "int_addr");
    5070          877 :           sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
    5071              :         }
    5072              : 
    5073              :       /* First ensure that we fit completely in registers.  */
    5074        28742 :       if (needed_intregs)
    5075              :         {
    5076        18025 :           t = build_int_cst (TREE_TYPE (gpr),
    5077        18025 :                              (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
    5078        18025 :           t = build2 (GE_EXPR, boolean_type_node, gpr, t);
    5079        18025 :           t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
    5080        18025 :           t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
    5081        18025 :           gimplify_and_add (t, pre_p);
    5082              :         }
    5083        28742 :       if (needed_sseregs)
    5084              :         {
    5085        11109 :           t = build_int_cst (TREE_TYPE (fpr),
    5086              :                              (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
    5087        11109 :                              + X86_64_REGPARM_MAX * 8);
    5088        11109 :           t = build2 (GE_EXPR, boolean_type_node, fpr, t);
    5089        11109 :           t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
    5090        11109 :           t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
    5091        11109 :           gimplify_and_add (t, pre_p);
    5092              :         }
    5093              : 
    5094              :       /* Compute index to start of area used for integer regs.  */
    5095        28742 :       if (needed_intregs)
    5096              :         {
    5097              :           /* int_addr = gpr + sav; */
    5098        18025 :           t = fold_build_pointer_plus (sav, gpr);
    5099        18025 :           gimplify_assign (int_addr, t, pre_p);
    5100              :         }
    5101        28742 :       if (needed_sseregs)
    5102              :         {
    5103              :           /* sse_addr = fpr + sav; */
    5104        11109 :           t = fold_build_pointer_plus (sav, fpr);
    5105        11109 :           gimplify_assign (sse_addr, t, pre_p);
    5106              :         }
    5107        28742 :       if (need_temp)
    5108              :         {
    5109          877 :           int i, prev_size = 0;
    5110          877 :           tree temp = create_tmp_var (type, "va_arg_tmp");
    5111          877 :           TREE_ADDRESSABLE (temp) = 1;
    5112              : 
    5113              :           /* addr = &temp; */
    5114          877 :           t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
    5115          877 :           gimplify_assign (addr, t, pre_p);
    5116              : 
    5117         2241 :           for (i = 0; i < XVECLEN (container, 0); i++)
    5118              :             {
    5119         1364 :               rtx slot = XVECEXP (container, 0, i);
    5120         1364 :               rtx reg = XEXP (slot, 0);
    5121         1364 :               machine_mode mode = GET_MODE (reg);
    5122         1364 :               tree piece_type;
    5123         1364 :               tree addr_type;
    5124         1364 :               tree daddr_type;
    5125         1364 :               tree src_addr, src;
    5126         1364 :               int src_offset;
    5127         1364 :               tree dest_addr, dest;
    5128         1364 :               int cur_size = GET_MODE_SIZE (mode);
    5129              : 
    5130         1364 :               gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
    5131         1364 :               prev_size = INTVAL (XEXP (slot, 1));
    5132         1364 :               if (prev_size + cur_size > size)
    5133              :                 {
    5134           30 :                   cur_size = size - prev_size;
    5135           30 :                   unsigned int nbits = cur_size * BITS_PER_UNIT;
    5136           30 :                   if (!int_mode_for_size (nbits, 1).exists (&mode))
    5137           10 :                     mode = QImode;
    5138              :                 }
    5139         1364 :               piece_type = lang_hooks.types.type_for_mode (mode, 1);
    5140         1364 :               if (mode == GET_MODE (reg))
    5141         1334 :                 addr_type = build_pointer_type (piece_type);
    5142              :               else
    5143           30 :                 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
    5144              :                                                          true);
    5145         1364 :               daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
    5146              :                                                         true);
    5147              : 
    5148         1364 :               if (SSE_REGNO_P (REGNO (reg)))
    5149              :                 {
    5150          534 :                   src_addr = sse_addr;
    5151          534 :                   src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
    5152              :                 }
    5153              :               else
    5154              :                 {
    5155          830 :                   src_addr = int_addr;
    5156          830 :                   src_offset = REGNO (reg) * 8;
    5157              :                 }
    5158         1364 :               src_addr = fold_convert (addr_type, src_addr);
    5159         1364 :               src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
    5160              : 
    5161         1364 :               dest_addr = fold_convert (daddr_type, addr);
    5162         1364 :               dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
    5163         2728 :               if (cur_size == GET_MODE_SIZE (mode))
    5164              :                 {
    5165         1354 :                   src = build_va_arg_indirect_ref (src_addr);
    5166         1354 :                   dest = build_va_arg_indirect_ref (dest_addr);
    5167              : 
    5168         1354 :                   gimplify_assign (dest, src, pre_p);
    5169              :                 }
    5170              :               else
    5171              :                 {
    5172           10 :                   tree copy
    5173           20 :                     = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
    5174              :                                        3, dest_addr, src_addr,
    5175           10 :                                        size_int (cur_size));
    5176           10 :                   gimplify_and_add (copy, pre_p);
    5177              :                 }
    5178         1364 :               prev_size += cur_size;
    5179              :             }
    5180              :         }
    5181              : 
    5182        28742 :       if (needed_intregs)
    5183              :         {
    5184        18025 :           t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
    5185        18025 :                       build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
    5186        18025 :           gimplify_assign (gpr, t, pre_p);
    5187              :           /* The GPR save area guarantees only 8-byte alignment.  */
    5188        18025 :           if (!need_temp)
    5189        17221 :             type_align = MIN (type_align, 64);
    5190              :         }
    5191              : 
    5192        28742 :       if (needed_sseregs)
    5193              :         {
    5194        11109 :           t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
    5195        11109 :                       build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
    5196        11109 :           gimplify_assign (unshare_expr (fpr), t, pre_p);
    5197              :         }
    5198              : 
    5199        28742 :       gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
    5200              : 
    5201        28742 :       gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    5202              :     }
    5203              : 
    5204              :   /* ... otherwise out of the overflow area.  */
    5205              : 
    5206              :   /* When we align parameter on stack for caller, if the parameter
    5207              :      alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
    5208              :      aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match callee
    5209              :      here with caller.  */
    5210        51833 :   arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
    5211        51833 :   if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
    5212              :     arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
    5213              : 
    5214              :   /* Care for on-stack alignment if needed.  */
    5215        51833 :   if (arg_boundary <= 64 || size == 0)
    5216        34796 :     t = ovf;
    5217              :  else
    5218              :     {
    5219        17037 :       HOST_WIDE_INT align = arg_boundary / 8;
    5220        17037 :       t = fold_build_pointer_plus_hwi (ovf, align - 1);
    5221        17037 :       t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
    5222        17037 :                   build_int_cst (TREE_TYPE (t), -align));
    5223              :     }
    5224              : 
    5225        51833 :   gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
    5226        51833 :   gimplify_assign (addr, t, pre_p);
    5227              : 
    5228        51833 :   t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
    5229        51833 :   gimplify_assign (unshare_expr (ovf), t, pre_p);
    5230              : 
    5231        51833 :   if (container)
    5232        28742 :     gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
    5233              : 
    5234        51833 :   type = build_aligned_type (type, type_align);
    5235        51833 :   ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
    5236        51833 :   addr = fold_convert (ptrtype, addr);
    5237              : 
    5238        51833 :   if (indirect_p)
    5239          103 :     addr = build_va_arg_indirect_ref (addr);
    5240        51833 :   return build_va_arg_indirect_ref (addr);
    5241              : }
    5242              : 
    5243              : /* Return true if OPNUM's MEM should be matched
    5244              :    in movabs* patterns.  */
    5245              : 
    5246              : bool
    5247          505 : ix86_check_movabs (rtx insn, int opnum)
    5248              : {
    5249          505 :   rtx set, mem;
    5250              : 
    5251          505 :   set = PATTERN (insn);
    5252          505 :   if (GET_CODE (set) == PARALLEL)
    5253            0 :     set = XVECEXP (set, 0, 0);
    5254          505 :   gcc_assert (GET_CODE (set) == SET);
    5255          505 :   mem = XEXP (set, opnum);
    5256          505 :   while (SUBREG_P (mem))
    5257            0 :     mem = SUBREG_REG (mem);
    5258          505 :   gcc_assert (MEM_P (mem));
    5259          505 :   return volatile_ok || !MEM_VOLATILE_P (mem);
    5260              : }
    5261              : 
    5262              : /* Return true if XVECEXP idx of INSN satisfies MOVS arguments.  */
    5263              : bool
    5264       197535 : ix86_check_movs (rtx insn, int idx)
    5265              : {
    5266       197535 :   rtx pat = PATTERN (insn);
    5267       197535 :   gcc_assert (GET_CODE (pat) == PARALLEL);
    5268              : 
    5269       197535 :   rtx set = XVECEXP (pat, 0, idx);
    5270       197535 :   gcc_assert (GET_CODE (set) == SET);
    5271              : 
    5272       197535 :   rtx dst = SET_DEST (set);
    5273       197535 :   gcc_assert (MEM_P (dst));
    5274              : 
    5275       197535 :   rtx src = SET_SRC (set);
    5276       197535 :   gcc_assert (MEM_P (src));
    5277              : 
    5278       197535 :   return (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (dst))
    5279       395070 :           && (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (src))
    5280            0 :               || Pmode == word_mode));
    5281              : }
    5282              : 
/* Return false if INSN contains a MEM with a non-default address space.  */
bool
ix86_check_no_addr_space (rtx insn)
{
  subrtx_var_iterator::array_type array;
  /* Walk every sub-rtx of the insn pattern; any MEM whose address
     space is not the generic one disqualifies the insn.  */
  FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), ALL)
    {
      rtx x = *iter;
      if (MEM_P (x) && !ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)))
        return false;
    }
  return true;
}
    5296              : 
    5297              : /* Initialize the table of extra 80387 mathematical constants.  */
    5298              : 
    5299              : static void
    5300         2383 : init_ext_80387_constants (void)
    5301              : {
    5302         2383 :   static const char * cst[5] =
    5303              :   {
    5304              :     "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
    5305              :     "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
    5306              :     "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
    5307              :     "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
    5308              :     "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
    5309              :   };
    5310         2383 :   int i;
    5311              : 
    5312        14298 :   for (i = 0; i < 5; i++)
    5313              :     {
    5314        11915 :       real_from_string (&ext_80387_constants_table[i], cst[i]);
    5315              :       /* Ensure each constant is rounded to XFmode precision.  */
    5316        11915 :       real_convert (&ext_80387_constants_table[i],
    5317        23830 :                     XFmode, &ext_80387_constants_table[i]);
    5318              :     }
    5319              : 
    5320         2383 :   ext_80387_constants_init = 1;
    5321         2383 : }
    5322              : 
/* Return non-zero if the constant is something that
   can be loaded with a special instruction.  */

int
standard_80387_constant_p (rtx x)
{
  machine_mode mode = GET_MODE (x);

  const REAL_VALUE_TYPE *r;

  /* Only floating-point CONST_DOUBLEs in an x87-capable mode qualify;
     everything else is reported as -1 (not a candidate at all).  */
  if (!(CONST_DOUBLE_P (x) && X87_FLOAT_MODE_P (mode)))
    return -1;

  /* Codes 1 and 2 select the single-insn loads fldz and fld1
     respectively (see standard_80387_constant_opcode).  */
  if (x == CONST0_RTX (mode))
    return 1;
  if (x == CONST1_RTX (mode))
    return 2;

  r = CONST_DOUBLE_REAL_VALUE (x);

  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them.  */
  if (mode == XFmode
      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)
      && !flag_rounding_math)
    {
      int i;

      if (! ext_80387_constants_init)
        init_ext_80387_constants ();

      /* Codes 3..7 map to fldlg2, fldln2, fldl2e, fldl2t and fldpi.  */
      for (i = 0; i < 5; i++)
        if (real_identical (r, &ext_80387_constants_table[i]))
          return i + 3;
    }

  /* Load of the constant -0.0 or -1.0 will be split as
     fldz;fchs or fld1;fchs sequence.  */
  if (real_isnegzero (r))
    return 8;
  if (real_identical (r, &dconstm1))
    return 9;

  /* Code 0: no special load exists for this constant.  */
  return 0;
}
    5368              : 
    5369              : /* Return the opcode of the special instruction to be used to load
    5370              :    the constant X.  */
    5371              : 
    5372              : const char *
    5373        54500 : standard_80387_constant_opcode (rtx x)
    5374              : {
    5375        54500 :   switch (standard_80387_constant_p (x))
    5376              :     {
    5377              :     case 1:
    5378              :       return "fldz";
    5379        33957 :     case 2:
    5380        33957 :       return "fld1";
    5381            1 :     case 3:
    5382            1 :       return "fldlg2";
    5383           10 :     case 4:
    5384           10 :       return "fldln2";
    5385           12 :     case 5:
    5386           12 :       return "fldl2e";
    5387            2 :     case 6:
    5388            2 :       return "fldl2t";
    5389          192 :     case 7:
    5390          192 :       return "fldpi";
    5391            0 :     case 8:
    5392            0 :     case 9:
    5393            0 :       return "#";
    5394            0 :     default:
    5395            0 :       gcc_unreachable ();
    5396              :     }
    5397              : }
    5398              : 
    5399              : /* Return the CONST_DOUBLE representing the 80387 constant that is
    5400              :    loaded by the specified special instruction.  The argument IDX
    5401              :    matches the return value from standard_80387_constant_p.  */
    5402              : 
    5403              : rtx
    5404           24 : standard_80387_constant_rtx (int idx)
    5405              : {
    5406           24 :   int i;
    5407              : 
    5408           24 :   if (! ext_80387_constants_init)
    5409            7 :     init_ext_80387_constants ();
    5410              : 
    5411           24 :   switch (idx)
    5412              :     {
    5413           24 :     case 3:
    5414           24 :     case 4:
    5415           24 :     case 5:
    5416           24 :     case 6:
    5417           24 :     case 7:
    5418           24 :       i = idx - 3;
    5419           24 :       break;
    5420              : 
    5421            0 :     default:
    5422            0 :       gcc_unreachable ();
    5423              :     }
    5424              : 
    5425           24 :   return const_double_from_real_value (ext_80387_constants_table[i],
    5426           24 :                                        XFmode);
    5427              : }
    5428              : 
/* Return 1 if X is all bits 0, 2 if X is all bits 1
   and 3 if X is all bits 1 with zero extend
   in supported SSE/AVX vector mode.  */

int
standard_sse_constant_p (rtx x, machine_mode pred_mode)
{
  machine_mode mode;

  /* Without SSE none of these constants can be loaded specially.  */
  if (!TARGET_SSE)
    return 0;

  mode = GET_MODE (x);

  /* All-zeros is always available (xor of a register with itself).  */
  if (x == const0_rtx || const0_operand (x, mode))
    return 1;

  /* All-ones, either as an integer constant or as a float vector whose
     bit pattern is all ones (the latter only checked when either mode
     is a float vector mode).  */
  if (x == constm1_rtx
      || vector_all_ones_operand (x, mode)
      || ((GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
           || GET_MODE_CLASS (pred_mode) == MODE_VECTOR_FLOAT)
          && float_vector_all_ones_operand (x, mode)))
    {
      /* VOIDmode integer constant, get mode from the predicate.  */
      if (mode == VOIDmode)
        mode = pred_mode;

      /* The ISA needed to materialize all-ones depends on the vector
         width: AVX512F for 512 bits, AVX2 for 256, SSE2 for 128.  */
      switch (GET_MODE_SIZE (mode))
        {
        case 64:
          if (TARGET_AVX512F)
            return 2;
          break;
        case 32:
          if (TARGET_AVX2)
            return 2;
          break;
        case 16:
          if (TARGET_SSE2)
            return 2;
          break;
        case 0:
          /* VOIDmode */
          gcc_unreachable ();
        default:
          break;
        }
    }

  /* All-ones in the low half or quarter with the upper part zero
     (materialized by a narrower compare; see
     standard_sse_constant_opcode).  */
  if (vector_all_ones_zero_extend_half_operand (x, mode)
      || vector_all_ones_zero_extend_quarter_operand (x, mode))
    return 3;

  return 0;
}
    5484              : 
/* Return the opcode of the special instruction to be used to load
   the constant operands[1] into operands[0].  */

const char *
standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
{
  machine_mode mode;
  rtx x = operands[1];

  gcc_assert (TARGET_SSE);

  mode = GET_MODE (x);

  /* All-zeros: clear the destination by xor-ing it with itself.  The
     mnemonic depends on the insn's mode attribute and on whether the
     destination is an extended register (xmm16+), which needs an
     EVEX-encoded form; without AVX512VL only the full-width %g0
     (zmm) variant is used.  */
  if (x == const0_rtx || const0_operand (x, mode))
    {
      switch (get_attr_mode (insn))
        {
        case MODE_TI:
          if (!EXT_REX_SSE_REG_P (operands[0]))
            return "%vpxor\t%0, %d0";
          /* FALLTHRU */
        case MODE_XI:
        case MODE_OI:
          if (EXT_REX_SSE_REG_P (operands[0]))
            {
              if (TARGET_AVX512VL)
                return "vpxord\t%x0, %x0, %x0";
              else
                return "vpxord\t%g0, %g0, %g0";
            }
          return "vpxor\t%x0, %x0, %x0";

        case MODE_V2DF:
          if (!EXT_REX_SSE_REG_P (operands[0]))
            return "%vxorpd\t%0, %d0";
          /* FALLTHRU */
        case MODE_V8DF:
        case MODE_V4DF:
          if (EXT_REX_SSE_REG_P (operands[0]))
            {
              /* The FP form vxorpd on extended registers requires
                 AVX512DQ; otherwise fall back to the integer vpxorq.  */
              if (TARGET_AVX512DQ)
                {
                  if (TARGET_AVX512VL)
                    return "vxorpd\t%x0, %x0, %x0";
                  else
                    return "vxorpd\t%g0, %g0, %g0";
                }
              else
                {
                  if (TARGET_AVX512VL)
                    return "vpxorq\t%x0, %x0, %x0";
                  else
                    return "vpxorq\t%g0, %g0, %g0";
                }
            }
          return "vxorpd\t%x0, %x0, %x0";

        case MODE_V4SF:
          if (!EXT_REX_SSE_REG_P (operands[0]))
            return "%vxorps\t%0, %d0";
          /* FALLTHRU */
        case MODE_V16SF:
        case MODE_V8SF:
          if (EXT_REX_SSE_REG_P (operands[0]))
            {
              /* Likewise: vxorps needs AVX512DQ on extended registers;
                 otherwise use the integer vpxord.  */
              if (TARGET_AVX512DQ)
                {
                  if (TARGET_AVX512VL)
                    return "vxorps\t%x0, %x0, %x0";
                  else
                    return "vxorps\t%g0, %g0, %g0";
                }
              else
                {
                  if (TARGET_AVX512VL)
                    return "vpxord\t%x0, %x0, %x0";
                  else
                    return "vpxord\t%g0, %g0, %g0";
                }
            }
          return "vxorps\t%x0, %x0, %x0";

        default:
          gcc_unreachable ();
        }
    }
  /* All-ones: compare a register against itself (pcmpeqd), or set all
     bits with vpternlogd $0xFF for 512-bit modes and extended
     registers.  */
  else if (x == constm1_rtx
           || vector_all_ones_operand (x, mode)
           || (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
               && float_vector_all_ones_operand (x, mode)))
    {
      enum attr_mode insn_mode = get_attr_mode (insn);

      switch (insn_mode)
        {
        case MODE_XI:
        case MODE_V8DF:
        case MODE_V16SF:
          gcc_assert (TARGET_AVX512F);
          return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";

        case MODE_OI:
        case MODE_V4DF:
        case MODE_V8SF:
          gcc_assert (TARGET_AVX2);
          /* FALLTHRU */
        case MODE_TI:
        case MODE_V2DF:
        case MODE_V4SF:
          gcc_assert (TARGET_SSE2);
          if (EXT_REX_SSE_REG_P (operands[0]))
            {
              if (TARGET_AVX512VL)
                return "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}";
              else
                return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
            }
          return (TARGET_AVX
                  ? "vpcmpeqd\t%0, %0, %0"
                  : "pcmpeqd\t%0, %0");

        default:
          gcc_unreachable ();
        }
   }
  /* All-ones in the low half, upper half zero: compare only the
     narrower part (%t0 = ymm part of a zmm, %x0 = xmm part).  */
  else if (vector_all_ones_zero_extend_half_operand (x, mode))
    {
      if (GET_MODE_SIZE (mode) == 64)
        {
          gcc_assert (TARGET_AVX512F);
          return "vpcmpeqd\t%t0, %t0, %t0";
        }
      else if (GET_MODE_SIZE (mode) == 32)
        {
          gcc_assert (TARGET_AVX);
          return "vpcmpeqd\t%x0, %x0, %x0";
        }
      gcc_unreachable ();
    }
  /* All-ones in the low quarter of a 512-bit value.  */
  else if (vector_all_ones_zero_extend_quarter_operand (x, mode))
    {
      gcc_assert (TARGET_AVX512F);
      return "vpcmpeqd\t%x0, %x0, %x0";
    }

  /* Callers must only pass constants classified non-zero by
     standard_sse_constant_p.  */
  gcc_unreachable ();
}
    5632              : 
    5633              : /* Returns true if INSN can be transformed from a memory load
    5634              :    to a supported FP constant load.  */
    5635              : 
    5636              : bool
    5637      2141475 : ix86_standard_x87sse_constant_load_p (const rtx_insn *insn, rtx dst)
    5638              : {
    5639      2141475 :   rtx src = find_constant_src (insn);
    5640              : 
    5641      2141475 :   gcc_assert (REG_P (dst));
    5642              : 
    5643      2141475 :   if (src == NULL
    5644       595179 :       || (SSE_REGNO_P (REGNO (dst))
    5645       463349 :           && standard_sse_constant_p (src, GET_MODE (dst)) != 1)
    5646       165509 :       || (!TARGET_AVX512VL
    5647       165448 :           && EXT_REX_SSE_REGNO_P (REGNO (dst))
    5648            0 :           && standard_sse_constant_p (src, GET_MODE (dst)) == 1)
    5649      2306984 :       || (STACK_REGNO_P (REGNO (dst))
    5650       131830 :            && standard_80387_constant_p (src) < 1))
    5651      2065247 :     return false;
    5652              : 
    5653              :   return true;
    5654              : }
    5655              : 
    5656              : /* Predicate for pre-reload splitters with associated instructions,
    5657              :    which can match any time before the split1 pass (usually combine),
    5658              :    then are unconditionally split in that pass and should not be
    5659              :    matched again afterwards.  */
    5660              : 
    5661              : bool
    5662     17678266 : ix86_pre_reload_split (void)
    5663              : {
    5664     17678266 :   return (can_create_pseudo_p ()
    5665     26809499 :           && !(cfun->curr_properties & PROP_rtl_split_insns));
    5666              : }
    5667              : 
    5668              : /* Return the opcode of the TYPE_SSEMOV instruction.  To move from
    5669              :    or to xmm16-xmm31/ymm16-ymm31 registers, we either require
    5670              :    TARGET_AVX512VL or it is a register to register move which can
    5671              :    be done with zmm register move. */
    5672              : 
    5673              : static const char *
    5674      4186587 : ix86_get_ssemov (rtx *operands, unsigned size,
    5675              :                  enum attr_mode insn_mode, machine_mode mode)
    5676              : {
    5677      4186587 :   char buf[128];
    5678      4186587 :   bool misaligned_p = (misaligned_operand (operands[0], mode)
    5679      4186587 :                        || misaligned_operand (operands[1], mode));
    5680      4186587 :   bool evex_reg_p = (size == 64
    5681      4099870 :                      || EXT_REX_SSE_REG_P (operands[0])
    5682      8285719 :                      || EXT_REX_SSE_REG_P (operands[1]));
    5683              : 
    5684      4186587 :   bool egpr_p = (TARGET_APX_EGPR
    5685      4186587 :                  && (x86_extended_rex2reg_mentioned_p (operands[0])
    5686          184 :                      || x86_extended_rex2reg_mentioned_p (operands[1])));
    5687          196 :   bool egpr_vl = egpr_p && TARGET_AVX512VL;
    5688              : 
    5689      4186587 :   machine_mode scalar_mode;
    5690              : 
    5691      4186587 :   const char *opcode = NULL;
    5692      4186587 :   enum
    5693              :     {
    5694              :       opcode_int,
    5695              :       opcode_float,
    5696              :       opcode_double
    5697      4186587 :     } type = opcode_int;
    5698              : 
    5699      4186587 :   switch (insn_mode)
    5700              :     {
    5701              :     case MODE_V16SF:
    5702              :     case MODE_V8SF:
    5703              :     case MODE_V4SF:
    5704              :       scalar_mode = E_SFmode;
    5705              :       type = opcode_float;
    5706              :       break;
    5707       208918 :     case MODE_V8DF:
    5708       208918 :     case MODE_V4DF:
    5709       208918 :     case MODE_V2DF:
    5710       208918 :       scalar_mode = E_DFmode;
    5711       208918 :       type = opcode_double;
    5712       208918 :       break;
    5713      1517837 :     case MODE_XI:
    5714      1517837 :     case MODE_OI:
    5715      1517837 :     case MODE_TI:
    5716      1517837 :       scalar_mode = GET_MODE_INNER (mode);
    5717              :       break;
    5718            0 :     default:
    5719            0 :       gcc_unreachable ();
    5720              :     }
    5721              : 
    5722              :   /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
    5723              :      we can only use zmm register move without memory operand.  */
    5724      4186587 :   if (evex_reg_p
    5725        88758 :       && !TARGET_AVX512VL
    5726      4237029 :       && GET_MODE_SIZE (mode) < 64)
    5727              :     {
    5728              :       /* NB: Even though ix86_hard_regno_mode_ok doesn't allow
    5729              :          xmm16-xmm31 nor ymm16-ymm31 in 128/256 bit modes when
    5730              :          AVX512VL is disabled, LRA can still generate reg to
    5731              :          reg moves with xmm16-xmm31 and ymm16-ymm31 in 128/256 bit
    5732              :          modes.  */
    5733            0 :       if (memory_operand (operands[0], mode)
    5734            0 :           || memory_operand (operands[1], mode))
    5735            0 :         gcc_unreachable ();
    5736            0 :       size = 64;
    5737            0 :       switch (type)
    5738              :         {
    5739            0 :         case opcode_int:
    5740            0 :           if (scalar_mode == E_HFmode || scalar_mode == E_BFmode)
    5741            0 :             opcode = (misaligned_p
    5742            0 :                       ? (TARGET_AVX512BW ? "vmovdqu16" : "vmovdqu64")
    5743              :                       : "vmovdqa64");
    5744              :           else
    5745            0 :             opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
    5746              :           break;
    5747            0 :         case opcode_float:
    5748            0 :           opcode = misaligned_p ? "vmovups" : "vmovaps";
    5749              :           break;
    5750            0 :         case opcode_double:
    5751            0 :           opcode = misaligned_p ? "vmovupd" : "vmovapd";
    5752              :           break;
    5753              :         }
    5754              :     }
    5755      4186587 :   else if (SCALAR_FLOAT_MODE_P (scalar_mode))
    5756              :     {
    5757      2847218 :       switch (scalar_mode)
    5758              :         {
    5759        36736 :         case E_HFmode:
    5760        36736 :         case E_BFmode:
    5761        36736 :           if (evex_reg_p || egpr_vl)
    5762        11597 :             opcode = (misaligned_p
    5763          173 :                       ? (TARGET_AVX512BW
    5764              :                          ? "vmovdqu16"
    5765              :                          : "vmovdqu64")
    5766              :                       : "vmovdqa64");
    5767        25139 :           else if (egpr_p)
    5768       817751 :             opcode = (misaligned_p
    5769            0 :                       ? (TARGET_AVX512BW
    5770            0 :                          ? "vmovdqu16"
    5771              :                          : "%vmovups")
    5772              :                       : "%vmovaps");
    5773              :           else
    5774       428068 :             opcode = (misaligned_p
    5775        25139 :                       ? (TARGET_AVX512BW && evex_reg_p
    5776              :                          ? "vmovdqu16"
    5777              :                          : "%vmovdqu")
    5778              :                       : "%vmovdqa");
    5779              :           break;
    5780      2459832 :         case E_SFmode:
    5781      2459832 :           opcode = misaligned_p ? "%vmovups" : "%vmovaps";
    5782              :           break;
    5783       208918 :         case E_DFmode:
    5784       208918 :           opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
    5785              :           break;
    5786       141732 :         case E_TFmode:
    5787       141732 :           if (evex_reg_p || egpr_vl)
    5788           14 :             opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
    5789       141718 :           else if (egpr_p)
    5790            0 :             opcode = misaligned_p ? "%vmovups" : "%vmovaps";
    5791              :           else
    5792       141718 :             opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
    5793              :           break;
    5794            0 :         default:
    5795            0 :           gcc_unreachable ();
    5796              :         }
    5797              :     }
    5798      1339369 :   else if (SCALAR_INT_MODE_P (scalar_mode))
    5799              :     {
    5800      1339369 :       switch (scalar_mode)
    5801              :         {
    5802       105483 :         case E_QImode:
    5803       105483 :           if (evex_reg_p || egpr_vl)
    5804      4196497 :             opcode = (misaligned_p
    5805         9910 :                       ? (TARGET_AVX512BW
    5806         5074 :                          ? "vmovdqu8"
    5807              :                          : "vmovdqu64")
    5808              :                       : "vmovdqa64");
    5809        95573 :           else if (egpr_p)
    5810           30 :             opcode = (misaligned_p
    5811            0 :                       ? (TARGET_AVX512BW
    5812              :                          ? "vmovdqu8"
    5813              :                          : "%vmovups")
    5814              :                       : "%vmovaps");
    5815              :           else
    5816        95543 :             opcode = (misaligned_p
    5817              :                       ? (TARGET_AVX512BW && evex_reg_p
    5818              :                          ? "vmovdqu8"
    5819              :                          : "%vmovdqu")
    5820              :                       : "%vmovdqa");
    5821              :           break;
    5822        41766 :         case E_HImode:
    5823        41766 :           if (evex_reg_p || egpr_vl)
    5824         3757 :             opcode = (misaligned_p
    5825          294 :                       ? (TARGET_AVX512BW
    5826              :                          ? "vmovdqu16"
    5827              :                          : "vmovdqu64")
    5828              :                       : "vmovdqa64");
    5829        38009 :           else if (egpr_p)
    5830       817751 :             opcode = (misaligned_p
    5831           27 :                       ? (TARGET_AVX512BW
    5832            0 :                          ? "vmovdqu16"
    5833              :                          : "%vmovups")
    5834              :                       : "%vmovaps");
    5835              :           else
    5836       402929 :             opcode = (misaligned_p
    5837        37982 :                       ? (TARGET_AVX512BW && evex_reg_p
    5838              :                          ? "vmovdqu16"
    5839              :                          : "%vmovdqu")
    5840              :                       : "%vmovdqa");
    5841              :           break;
    5842       180913 :         case E_SImode:
    5843       180913 :           if (evex_reg_p || egpr_vl)
    5844         8297 :             opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
    5845       172616 :           else if (egpr_p)
    5846           14 :             opcode = misaligned_p ? "%vmovups" : "%vmovaps";
    5847              :           else
    5848       172602 :             opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
    5849              :           break;
    5850       999399 :         case E_DImode:
    5851       999399 :         case E_TImode:
    5852       999399 :         case E_OImode:
    5853       999399 :           if (evex_reg_p || egpr_vl)
    5854        18786 :             opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
    5855       980613 :           else if (egpr_p)
    5856           26 :             opcode = misaligned_p ? "%vmovups" : "%vmovaps";
    5857              :           else
    5858       980587 :             opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
    5859              :           break;
    5860        11808 :         case E_XImode:
    5861        49519 :           opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
    5862              :           break;
    5863            0 :         default:
    5864            0 :           gcc_unreachable ();
    5865              :         }
    5866              :     }
    5867              :   else
    5868            0 :     gcc_unreachable ();
    5869              : 
    5870      4186587 :   switch (size)
    5871              :     {
    5872        86717 :     case 64:
    5873        86717 :       snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
    5874              :                 opcode);
    5875        86717 :       break;
    5876        94004 :     case 32:
    5877        94004 :       snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
    5878              :                 opcode);
    5879        94004 :       break;
    5880      4005866 :     case 16:
    5881      4005866 :       snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
    5882              :                 opcode);
    5883      4005866 :       break;
    5884            0 :     default:
    5885            0 :       gcc_unreachable ();
    5886              :     }
    5887      4186587 :   output_asm_insn (buf, operands);
    5888      4186587 :   return "";
    5889              : }
    5890              : 
    5891              : /* Return the template of the TYPE_SSEMOV instruction to move
    5892              :    operands[1] into operands[0].  */
    5893              : 
const char *
ix86_output_ssemov (rtx_insn *insn, rtx *operands)
{
  machine_mode mode = GET_MODE (operands[0]);
  /* This helper only handles TYPE_SSEMOV instructions whose source and
     destination operands share the same machine mode.  */
  if (get_attr_type (insn) != TYPE_SSEMOV
      || mode != GET_MODE (operands[1]))
    gcc_unreachable ();

  enum attr_mode insn_mode = get_attr_mode (insn);

  switch (insn_mode)
    {
    /* Full-vector moves: delegate to ix86_get_ssemov, passing the move
       width in bytes (64 for zmm, 32 for ymm, 16 for xmm).  */
    case MODE_XI:
    case MODE_V8DF:
    case MODE_V16SF:
      return ix86_get_ssemov (operands, 64, insn_mode, mode);

    case MODE_OI:
    case MODE_V4DF:
    case MODE_V8SF:
      return ix86_get_ssemov (operands, 32, insn_mode, mode);

    case MODE_TI:
    case MODE_V2DF:
    case MODE_V4SF:
      return ix86_get_ssemov (operands, 16, insn_mode, mode);

    case MODE_DI:
      /* Handle broken assemblers that require movd instead of movq. */
      if (GENERAL_REG_P (operands[0]))
	{
	  if (HAVE_AS_IX86_INTERUNIT_MOVQ)
	    return "%vmovq\t{%1, %q0|%q0, %1}";
	  else
	    return "%vmovd\t{%1, %q0|%q0, %1}";
	}
      else if (GENERAL_REG_P (operands[1]))
	{
	  if (HAVE_AS_IX86_INTERUNIT_MOVQ)
	    return "%vmovq\t{%q1, %0|%0, %q1}";
	  else
	    return "%vmovd\t{%q1, %0|%0, %q1}";
	}
      else
	return "%vmovq\t{%1, %0|%0, %1}";

    case MODE_SI:
      /* %k forces the 32-bit form of a general register operand.  */
      if (GENERAL_REG_P (operands[0]))
	return "%vmovd\t{%1, %k0|%k0, %1}";
      else if (GENERAL_REG_P (operands[1]))
	return "%vmovd\t{%k1, %0|%0, %k1}";
      else
	return "%vmovd\t{%1, %0|%0, %1}";

    case MODE_HI:
      /* 16-bit scalar moves use vmovw (no legacy-encoded %v form).  */
      if (GENERAL_REG_P (operands[0]))
	return "vmovw\t{%1, %k0|%k0, %1}";
      else if (GENERAL_REG_P (operands[1]))
	return "vmovw\t{%k1, %0|%0, %k1}";
      else
	return "vmovw\t{%1, %0|%0, %1}";

    /* Scalar FP moves: with AVX and a reg-reg move, %d duplicates the
       destination as the merge source operand of the VEX encoding.  */
    case MODE_DF:
      if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
	return "vmovsd\t{%d1, %0|%0, %d1}";
      else
	return "%vmovsd\t{%1, %0|%0, %1}";

    case MODE_SF:
      if (TARGET_AVX && REG_P (operands[0]) && REG_P (operands[1]))
	return "vmovss\t{%d1, %0|%0, %d1}";
      else
	return "%vmovss\t{%1, %0|%0, %1}";

    case MODE_HF:
    case MODE_BF:
      if (REG_P (operands[0]) && REG_P (operands[1]))
	return "vmovsh\t{%d1, %0|%0, %d1}";
      else
	return "vmovsh\t{%1, %0|%0, %1}";

    case MODE_V1DF:
      /* movlpd is only used pre-AVX here.  */
      gcc_assert (!TARGET_AVX);
      return "movlpd\t{%1, %0|%0, %1}";

    case MODE_V2SF:
      if (TARGET_AVX && REG_P (operands[0]))
	return "vmovlps\t{%1, %d0|%d0, %1}";
      else
	return "%vmovlps\t{%1, %0|%0, %1}";

    default:
      gcc_unreachable ();
    }
}
    5989              : 
    5990              : /* Returns true if OP contains a symbol reference */
    5991              : 
    5992              : bool
    5993    583614079 : symbolic_reference_mentioned_p (rtx op)
    5994              : {
    5995    583614079 :   const char *fmt;
    5996    583614079 :   int i;
    5997              : 
    5998    583614079 :   if (SYMBOL_REF_P (op) || LABEL_REF_P (op))
    5999              :     return true;
    6000              : 
    6001    441156835 :   fmt = GET_RTX_FORMAT (GET_CODE (op));
    6002    748518509 :   for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    6003              :     {
    6004    597179772 :       if (fmt[i] == 'E')
    6005              :         {
    6006      2019905 :           int j;
    6007              : 
    6008      4042340 :           for (j = XVECLEN (op, i) - 1; j >= 0; j--)
    6009      3326903 :             if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
    6010              :               return true;
    6011              :         }
    6012              : 
    6013    595159867 :       else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
    6014              :         return true;
    6015              :     }
    6016              : 
    6017              :   return false;
    6018              : }
    6019              : 
    6020              : /* Return true if it is appropriate to emit `ret' instructions in the
    6021              :    body of a function.  Do this only if the epilogue is simple, needing a
    6022              :    couple of insns.  Prior to reloading, we can't tell how many registers
    6023              :    must be saved, so return false then.  Return false if there is no frame
    6024              :    marker to de-allocate.  */
    6025              : 
    6026              : bool
    6027            0 : ix86_can_use_return_insn_p (void)
    6028              : {
    6029            0 :   if (ix86_function_ms_hook_prologue (current_function_decl))
    6030              :     return false;
    6031              : 
    6032            0 :   if (ix86_function_naked (current_function_decl))
    6033              :     return false;
    6034              : 
    6035              :   /* Don't use `ret' instruction in interrupt handler.  */
    6036            0 :   if (! reload_completed
    6037            0 :       || frame_pointer_needed
    6038            0 :       || cfun->machine->func_type != TYPE_NORMAL)
    6039              :     return 0;
    6040              : 
    6041              :   /* Don't allow more than 32k pop, since that's all we can do
    6042              :      with one instruction.  */
    6043            0 :   if (crtl->args.pops_args && crtl->args.size >= 32768)
    6044              :     return 0;
    6045              : 
    6046            0 :   struct ix86_frame &frame = cfun->machine->frame;
    6047            0 :   return (frame.stack_pointer_offset == UNITS_PER_WORD
    6048            0 :           && (frame.nregs + frame.nsseregs) == 0);
    6049              : }
    6050              : 
    6051              : /* Return stack frame size.  get_frame_size () returns used stack slots
    6052              :    during compilation, which may be optimized out later.  If stack frame
    6053              :    is needed, stack_frame_required should be true.  */
    6054              : 
    6055              : static HOST_WIDE_INT
    6056      8193916 : ix86_get_frame_size (void)
    6057              : {
    6058      8193916 :   if (cfun->machine->stack_frame_required)
    6059      8124631 :     return get_frame_size ();
    6060              :   else
    6061              :     return 0;
    6062              : }
    6063              : 
    6064              : /* Value should be nonzero if functions must have frame pointers.
    6065              :    Zero means the frame pointer need not be set up (and parms may
    6066              :    be accessed via the stack pointer) in functions that seem suitable.  */
    6067              : 
static bool
ix86_frame_pointer_required (void)
{
  /* If we accessed previous frames, then the generated code expects
     to be able to access the saved ebp value in our frame.  */
  if (cfun->machine->accesses_prev_frame)
    return true;

  /* Several x86 os'es need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
  if (SUBTARGET_FRAME_POINTER_REQUIRED)
    return true;

  /* For older 32-bit runtimes setjmp requires valid frame-pointer.  */
  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
    return true;

  /* Win64 SEH, very large frames need a frame-pointer as maximum stack
     allocation is 4GB.  */
  if (TARGET_64BIT_MS_ABI && ix86_get_frame_size () > SEH_MAX_FRAME_SIZE)
    return true;

  /* SSE saves require frame-pointer when stack is misaligned.  */
  if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
    return true;

  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
     turns off the frame pointer by default.  Turn it back on now if
     we've not got a leaf function.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER
      && (!crtl->is_leaf
	  || ix86_current_function_calls_tls_descriptor))
    return true;

  /* Several versions of mcount for the x86 assumes that there is a
     frame, so we cannot allow profiling without a frame pointer.
     With -mfentry the profiling call happens before the prologue, so
     no frame is needed for it (flag_fentry).  */
  if (crtl->profile && !flag_fentry)
    return true;

  /* No reason found: the frame pointer may be omitted.  */
  return false;
}
    6109              : 
    6110              : /* Record that the current function accesses previous call frames.  */
    6111              : 
void
ix86_setup_frame_addresses (void)
{
  /* Flag consulted by ix86_frame_pointer_required: accessing a
     previous call frame forces this function to keep a frame
     pointer.  */
  cfun->machine->accesses_prev_frame = 1;
}
    6118              : #if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
    6119              : # define USE_HIDDEN_LINKONCE 1
    6120              : #else
    6121              : # define USE_HIDDEN_LINKONCE 0
    6122              : #endif
    6123              : 
    6124              : /* Label count for call and return thunks.  It is used to make unique
    6125              :    labels in call and return thunks.  */
    6126              : static int indirectlabelno;
    6127              : 
    6128              : /* True if call thunk function is needed.  */
    6129              : static bool indirect_thunk_needed = false;
    6130              : 
    6131              : /* Bit masks of integer registers, which contain branch target, used
    6132              :    by call thunk functions.  */
    6133              : static HARD_REG_SET indirect_thunks_used;
    6134              : 
    6135              : /* True if return thunk function is needed.  */
    6136              : static bool indirect_return_needed = false;
    6137              : 
    6138              : /* True if return thunk function via CX is needed.  */
    6139              : static bool indirect_return_via_cx;
    6140              : 
    6141              : #ifndef INDIRECT_LABEL
    6142              : # define INDIRECT_LABEL "LIND"
    6143              : #endif
    6144              : 
    6145              : /* Indicate what prefix is needed for an indirect branch.  */
    6146              : enum indirect_thunk_prefix
    6147              : {
    6148              :   indirect_thunk_prefix_none,
    6149              :   indirect_thunk_prefix_nt
    6150              : };
    6151              : 
    6152              : /* Return the prefix needed for an indirect branch INSN.  */
    6153              : 
    6154              : enum indirect_thunk_prefix
    6155           68 : indirect_thunk_need_prefix (rtx_insn *insn)
    6156              : {
    6157           68 :   enum indirect_thunk_prefix need_prefix;
    6158           68 :   if ((cfun->machine->indirect_branch_type
    6159           68 :             == indirect_branch_thunk_extern)
    6160           68 :            && ix86_notrack_prefixed_insn_p (insn))
    6161              :     {
    6162              :       /* NOTRACK prefix is only used with external thunk so that it
    6163              :          can be properly updated to support CET at run-time.  */
    6164              :       need_prefix = indirect_thunk_prefix_nt;
    6165              :     }
    6166              :   else
    6167              :     need_prefix = indirect_thunk_prefix_none;
    6168           68 :   return need_prefix;
    6169              : }
    6170              : 
    6171              : /* Fills in the label name that should be used for the indirect thunk.  */
    6172              : 
    6173              : static void
    6174           74 : indirect_thunk_name (char name[32], unsigned int regno,
    6175              :                      enum indirect_thunk_prefix need_prefix,
    6176              :                      bool ret_p)
    6177              : {
    6178           74 :   if (regno != INVALID_REGNUM && regno != CX_REG && ret_p)
    6179            0 :     gcc_unreachable ();
    6180              : 
    6181           74 :   if (USE_HIDDEN_LINKONCE)
    6182              :     {
    6183           74 :       const char *prefix;
    6184              : 
    6185           74 :       if (need_prefix == indirect_thunk_prefix_nt
    6186           74 :           && regno != INVALID_REGNUM)
    6187              :         {
    6188              :           /* NOTRACK prefix is only used with external thunk via
    6189              :              register so that NOTRACK prefix can be added to indirect
    6190              :              branch via register to support CET at run-time.  */
    6191              :           prefix = "_nt";
    6192              :         }
    6193              :       else
    6194           72 :         prefix = "";
    6195              : 
    6196           74 :       const char *ret = ret_p ? "return" : "indirect";
    6197              : 
    6198           74 :       if (regno != INVALID_REGNUM)
    6199              :         {
    6200           55 :           const char *reg_prefix;
    6201           55 :           if (LEGACY_INT_REGNO_P (regno))
    6202           53 :             reg_prefix = TARGET_64BIT ? "r" : "e";
    6203              :           else
    6204              :             reg_prefix = "";
    6205           55 :           sprintf (name, "__x86_%s_thunk%s_%s%s",
    6206              :                    ret, prefix, reg_prefix, reg_names[regno]);
    6207              :         }
    6208              :       else
    6209           19 :         sprintf (name, "__x86_%s_thunk%s", ret, prefix);
    6210              :     }
    6211              :   else
    6212              :     {
    6213              :       if (regno != INVALID_REGNUM)
    6214              :         ASM_GENERATE_INTERNAL_LABEL (name, "LITR", regno);
    6215              :       else
    6216              :         {
    6217              :           if (ret_p)
    6218              :             ASM_GENERATE_INTERNAL_LABEL (name, "LRT", 0);
    6219              :           else
    6220           74 :             ASM_GENERATE_INTERNAL_LABEL (name, "LIT", 0);
    6221              :         }
    6222              :     }
    6223           74 : }
    6224              : 
    6225              : /* Output a call and return thunk for indirect branch.  If REGNO != -1,
    6226              :    the function address is in REGNO and the call and return thunk looks like:
    6227              : 
    6228              :         call    L2
    6229              :    L1:
    6230              :         pause
    6231              :         lfence
    6232              :         jmp     L1
    6233              :    L2:
    6234              :         mov     %REG, (%sp)
    6235              :         ret
    6236              : 
    6237              :    Otherwise, the function address is on the top of stack and the
    6238              :    call and return thunk looks like:
    6239              : 
    6240              :         call L2
    6241              :   L1:
    6242              :         pause
    6243              :         lfence
    6244              :         jmp L1
    6245              :   L2:
    6246              :         lea WORD_SIZE(%sp), %sp
    6247              :         ret
    6248              :  */
    6249              : 
    6250              : static void
    6251           38 : output_indirect_thunk (unsigned int regno)
    6252              : {
    6253           38 :   char indirectlabel1[32];
    6254           38 :   char indirectlabel2[32];
    6255              : 
    6256           38 :   ASM_GENERATE_INTERNAL_LABEL (indirectlabel1, INDIRECT_LABEL,
    6257              :                                indirectlabelno++);
    6258           38 :   ASM_GENERATE_INTERNAL_LABEL (indirectlabel2, INDIRECT_LABEL,
    6259              :                                indirectlabelno++);
    6260              : 
    6261              :   /* Call */
    6262           38 :   fputs ("\tcall\t", asm_out_file);
    6263           38 :   assemble_name_raw (asm_out_file, indirectlabel2);
    6264           38 :   fputc ('\n', asm_out_file);
    6265              : 
    6266           38 :   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
    6267              : 
    6268              :   /* AMD and Intel CPUs prefer each a different instruction as loop filler.
    6269              :      Usage of both pause + lfence is compromise solution.  */
    6270           38 :   fprintf (asm_out_file, "\tpause\n\tlfence\n");
    6271              : 
    6272              :   /* Jump.  */
    6273           38 :   fputs ("\tjmp\t", asm_out_file);
    6274           38 :   assemble_name_raw (asm_out_file, indirectlabel1);
    6275           38 :   fputc ('\n', asm_out_file);
    6276              : 
    6277           38 :   ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
    6278              : 
    6279              :   /* The above call insn pushed a word to stack.  Adjust CFI info.  */
    6280           38 :   if (flag_asynchronous_unwind_tables && dwarf2out_do_frame ())
    6281              :     {
    6282           38 :       if (! dwarf2out_do_cfi_asm ())
    6283              :         {
    6284            0 :           dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
    6285            0 :           xcfi->dw_cfi_opc = DW_CFA_advance_loc4;
    6286            0 :           xcfi->dw_cfi_oprnd1.dw_cfi_addr = ggc_strdup (indirectlabel2);
    6287            0 :           vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
    6288              :         }
    6289           38 :       dw_cfi_ref xcfi = ggc_cleared_alloc<dw_cfi_node> ();
    6290           38 :       xcfi->dw_cfi_opc = DW_CFA_def_cfa_offset;
    6291           38 :       xcfi->dw_cfi_oprnd1.dw_cfi_offset = 2 * UNITS_PER_WORD;
    6292           38 :       vec_safe_push (cfun->fde->dw_fde_cfi, xcfi);
    6293           38 :       dwarf2out_emit_cfi (xcfi);
    6294              :     }
    6295              : 
    6296           38 :   if (regno != INVALID_REGNUM)
    6297              :     {
    6298              :       /* MOV.  */
    6299           27 :       rtx xops[2];
    6300           27 :       xops[0] = gen_rtx_MEM (word_mode, stack_pointer_rtx);
    6301           27 :       xops[1] = gen_rtx_REG (word_mode, regno);
    6302           27 :       output_asm_insn ("mov\t{%1, %0|%0, %1}", xops);
    6303              :     }
    6304              :   else
    6305              :     {
    6306              :       /* LEA.  */
    6307           11 :       rtx xops[2];
    6308           11 :       xops[0] = stack_pointer_rtx;
    6309           11 :       xops[1] = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
    6310           11 :       output_asm_insn ("lea\t{%E1, %0|%0, %E1}", xops);
    6311              :     }
    6312              : 
    6313           38 :   fputs ("\tret\n", asm_out_file);
    6314           38 :   if ((ix86_harden_sls & harden_sls_return))
    6315            1 :     fputs ("\tint3\n", asm_out_file);
    6316           38 : }
    6317              : 
/* Output a function with a call and return thunk for indirect branch.
   If REGNO != INVALID_REGNUM, the function address is in REGNO.
   Otherwise, the function address is on the top of stack.  Thunk is
   used for function return if RET_P is true.  */
    6322              : 
static void
output_indirect_thunk_function (enum indirect_thunk_prefix need_prefix,
                                unsigned int regno, bool ret_p)
{
  char name[32];
  tree decl;

  /* Create __x86_indirect_thunk.  Build a bare FUNCTION_DECL with a
     void(void) type; the decl only exists so the assembler-output
     machinery (sections, labels, unwind info) has something to hang
     the thunk on.  */
  indirect_thunk_name (name, regno, need_prefix, ret_p);
  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                     get_identifier (name),
                     build_function_type_list (void_type_node, NULL_TREE));
  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
                                   NULL_TREE, void_type_node);
  /* Public and static so it can be shared across translation units,
     but ignored by debug output.  */
  TREE_PUBLIC (decl) = 1;
  TREE_STATIC (decl) = 1;
  DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
  if (TARGET_MACHO)
    {
      /* Mach-O has no COMDAT; emit the thunk as a weak, private-extern
         definition in the picbase thunk section instead.  */
      switch_to_section (darwin_sections[picbase_thunk_section]);
      fputs ("\t.weak_definition\t", asm_out_file);
      assemble_name (asm_out_file, name);
      fputs ("\n\t.private_extern\t", asm_out_file);
      assemble_name (asm_out_file, name);
      putc ('\n', asm_out_file);
      ASM_OUTPUT_LABEL (asm_out_file, name);
      DECL_WEAK (decl) = 1;
    }
  else
#endif
    if (USE_HIDDEN_LINKONCE)
      {
        /* Emit the thunk as a hidden COMDAT symbol in its own section
           so the linker keeps a single copy across object files.  */
        cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

        targetm.asm_out.unique_section (decl, 0);
        switch_to_section (get_named_section (decl, NULL, 0));

        targetm.asm_out.globalize_label (asm_out_file, name);
        fputs ("\t.hidden\t", asm_out_file);
        assemble_name (asm_out_file, name);
        putc ('\n', asm_out_file);
        ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
      }
    else
      {
        /* Fallback: plain label in the text section.  */
        switch_to_section (text_section);
        ASM_OUTPUT_LABEL (asm_out_file, name);
      }

  /* Set up a minimal function context so final_start_function can
     emit prologue notes and unwind info for the hand-written body.  */
  DECL_INITIAL (decl) = make_node (BLOCK);
  current_function_decl = decl;
  allocate_struct_function (decl, false);
  init_function_start (decl);
  /* We're about to hide the function body from callees of final_* by
     emitting it directly; tell them we're a thunk, if they care.  */
  cfun->is_thunk = true;
  first_function_block_is_cold = false;
  /* Make sure unwind info is emitted for the thunk if needed.  */
  final_start_function (emit_barrier (), asm_out_file, 1);

  output_indirect_thunk (regno);

  /* Tear the temporary function context back down.  */
  final_end_function ();
  init_insn_lengths ();
  free_after_compilation (cfun);
  set_cfun (NULL);
  current_function_decl = NULL;
}
    6393              : 
/* Bitmask of integer hard registers for which a pc-thunk
   (__x86.get_pc_thunk.REG) must be emitted at end of compilation;
   bit N corresponds to hard register N.  Set in output_set_got and
   consumed by ix86_code_end.  */
static int pic_labels_used;
    6395              : 
    6396              : /* Fills in the label name that should be used for a pc thunk for
    6397              :    the given register.  */
    6398              : 
    6399              : static void
    6400        37243 : get_pc_thunk_name (char name[32], unsigned int regno)
    6401              : {
    6402        37243 :   gcc_assert (!TARGET_64BIT);
    6403              : 
    6404        37243 :   if (USE_HIDDEN_LINKONCE)
    6405        37243 :     sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
    6406              :   else
    6407        37243 :     ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
    6408        37243 : }
    6409              : 
    6410              : 
    6411              : /* This function generates code for -fpic that loads %ebx with
    6412              :    the return address of the caller and then returns.  */
    6413              : 
static void
ix86_code_end (void)
{
  rtx xops[2];
  unsigned int regno;

  /* First emit any indirect-branch thunks that code generation
     requested: the return thunk, the CX-register return thunk, and
     the generic (top-of-stack) indirect-call thunk.  */
  if (indirect_return_needed)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
                                    INVALID_REGNUM, true);
  if (indirect_return_via_cx)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
                                    CX_REG, true);
  if (indirect_thunk_needed)
    output_indirect_thunk_function (indirect_thunk_prefix_none,
                                    INVALID_REGNUM, false);

  /* Per-register indirect thunks for the REX (r8-r15) registers.  */
  for (regno = FIRST_REX_INT_REG; regno <= LAST_REX_INT_REG; regno++)
    {
      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
        output_indirect_thunk_function (indirect_thunk_prefix_none,
                                        regno, false);
    }

  /* Per-register indirect thunks for the REX2 (APX r16-r31)
     registers.  */
  for (regno = FIRST_REX2_INT_REG; regno <= LAST_REX2_INT_REG; regno++)
    {
      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
        output_indirect_thunk_function (indirect_thunk_prefix_none,
                                        regno, false);
    }

  /* The legacy integer registers may additionally need a pc thunk
     (recorded as a bit in pic_labels_used by output_set_got).  */
  for (regno = FIRST_INT_REG; regno <= LAST_INT_REG; regno++)
    {
      char name[32];
      tree decl;

      if (TEST_HARD_REG_BIT (indirect_thunks_used, regno))
        output_indirect_thunk_function (indirect_thunk_prefix_none,
                                        regno, false);

      if (!(pic_labels_used & (1 << regno)))
        continue;

      get_pc_thunk_name (name, regno);

      /* Build a dummy decl for the thunk, mirroring what
         output_indirect_thunk_function does above.  */
      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
                         get_identifier (name),
                         build_function_type_list (void_type_node, NULL_TREE));
      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
                                       NULL_TREE, void_type_node);
      TREE_PUBLIC (decl) = 1;
      TREE_STATIC (decl) = 1;
      DECL_IGNORED_P (decl) = 1;

#if TARGET_MACHO
      if (TARGET_MACHO)
        {
          /* Mach-O: weak, private-extern definition in the picbase
             thunk section (no COMDAT support).  */
          switch_to_section (darwin_sections[picbase_thunk_section]);
          fputs ("\t.weak_definition\t", asm_out_file);
          assemble_name (asm_out_file, name);
          fputs ("\n\t.private_extern\t", asm_out_file);
          assemble_name (asm_out_file, name);
          putc ('\n', asm_out_file);
          ASM_OUTPUT_LABEL (asm_out_file, name);
          DECL_WEAK (decl) = 1;
        }
      else
#endif
      if (USE_HIDDEN_LINKONCE)
        {
          /* Hidden COMDAT symbol in its own section so the linker
             keeps one copy across translation units.  */
          cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

          targetm.asm_out.unique_section (decl, 0);
          switch_to_section (get_named_section (decl, NULL, 0));

          targetm.asm_out.globalize_label (asm_out_file, name);
          fputs ("\t.hidden\t", asm_out_file);
          assemble_name (asm_out_file, name);
          putc ('\n', asm_out_file);
          ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
        }
      else
        {
          switch_to_section (text_section);
          ASM_OUTPUT_LABEL (asm_out_file, name);
        }

      /* Set up a minimal function context so final_start_function can
         emit prologue notes and unwind info for the thunk body.  */
      DECL_INITIAL (decl) = make_node (BLOCK);
      current_function_decl = decl;
      allocate_struct_function (decl, false);
      init_function_start (decl);
      /* We're about to hide the function body from callees of final_* by
         emitting it directly; tell them we're a thunk, if they care.  */
      cfun->is_thunk = true;
      first_function_block_is_cold = false;
      /* Make sure unwind info is emitted for the thunk if needed.  */
      final_start_function (emit_barrier (), asm_out_file, 1);

      /* Pad stack IP move with 4 instructions (two NOPs count
         as one instruction).  */
      if (TARGET_PAD_SHORT_FUNCTION)
        {
          int i = 8;

          while (i--)
            fputs ("\tnop\n", asm_out_file);
        }

      /* The thunk body: load the return address (at the top of the
         stack after the call) into the target register, then return.  */
      xops[0] = gen_rtx_REG (Pmode, regno);
      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
      fputs ("\tret\n", asm_out_file);
      final_end_function ();
      init_insn_lengths ();
      free_after_compilation (cfun);
      set_cfun (NULL);
      current_function_decl = NULL;
    }

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
    6535              : 
    6536              : /* Emit code for the SET_GOT patterns.  */
    6537              : 
const char *
output_set_got (rtx dest, rtx label)
{
  rtx xops[3];

  xops[0] = dest;

  if (TARGET_VXWORKS_GOTTPIC && TARGET_VXWORKS_RTP && flag_pic)
    {
      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
      xops[2] = gen_rtx_MEM (Pmode,
                             gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);

      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
         Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
         an unadorned address.  */
      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
      return "";
    }

  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

  if (flag_pic)
    {
      /* PIC: call the pc thunk for DEST's register, which leaves the
         return address (the pc) in DEST; record that the thunk must be
         emitted at end of compilation (see ix86_code_end).  */
      char name[32];
      get_pc_thunk_name (name, REGNO (dest));
      pic_labels_used |= 1 << REGNO (dest);

      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      xops[2] = gen_rtx_MEM (QImode, xops[2]);
      output_asm_insn ("%!call\t%X2", xops);

#if TARGET_MACHO
      /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
         This is what will be referenced by the Mach-O PIC subsystem.  */
      if (machopic_should_output_picbase_label () || !label)
        ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);

      /* When we are restoring the pic base at the site of a nonlocal label,
         and we decided to emit the pic base above, we will still output a
         local label used for calculating the correction offset (even though
         the offset will be 0 in that case).  */
      if (label)
        targetm.asm_out.internal_label (asm_out_file, "L",
                                           CODE_LABEL_NUMBER (label));
#endif
    }
  else
    {
      if (TARGET_MACHO)
        /* We don't need a pic base, we're not producing pic.  */
        gcc_unreachable ();

      /* Non-PIC: materialize the address of a local label in DEST and
         emit the label right after the move.  */
      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
      targetm.asm_out.internal_label (asm_out_file, "L",
                                      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
    }

  /* Add the GOT base symbol offset to turn the pc into the GOT
     pointer.  */
  if (!TARGET_MACHO)
    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);

  return "";
}
    6605              : 
/* Generate a "push" pattern for input ARG.  */
    6607              : 
    6608              : rtx
    6609      1872107 : gen_push (rtx arg, bool ppx_p)
    6610              : {
    6611      1872107 :   struct machine_function *m = cfun->machine;
    6612              : 
    6613      1872107 :   if (m->fs.cfa_reg == stack_pointer_rtx)
    6614      1597627 :     m->fs.cfa_offset += UNITS_PER_WORD;
    6615      1872107 :   m->fs.sp_offset += UNITS_PER_WORD;
    6616              : 
    6617      1872107 :   if (REG_P (arg) && GET_MODE (arg) != word_mode)
    6618           28 :     arg = gen_rtx_REG (word_mode, REGNO (arg));
    6619              : 
    6620      1872107 :   rtx stack = gen_rtx_MEM (word_mode,
    6621      1872107 :                            gen_rtx_PRE_DEC (Pmode,
    6622              :                                             stack_pointer_rtx));
    6623      3744126 :   return ppx_p ? gen_pushp_di (stack, arg) : gen_rtx_SET (stack, arg);
    6624              : }
    6625              : 
    6626              : rtx
    6627           23 : gen_pushfl (void)
    6628              : {
    6629           23 :   struct machine_function *m = cfun->machine;
    6630           23 :   rtx flags, mem;
    6631              : 
    6632           23 :   if (m->fs.cfa_reg == stack_pointer_rtx)
    6633            0 :     m->fs.cfa_offset += UNITS_PER_WORD;
    6634           23 :   m->fs.sp_offset += UNITS_PER_WORD;
    6635              : 
    6636           23 :   flags = gen_rtx_REG (CCmode, FLAGS_REG);
    6637              : 
    6638           23 :   mem = gen_rtx_MEM (word_mode,
    6639           23 :                      gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
    6640              : 
    6641           23 :   return gen_pushfl2 (word_mode, mem, flags);
    6642              : }
    6643              : 
/* Generate a "pop" pattern for input ARG.  */
    6645              : 
    6646              : rtx
    6647      1455780 : gen_pop (rtx arg, bool ppx_p)
    6648              : {
    6649      1455780 :   if (REG_P (arg) && GET_MODE (arg) != word_mode)
    6650           24 :     arg = gen_rtx_REG (word_mode, REGNO (arg));
    6651              : 
    6652      1455780 :   rtx stack = gen_rtx_MEM (word_mode,
    6653      1455780 :                            gen_rtx_POST_INC (Pmode,
    6654              :                                              stack_pointer_rtx));
    6655              : 
    6656      2911472 :   return ppx_p ? gen_popp_di (arg, stack) : gen_rtx_SET (arg, stack);
    6657              : }
    6658              : 
    6659              : rtx
    6660           21 : gen_popfl (void)
    6661              : {
    6662           21 :   rtx flags, mem;
    6663              : 
    6664           21 :   flags = gen_rtx_REG (CCmode, FLAGS_REG);
    6665              : 
    6666           21 :   mem = gen_rtx_MEM (word_mode,
    6667           21 :                      gen_rtx_POST_INC (Pmode, stack_pointer_rtx));
    6668              : 
    6669           21 :   return gen_popfl1 (word_mode, flags, mem);
    6670              : }
    6671              : 
    6672              : /* Generate a "push2" pattern for input ARG.  */
    6673              : rtx
    6674           19 : gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false)
    6675              : {
    6676           19 :   struct machine_function *m = cfun->machine;
    6677           19 :   const int offset = UNITS_PER_WORD * 2;
    6678              : 
    6679           19 :   if (m->fs.cfa_reg == stack_pointer_rtx)
    6680           14 :     m->fs.cfa_offset += offset;
    6681           19 :   m->fs.sp_offset += offset;
    6682              : 
    6683           19 :   if (REG_P (reg1) && GET_MODE (reg1) != word_mode)
    6684            0 :     reg1 = gen_rtx_REG (word_mode, REGNO (reg1));
    6685              : 
    6686           19 :   if (REG_P (reg2) && GET_MODE (reg2) != word_mode)
    6687            0 :     reg2 = gen_rtx_REG (word_mode, REGNO (reg2));
    6688              : 
    6689           19 :   return ppx_p ? gen_push2p_di (mem, reg1, reg2)
    6690            4 :                : gen_push2_di (mem, reg1, reg2);
    6691              : }
    6692              : 
    6693              : /* Return >= 0 if there is an unused call-clobbered register available
    6694              :    for the entire function.  */
    6695              : 
static unsigned int
ix86_select_alt_pic_regnum (void)
{
  /* With a pseudo PIC register there is no fixed hard register to
     find an alternative for.  */
  if (ix86_use_pseudo_pic_reg ())
    return INVALID_REGNUM;

  /* Only leaf, non-profiled functions that make no TLS-descriptor
     calls can safely repurpose a call-clobbered register for PIC.  */
  if (crtl->is_leaf
      && !crtl->profile
      && !ix86_current_function_calls_tls_descriptor)
    {
      int i, drap;
      /* Can't use the same register for both PIC and DRAP.  */
      if (crtl->drap_reg)
        drap = REGNO (crtl->drap_reg);
      else
        drap = -1;
      /* Scan hard registers 2..0 (presumably the call-clobbered
         integer registers in this target's numbering — confirm against
         i386.h REG_ALLOC_ORDER) for one that is never live.  */
      for (i = 2; i >= 0; --i)
        if (i != drap && !df_regs_ever_live_p (i))
          return i;
    }

  return INVALID_REGNUM;
}
    6719              : 
    6720              : /* Return true if REGNO is used by the epilogue.  */
    6721              : 
    6722              : bool
    6723   1656573810 : ix86_epilogue_uses (int regno)
    6724              : {
    6725              :   /* If there are no caller-saved registers, we preserve all registers,
    6726              :      except for MMX and x87 registers which aren't supported when saving
    6727              :      and restoring registers.  Don't explicitly save SP register since
    6728              :      it is always preserved.  */
    6729   1656573810 :   return (epilogue_completed
    6730    262311074 :           && (cfun->machine->call_saved_registers
    6731    262311074 :               == TYPE_NO_CALLER_SAVED_REGISTERS)
    6732        27140 :           && !fixed_regs[regno]
    6733         4857 :           && !STACK_REGNO_P (regno)
    6734   1656578667 :           && !MMX_REGNO_P (regno));
    6735              : }
    6736              : 
    6737              : /* Return nonzero if register REGNO can be used as a scratch register
    6738              :    in peephole2.  */
    6739              : 
    6740              : static bool
    6741      1241603 : ix86_hard_regno_scratch_ok (unsigned int regno)
    6742              : {
    6743              :   /* If there are no caller-saved registers, we can't use any register
    6744              :      as a scratch register after epilogue and use REGNO as scratch
    6745              :      register only if it has been used before to avoid saving and
    6746              :      restoring it.  */
    6747      1241603 :   return ((cfun->machine->call_saved_registers
    6748      1241603 :            != TYPE_NO_CALLER_SAVED_REGISTERS)
    6749      1241603 :           || (!epilogue_completed
    6750            0 :               && df_regs_ever_live_p (regno)));
    6751              : }
    6752              : 
    6753              : /* Return TRUE if we need to save REGNO.  */
    6754              : 
bool
ix86_save_reg (unsigned int regno, bool maybe_eh_return, bool ignore_outlined)
{
  rtx reg;

  switch (cfun->machine->call_saved_registers)
    {
    case TYPE_DEFAULT_CALL_SAVED_REGISTERS:
      break;

    case TYPE_NO_CALLER_SAVED_REGISTERS:
      /* If there are no caller-saved registers, we preserve all
         registers, except for MMX and x87 registers which aren't
         supported when saving and restoring registers.  Don't
         explicitly save SP register since it is always preserved.

         Don't preserve registers used for function return value.  */
      reg = crtl->return_rtx;
      if (reg)
        {
          /* REGNO is exempt if it is any of the hard registers
             [i, i + REG_NREGS - 1] holding the return value.  */
          unsigned int i = REGNO (reg);
          unsigned int nregs = REG_NREGS (reg);
          while (nregs-- > 0)
            if ((i + nregs) == regno)
              return false;
        }

      return (df_regs_ever_live_p (regno)
              && !fixed_regs[regno]
              && !STACK_REGNO_P (regno)
              && !MMX_REGNO_P (regno)
              && (regno != HARD_FRAME_POINTER_REGNUM
                  || !frame_pointer_needed));

    case TYPE_NO_CALLEE_SAVED_REGISTERS:
    case TYPE_PRESERVE_NONE:
      /* Nothing is callee-saved except possibly the hard frame
         pointer, which falls through to the common checks below.  */
      if (regno != HARD_FRAME_POINTER_REGNUM)
        return false;
      break;
    }

  /* The PIC register needs special handling: with a pseudo PIC
     register the hard register is only clobbered by the _mcount call
     emitted in the prologue when profiling 32-bit PIC code.  */
  if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
      && pic_offset_table_rtx)
    {
      if (ix86_use_pseudo_pic_reg ())
        {
          /* REAL_PIC_OFFSET_TABLE_REGNUM used by call to
          _mcount in prologue.  */
          if (!TARGET_64BIT && flag_pic && crtl->profile)
            return true;
        }
      else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
               || crtl->profile
               || crtl->calls_eh_return
               || crtl->uses_const_pool
               || cfun->has_nonlocal_label)
        /* Save it unless some unused call-clobbered register can take
           over the PIC role instead.  */
        return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
    }

  /* EH return data registers must be saved when the function may take
     the eh_return path.  */
  if (crtl->calls_eh_return && maybe_eh_return)
    {
      unsigned i;
      for (i = 0; ; i++)
        {
          unsigned test = EH_RETURN_DATA_REGNO (i);
          if (test == INVALID_REGNUM)
            break;
          if (test == regno)
            return true;
        }
    }

  /* Registers saved/restored by the ms2sysv out-of-line stubs are not
     our responsibility when the caller asked to ignore them.  */
  if (ignore_outlined && cfun->machine->call_ms2sysv)
    {
      unsigned count = cfun->machine->call_ms2sysv_extra_regs
                       + xlogue_layout::MIN_REGS;
      if (xlogue_layout::is_stub_managed_reg (regno, count))
        return false;
    }

  /* The DRAP register must be preserved unless its save/restore was
     explicitly suppressed.  */
  if (crtl->drap_reg
      && regno == REGNO (crtl->drap_reg)
      && !cfun->machine->no_drap_save_restore)
    return true;

  /* Default rule: save call-saved registers that are live somewhere,
     except the hard frame pointer when it serves as frame pointer.  */
  return (df_regs_ever_live_p (regno)
          && !call_used_or_fixed_reg_p (regno)
          && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
}
    6844              : 
/* Return number of saved general purpose registers.  */
    6846              : 
    6847              : static int
    6848      8118237 : ix86_nsaved_regs (void)
    6849              : {
    6850      8118237 :   int nregs = 0;
    6851      8118237 :   int regno;
    6852              : 
    6853    754996041 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    6854    746877804 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
    6855      8186197 :       nregs ++;
    6856      8118237 :   return nregs;
    6857              : }
    6858              : 
    6859              : /* Return number of saved SSE registers.  */
    6860              : 
    6861              : static int
    6862      8153150 : ix86_nsaved_sseregs (void)
    6863              : {
    6864      8153150 :   int nregs = 0;
    6865      8153150 :   int regno;
    6866              : 
    6867      7353000 :   if (!TARGET_64BIT_MS_ABI
    6868      8153150 :       && (cfun->machine->call_saved_registers
    6869      7927744 :           != TYPE_NO_CALLER_SAVED_REGISTERS))
    6870              :     return 0;
    6871     21031299 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    6872     20805156 :     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
    6873      1896395 :       nregs ++;
    6874              :   return nregs;
    6875              : }
    6876              : 
    6877              : /* Given FROM and TO register numbers, say whether this elimination is
    6878              :    allowed.  If stack alignment is needed, we can only replace argument
    6879              :    pointer with hard frame pointer, or replace frame pointer with stack
    6880              :    pointer.  Otherwise, frame pointer elimination is automatically
    6881              :    handled and all other eliminations are valid.  */
    6882              : 
    6883              : static bool
    6884     48080289 : ix86_can_eliminate (const int from, const int to)
    6885              : {
    6886     48080289 :   if (stack_realign_fp)
    6887      1704188 :     return ((from == ARG_POINTER_REGNUM
    6888      1704188 :              && to == HARD_FRAME_POINTER_REGNUM)
    6889      1704188 :             || (from == FRAME_POINTER_REGNUM
    6890      1704188 :                 && to == STACK_POINTER_REGNUM));
    6891              :   else
    6892     86352244 :     return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
    6893              : }
    6894              : 
    6895              : /* Return the offset between two registers, one to be eliminated, and the other
    6896              :    its replacement, at the start of a routine.  */
    6897              : 
    6898              : HOST_WIDE_INT
    6899    141078402 : ix86_initial_elimination_offset (int from, int to)
    6900              : {
    6901    141078402 :   struct ix86_frame &frame = cfun->machine->frame;
    6902              : 
    6903    141078402 :   if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    6904     10370397 :     return frame.hard_frame_pointer_offset;
    6905    130708005 :   else if (from == FRAME_POINTER_REGNUM
    6906    130708005 :            && to == HARD_FRAME_POINTER_REGNUM)
    6907      8082997 :     return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
    6908              :   else
    6909              :     {
    6910    122625008 :       gcc_assert (to == STACK_POINTER_REGNUM);
    6911              : 
    6912    122625008 :       if (from == ARG_POINTER_REGNUM)
    6913    114542011 :         return frame.stack_pointer_offset;
    6914              : 
    6915      8082997 :       gcc_assert (from == FRAME_POINTER_REGNUM);
    6916      8082997 :       return frame.stack_pointer_offset - frame.frame_pointer_offset;
    6917              :     }
    6918              : }
    6919              : 
    6920              : /* Emits a warning for unsupported msabi to sysv pro/epilogues.  */
    6921              : void
    6922            0 : warn_once_call_ms2sysv_xlogues (const char *feature)
    6923              : {
    6924            0 :   static bool warned_once = false;
    6925            0 :   if (!warned_once)
    6926              :     {
    6927            0 :       warning (0, "%<-mcall-ms2sysv-xlogues%> is not compatible with %s",
    6928              :                feature);
    6929            0 :       warned_once = true;
    6930              :     }
    6931            0 : }
    6932              : 
    6933              : /* Return the probing interval for -fstack-clash-protection.  */
    6934              : 
    6935              : static HOST_WIDE_INT
    6936          489 : get_probe_interval (void)
    6937              : {
    6938          335 :   if (flag_stack_clash_protection)
    6939          405 :     return (HOST_WIDE_INT_1U
    6940          405 :             << param_stack_clash_protection_probe_interval);
    6941              :   else
    6942              :     return (HOST_WIDE_INT_1U << STACK_CHECK_PROBE_INTERVAL_EXP);
    6943              : }
    6944              : 
    6945              : /* When using -fsplit-stack, the allocation routines set a field in
    6946              :    the TCB to the bottom of the stack plus this much space, measured
    6947              :    in bytes.  */
    6948              : 
    6949              : #define SPLIT_STACK_AVAILABLE 256
    6950              : 
    6951              : /* Return true if push2/pop2 can be generated.  */
    6952              : 
    6953              : static bool
    6954      8118888 : ix86_can_use_push2pop2 (void)
    6955              : {
    6956              :   /* Use push2/pop2 only if the incoming stack is 16-byte aligned.  */
    6957      8118888 :   unsigned int incoming_stack_boundary
    6958      8118888 :     = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
    6959      8118888 :        ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
    6960      8118888 :   return incoming_stack_boundary % 128 == 0;
    6961              : }
    6962              : 
    6963              : /* Helper function to determine whether push2/pop2 can be used in prologue or
    6964              :    epilogue for register save/restore.  */
    6965              : static bool
    6966      8118237 : ix86_pro_and_epilogue_can_use_push2pop2 (int nregs)
    6967              : {
    6968      8118237 :   if (!ix86_can_use_push2pop2 ())
    6969              :     return false;
    6970      8082286 :   int aligned = cfun->machine->fs.sp_offset % 16 == 0;
    6971      8082286 :   return TARGET_APX_PUSH2POP2
    6972         2829 :          && !cfun->machine->frame.save_regs_using_mov
    6973         2817 :          && cfun->machine->func_type == TYPE_NORMAL
    6974      8085095 :          && (nregs + aligned) >= 3;
    6975              : }
    6976              : 
    6977              : /* Check if push/pop should be used to save/restore registers.  */
    6978              : static bool
    6979      8841911 : save_regs_using_push_pop (HOST_WIDE_INT to_allocate)
    6980              : {
    6981      3183971 :   return ((!to_allocate && cfun->machine->frame.nregs <= 1)
    6982      5908147 :           || (TARGET_64BIT && to_allocate >= HOST_WIDE_INT_C (0x80000000))
    6983              :           /* If static stack checking is enabled and done with probes,
    6984              :              the registers need to be saved before allocating the frame.  */
    6985      5907486 :           || flag_stack_check == STATIC_BUILTIN_STACK_CHECK
    6986              :           /* If stack clash probing needs a loop, then it needs a
    6987              :              scratch register.  But the returned register is only guaranteed
    6988              :              to be safe to use after register saves are complete.  So if
    6989              :              stack clash protections are enabled and the allocated frame is
    6990              :              larger than the probe interval, then use pushes to save
    6991              :              callee saved registers.  */
    6992     14749317 :           || (flag_stack_clash_protection
    6993          335 :               && !ix86_target_stack_probe ()
    6994          335 :               && to_allocate > get_probe_interval ()));
    6995              : }
    6996              : 
/* Fill structure ix86_frame about frame of currently computed function:
   register-save counts, the offsets of the save areas, the frame and stack
   pointer offsets, red-zone size, and the flags controlling how registers
   are saved (mov vs. push).  Called repeatedly during compilation; given
   the same frame requirements it must produce the same layout.  */

static void
ix86_compute_frame_layout (void)
{
  struct ix86_frame *frame = &cfun->machine->frame;
  struct machine_function *m = cfun->machine;
  unsigned HOST_WIDE_INT stack_alignment_needed;
  HOST_WIDE_INT offset;
  unsigned HOST_WIDE_INT preferred_alignment;
  HOST_WIDE_INT size = ix86_get_frame_size ();
  HOST_WIDE_INT to_allocate;

  /* m->call_ms2sysv is initially enabled in ix86_expand_call for all 64-bit
   * ms_abi functions that call a sysv function.  We now need to prune away
   * cases where it should be disabled.  */
  if (TARGET_64BIT && m->call_ms2sysv)
    {
      gcc_assert (TARGET_64BIT_MS_ABI);
      gcc_assert (TARGET_CALL_MS2SYSV_XLOGUES);
      gcc_assert (!TARGET_SEH);
      gcc_assert (TARGET_SSE);
      gcc_assert (!ix86_using_red_zone ());

      if (crtl->calls_eh_return)
	{
	  gcc_assert (!reload_completed);
	  m->call_ms2sysv = false;
	  warn_once_call_ms2sysv_xlogues ("__builtin_eh_return");
	}

      else if (ix86_static_chain_on_stack)
	{
	  gcc_assert (!reload_completed);
	  m->call_ms2sysv = false;
	  warn_once_call_ms2sysv_xlogues ("static call chains");
	}

      /* Finally, compute which registers the stub will manage.  */
      else
	{
	  unsigned count = xlogue_layout::count_stub_managed_regs ();
	  m->call_ms2sysv_extra_regs = count - xlogue_layout::MIN_REGS;
	  m->call_ms2sysv_pad_in = 0;
	}
    }

  frame->nregs = ix86_nsaved_regs ();
  frame->nsseregs = ix86_nsaved_sseregs ();

  /* 64-bit MS ABI seems to require stack alignment to be always 16,
     except for function prologues, leaf functions and when the default
     incoming stack boundary is overridden at command line or via
     force_align_arg_pointer attribute.

     Darwin's ABI specifies 128b alignment for both 32 and 64 bit variants
     at call sites, including profile function calls.

     For APX push2/pop2, the stack also requires 128b alignment.  */
  if ((ix86_pro_and_epilogue_can_use_push2pop2 (frame->nregs)
       && crtl->preferred_stack_boundary < 128)
      || (((TARGET_64BIT_MS_ABI || TARGET_MACHO)
	   && crtl->preferred_stack_boundary < 128)
	  && (!crtl->is_leaf || cfun->calls_alloca != 0
	      || ix86_current_function_calls_tls_descriptor
	      || (TARGET_MACHO && crtl->profile)
	      || ix86_incoming_stack_boundary < 128)))
    {
      crtl->preferred_stack_boundary = 128;
      if (crtl->stack_alignment_needed < 128)
	crtl->stack_alignment_needed = 128;
    }

  /* Convert boundaries from bits to bytes for the offset arithmetic
     below.  */
  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;

  gcc_assert (!size || stack_alignment_needed);
  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
  gcc_assert (preferred_alignment <= stack_alignment_needed);

  /* The only ABI saving SSE regs should be 64-bit ms_abi or with
     no_caller_saved_registers attribute.  */
  gcc_assert (TARGET_64BIT
	      || (cfun->machine->call_saved_registers
		  == TYPE_NO_CALLER_SAVED_REGISTERS)
	      || !frame->nsseregs);
  if (TARGET_64BIT && m->call_ms2sysv)
    {
      gcc_assert (stack_alignment_needed >= 16);
      gcc_assert ((cfun->machine->call_saved_registers
		   == TYPE_NO_CALLER_SAVED_REGISTERS)
		  || !frame->nsseregs);
    }

  /* For SEH we have to limit the amount of code movement into the prologue.
     At present we do this via a BLOCKAGE, at which point there's very little
     scheduling that can be done, which means that there's very little point
     in doing anything except PUSHs.  */
  if (TARGET_SEH)
    m->use_fast_prologue_epilogue = false;
  else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun)))
    {
      int count = frame->nregs;
      struct cgraph_node *node = cgraph_node::get (current_function_decl);

      /* The fast prologue uses move instead of push to save registers.  This
	 is significantly longer, but also executes faster as modern hardware
	 can execute the moves in parallel, but can't do that for push/pop.

	 Be careful about choosing what prologue to emit:  When function takes
	 many instructions to execute we may use slow version as well as in
	 case function is known to be outside hot spot (this is known with
	 feedback only).  Weight the size of function by number of registers
	 to save as it is cheap to use one or two push instructions but very
	 slow to use many of them.

	 Calling this hook multiple times with the same frame requirements
	 must produce the same layout, since the RA might otherwise be
	 unable to reach a fixed point or might fail its final sanity checks.
	 This means that once we've assumed that a function does or doesn't
	 have a particular size, we have to stick to that assumption
	 regardless of how the function has changed since.  */
      if (count)
	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
      if (node->frequency < NODE_FREQUENCY_NORMAL
	  || (flag_branch_probabilities
	      && node->frequency < NODE_FREQUENCY_HOT))
	m->use_fast_prologue_epilogue = false;
      else
	{
	  /* Cache the expensive_function_p result for this COUNT so that
	     repeated layout computations stay consistent (see comment
	     above).  */
	  if (count != frame->expensive_count)
	    {
	      frame->expensive_count = count;
	      frame->expensive_p = expensive_function_p (count);
	    }
	  m->use_fast_prologue_epilogue = !frame->expensive_p;
	}
    }

  frame->save_regs_using_mov
    = TARGET_PROLOGUE_USING_MOVE && m->use_fast_prologue_epilogue;

  /* Skip return address and error code in exception handler.  */
  offset = INCOMING_FRAME_SP_OFFSET;

  /* Skip pushed static chain.  */
  if (ix86_static_chain_on_stack)
    offset += UNITS_PER_WORD;

  /* Skip saved base pointer.  */
  if (frame_pointer_needed)
    offset += UNITS_PER_WORD;
  frame->hfp_save_offset = offset;

  /* The traditional frame pointer location is at the top of the frame.  */
  frame->hard_frame_pointer_offset = offset;

  /* Register save area */
  offset += frame->nregs * UNITS_PER_WORD;
  frame->reg_save_offset = offset;

  /* Calculate the size of the va-arg area (not including padding, if any).  */
  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;

  /* Also adjust stack_realign_offset for the largest alignment of
     stack slot actually used.  */
  if (stack_realign_fp
      || (cfun->machine->max_used_stack_alignment != 0
	  && (offset % cfun->machine->max_used_stack_alignment) != 0))
    {
      /* We may need a 16-byte aligned stack for the remainder of the
	 register save area, but the stack frame for the local function
	 may require a greater alignment if using AVX/2/512.  In order
	 to avoid wasting space, we first calculate the space needed for
	 the rest of the register saves, add that to the stack pointer,
	 and then realign the stack to the boundary of the start of the
	 frame for the local function.  */
      HOST_WIDE_INT space_needed = 0;
      HOST_WIDE_INT sse_reg_space_needed = 0;

      if (TARGET_64BIT)
	{
	  if (m->call_ms2sysv)
	    {
	      m->call_ms2sysv_pad_in = 0;
	      space_needed = xlogue_layout::get_instance ().get_stack_space_used ();
	    }

	  else if (frame->nsseregs)
	    /* The only ABI that has saved SSE registers (Win64) also has a
	       16-byte aligned default stack.  However, many programs violate
	       the ABI, and Wine64 forces stack realignment to compensate.  */
	    space_needed = frame->nsseregs * 16;

	  sse_reg_space_needed = space_needed = ROUND_UP (space_needed, 16);

	  /* 64-bit frame->va_arg_size should always be a multiple of 16, but
	     rounding to be pedantic.  */
	  space_needed = ROUND_UP (space_needed + frame->va_arg_size, 16);
	}
      else
	space_needed = frame->va_arg_size;

      /* Record the allocation size required prior to the realignment AND.  */
      frame->stack_realign_allocate = space_needed;

      /* The re-aligned stack starts at frame->stack_realign_offset.  Values
	 before this point are not directly comparable with values below
	 this point.  Use sp_valid_at to determine if the stack pointer is
	 valid for a given offset, fp_valid_at for the frame pointer, or
	 choose_baseaddr to have a base register chosen for you.

	 Note that the result of (frame->stack_realign_offset
	 & (stack_alignment_needed - 1)) may not equal zero.  */
      offset = ROUND_UP (offset + space_needed, stack_alignment_needed);
      frame->stack_realign_offset = offset - space_needed;
      frame->sse_reg_save_offset = frame->stack_realign_offset
						+ sse_reg_space_needed;
    }
  else
    {
      frame->stack_realign_offset = offset;

      if (TARGET_64BIT && m->call_ms2sysv)
	{
	  m->call_ms2sysv_pad_in = !!(offset & UNITS_PER_WORD);
	  offset += xlogue_layout::get_instance ().get_stack_space_used ();
	}

      /* Align and set SSE register save area.  */
      else if (frame->nsseregs)
	{
	  /* If the incoming stack boundary is at least 16 bytes, or DRAP is
	     required and the DRAP re-alignment boundary is at least 16 bytes,
	     then we want the SSE register save area properly aligned.  */
	  if (ix86_incoming_stack_boundary >= 128
		  || (stack_realign_drap && stack_alignment_needed >= 16))
	    offset = ROUND_UP (offset, 16);
	  offset += frame->nsseregs * 16;
	}
      frame->sse_reg_save_offset = offset;
      offset += frame->va_arg_size;
    }

  /* Align start of frame for local function.  When a function call
     is removed, it may become a leaf function.  But if argument may
     be passed on stack, we need to align the stack when there is no
     tail call.  */
  if (m->call_ms2sysv
      || frame->va_arg_size != 0
      || size != 0
      || !crtl->is_leaf
      || (!crtl->tail_call_emit
	  && cfun->machine->outgoing_args_on_stack)
      || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = ROUND_UP (offset, stack_alignment_needed);

  /* Frame pointer points here.  */
  frame->frame_pointer_offset = offset;

  offset += size;

  /* Add outgoing arguments area.  Can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when function calls alloca.  Alloca
     expander assumes that last crtl->outgoing_args_size
     of stack frame are unused.  */
  if (ACCUMULATE_OUTGOING_ARGS
      && (!crtl->is_leaf || cfun->calls_alloca
	  || ix86_current_function_calls_tls_descriptor))
    {
      offset += crtl->outgoing_args_size;
      frame->outgoing_arguments_size = crtl->outgoing_args_size;
    }
  else
    frame->outgoing_arguments_size = 0;

  /* Align stack boundary.  Only needed if we're calling another function
     or using alloca.  */
  if (!crtl->is_leaf || cfun->calls_alloca
      || ix86_current_function_calls_tls_descriptor)
    offset = ROUND_UP (offset, preferred_alignment);

  /* We've reached end of stack frame.  */
  frame->stack_pointer_offset = offset;

  /* Size prologue needs to allocate.  */
  to_allocate = offset - frame->sse_reg_save_offset;

  if (save_regs_using_push_pop (to_allocate))
    frame->save_regs_using_mov = false;

  if (ix86_using_red_zone ()
      && crtl->sp_is_unchanging
      && crtl->is_leaf
      && !cfun->machine->asm_redzone_clobber_seen
      && !ix86_pc_thunk_call_expanded
      && !ix86_current_function_calls_tls_descriptor)
    {
      frame->red_zone_size = to_allocate;
      if (frame->save_regs_using_mov)
	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
    }
  else
    frame->red_zone_size = 0;
  frame->stack_pointer_offset -= frame->red_zone_size;

  /* The SEH frame pointer location is near the bottom of the frame.
     This is enforced by the fact that the difference between the
     stack pointer and the frame pointer is limited to 240 bytes in
     the unwind data structure.  */
  if (TARGET_SEH)
    {
      /* Force the frame pointer to point at or below the lowest register save
	 area, see the SEH code in config/i386/winnt.cc for the rationale.  */
      frame->hard_frame_pointer_offset = frame->sse_reg_save_offset;

      /* If we can leave the frame pointer where it is, do so; however return
	 the establisher frame for __builtin_frame_address (0) or else if the
	 frame overflows the SEH maximum frame size.

	 Note that the value returned by __builtin_frame_address (0) is quite
	 constrained, because setjmp is piggybacked on the SEH machinery with
	 recent versions of MinGW:

	  #    elif defined(__SEH__)
	  #     if defined(__aarch64__) || defined(_ARM64_)
	  #      define setjmp(BUF) _setjmp((BUF), __builtin_sponentry())
	  #     elif (__MINGW_GCC_VERSION < 40702)
	  #      define setjmp(BUF) _setjmp((BUF), mingw_getsp())
	  #     else
	  #      define setjmp(BUF) _setjmp((BUF), __builtin_frame_address (0))
	  #     endif

	 and the second argument passed to _setjmp, if not null, is forwarded
	 to the TargetFrame parameter of RtlUnwindEx by longjmp (after it has
	 built an ExceptionRecord on the fly describing the setjmp buffer).  */
      const HOST_WIDE_INT diff
	= frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
      if (diff <= 255 && !crtl->accesses_prior_frames)
	{
	  /* The resulting diff will be a multiple of 16 lower than 255,
	     i.e. at most 240 as required by the unwind data structure.  */
	  frame->hard_frame_pointer_offset += (diff & 15);
	}
      else if (diff <= SEH_MAX_FRAME_SIZE && !crtl->accesses_prior_frames)
	{
	  /* Ideally we'd determine what portion of the local stack frame
	     (within the constraint of the lowest 240) is most heavily used.
	     But without that complication, simply bias the frame pointer
	     by 128 bytes so as to maximize the amount of the local stack
	     frame that is addressable with 8-bit offsets.  */
	  frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
	}
      else
	frame->hard_frame_pointer_offset = frame->hfp_save_offset;
    }
}
    7358              : 
    7359              : /* This is semi-inlined memory_address_length, but simplified
    7360              :    since we know that we're always dealing with reg+offset, and
    7361              :    to avoid having to create and discard all that rtl.  */
    7362              : 
    7363              : static inline int
    7364      1019563 : choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
    7365              : {
    7366      1019563 :   int len = 4;
    7367              : 
    7368            0 :   if (offset == 0)
    7369              :     {
    7370              :       /* EBP and R13 cannot be encoded without an offset.  */
    7371            0 :       len = (regno == BP_REG || regno == R13_REG);
    7372              :     }
    7373      1011457 :   else if (IN_RANGE (offset, -128, 127))
    7374       635693 :     len = 1;
    7375              : 
    7376              :   /* ESP and R12 must be encoded with a SIB byte.  */
    7377            0 :   if (regno == SP_REG || regno == R12_REG)
    7378            0 :     len++;
    7379              : 
    7380      1019563 :   return len;
    7381              : }
    7382              : 
    7383              : /* Determine if the stack pointer is valid for accessing the CFA_OFFSET in
    7384              :    the frame save area.  The register is saved at CFA - CFA_OFFSET.  */
    7385              : 
    7386              : static bool
    7387      3505103 : sp_valid_at (HOST_WIDE_INT cfa_offset)
    7388              : {
    7389      3505103 :   const struct machine_frame_state &fs = cfun->machine->fs;
    7390      3505103 :   if (fs.sp_realigned && cfa_offset <= fs.sp_realigned_offset)
    7391              :     {
    7392              :       /* Validate that the cfa_offset isn't in a "no-man's land".  */
    7393        46396 :       gcc_assert (cfa_offset <= fs.sp_realigned_fp_last);
    7394              :       return false;
    7395              :     }
    7396      3458707 :   return fs.sp_valid;
    7397              : }
    7398              : 
    7399              : /* Determine if the frame pointer is valid for accessing the CFA_OFFSET in
    7400              :    the frame save area.  The register is saved at CFA - CFA_OFFSET.  */
    7401              : 
    7402              : static inline bool
    7403      1389367 : fp_valid_at (HOST_WIDE_INT cfa_offset)
    7404              : {
    7405      1389367 :   const struct machine_frame_state &fs = cfun->machine->fs;
    7406      1389367 :   if (fs.sp_realigned && cfa_offset > fs.sp_realigned_fp_last)
    7407              :     {
    7408              :       /* Validate that the cfa_offset isn't in a "no-man's land".  */
    7409        28328 :       gcc_assert (cfa_offset >= fs.sp_realigned_offset);
    7410              :       return false;
    7411              :     }
    7412      1361039 :   return fs.fp_valid;
    7413              : }
    7414              : 
    7415              : /* Choose a base register based upon alignment requested, speed and/or
    7416              :    size.  */
    7417              : 
    7418              : static void
    7419      1389367 : choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg,
    7420              :                 HOST_WIDE_INT &base_offset,
    7421              :                 unsigned int align_reqested, unsigned int *align)
    7422              : {
    7423      1389367 :   const struct machine_function *m = cfun->machine;
    7424      1389367 :   unsigned int hfp_align;
    7425      1389367 :   unsigned int drap_align;
    7426      1389367 :   unsigned int sp_align;
    7427      1389367 :   bool hfp_ok  = fp_valid_at (cfa_offset);
    7428      1389367 :   bool drap_ok = m->fs.drap_valid;
    7429      1389367 :   bool sp_ok   = sp_valid_at (cfa_offset);
    7430              : 
    7431      1389367 :   hfp_align = drap_align = sp_align = INCOMING_STACK_BOUNDARY;
    7432              : 
    7433              :   /* Filter out any registers that don't meet the requested alignment
    7434              :      criteria.  */
    7435      1389367 :   if (align_reqested)
    7436              :     {
    7437       974972 :       if (m->fs.realigned)
    7438        28160 :         hfp_align = drap_align = sp_align = crtl->stack_alignment_needed;
    7439              :       /* SEH unwind code does do not currently support REG_CFA_EXPRESSION
    7440              :          notes (which we would need to use a realigned stack pointer),
    7441              :          so disable on SEH targets.  */
    7442       946812 :       else if (m->fs.sp_realigned)
    7443        28328 :         sp_align = crtl->stack_alignment_needed;
    7444              : 
    7445       974972 :       hfp_ok = hfp_ok && hfp_align >= align_reqested;
    7446       974972 :       drap_ok = drap_ok && drap_align >= align_reqested;
    7447       974972 :       sp_ok = sp_ok && sp_align >= align_reqested;
    7448              :     }
    7449              : 
    7450      1389367 :   if (m->use_fast_prologue_epilogue)
    7451              :     {
    7452              :       /* Choose the base register most likely to allow the most scheduling
    7453              :          opportunities.  Generally FP is valid throughout the function,
    7454              :          while DRAP must be reloaded within the epilogue.  But choose either
    7455              :          over the SP due to increased encoding size.  */
    7456              : 
    7457       665191 :       if (hfp_ok)
    7458              :         {
    7459       117557 :           base_reg = hard_frame_pointer_rtx;
    7460       117557 :           base_offset = m->fs.fp_offset - cfa_offset;
    7461              :         }
    7462       547634 :       else if (drap_ok)
    7463              :         {
    7464            0 :           base_reg = crtl->drap_reg;
    7465            0 :           base_offset = 0 - cfa_offset;
    7466              :         }
    7467       547634 :       else if (sp_ok)
    7468              :         {
    7469       547634 :           base_reg = stack_pointer_rtx;
    7470       547634 :           base_offset = m->fs.sp_offset - cfa_offset;
    7471              :         }
    7472              :     }
    7473              :   else
    7474              :     {
    7475       724176 :       HOST_WIDE_INT toffset;
    7476       724176 :       int len = 16, tlen;
    7477              : 
    7478              :       /* Choose the base register with the smallest address encoding.
    7479              :          With a tie, choose FP > DRAP > SP.  */
    7480       724176 :       if (sp_ok)
    7481              :         {
    7482       706851 :           base_reg = stack_pointer_rtx;
    7483       706851 :           base_offset = m->fs.sp_offset - cfa_offset;
    7484      1405596 :           len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
    7485              :         }
    7486       724176 :       if (drap_ok)
    7487              :         {
    7488            0 :           toffset = 0 - cfa_offset;
    7489            0 :           tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
    7490            0 :           if (tlen <= len)
    7491              :             {
    7492            0 :               base_reg = crtl->drap_reg;
    7493            0 :               base_offset = toffset;
    7494            0 :               len = tlen;
    7495              :             }
    7496              :         }
    7497       724176 :       if (hfp_ok)
    7498              :         {
    7499       312712 :           toffset = m->fs.fp_offset - cfa_offset;
    7500       312712 :           tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
    7501       312712 :           if (tlen <= len)
    7502              :             {
    7503       222080 :               base_reg = hard_frame_pointer_rtx;
    7504       222080 :               base_offset = toffset;
    7505              :             }
    7506              :         }
    7507              :     }
    7508              : 
    7509              :     /* Set the align return value.  */
    7510      1389367 :     if (align)
    7511              :       {
    7512       974972 :         if (base_reg == stack_pointer_rtx)
    7513       693241 :           *align = sp_align;
    7514       281731 :         else if (base_reg == crtl->drap_reg)
    7515            0 :           *align = drap_align;
    7516       281731 :         else if (base_reg == hard_frame_pointer_rtx)
    7517       281731 :           *align = hfp_align;
    7518              :       }
    7519      1389367 : }
    7520              : 
    7521              : /* Return an RTX that points to CFA_OFFSET within the stack frame and
    7522              :    the alignment of address.  If ALIGN is non-null, it should point to
    7523              :    an alignment value (in bits) that is preferred or zero and will
   receive the alignment of the base register that was selected,
   irrespective of whether or not CFA_OFFSET is a multiple of that
    7526              :    alignment value.  If it is possible for the base register offset to be
    7527              :    non-immediate then SCRATCH_REGNO should specify a scratch register to
    7528              :    use.
    7529              : 
    7530              :    The valid base registers are taken from CFUN->MACHINE->FS.  */
    7531              : 
    7532              : static rtx
    7533      1389367 : choose_baseaddr (HOST_WIDE_INT cfa_offset, unsigned int *align,
    7534              :                  unsigned int scratch_regno = INVALID_REGNUM)
    7535              : {
    7536      1389367 :   rtx base_reg = NULL;
    7537      1389367 :   HOST_WIDE_INT base_offset = 0;
    7538              : 
    7539              :   /* If a specific alignment is requested, try to get a base register
    7540              :      with that alignment first.  */
    7541      1389367 :   if (align && *align)
    7542       974972 :     choose_basereg (cfa_offset, base_reg, base_offset, *align, align);
    7543              : 
    7544      1389367 :   if (!base_reg)
    7545       414395 :     choose_basereg (cfa_offset, base_reg, base_offset, 0, align);
    7546              : 
    7547      1389367 :   gcc_assert (base_reg != NULL);
    7548              : 
    7549      1389367 :   rtx base_offset_rtx = GEN_INT (base_offset);
    7550              : 
    7551      1440897 :   if (!x86_64_immediate_operand (base_offset_rtx, Pmode))
    7552              :     {
    7553            1 :       gcc_assert (scratch_regno != INVALID_REGNUM);
    7554              : 
    7555            1 :       rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
    7556            1 :       emit_move_insn (scratch_reg, base_offset_rtx);
    7557              : 
    7558            1 :       return gen_rtx_PLUS (Pmode, base_reg, scratch_reg);
    7559              :     }
    7560              : 
    7561      1440896 :   return plus_constant (Pmode, base_reg, base_offset);
    7562              : }
    7563              : 
    7564              : /* Emit code to save registers in the prologue.  */
    7565              : 
static void
ix86_emit_save_regs (void)
{
  int regno;
  rtx_insn *insn;
  /* APX PPX push/pop hints are not used when the function calls
     eh_return.  */
  bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return;

  /* Without usable APX PUSH2POP2 (or for non-normal function types),
     save each call-saved general register with an individual push.  */
  if (!TARGET_APX_PUSH2POP2
      || !ix86_can_use_push2pop2 ()
      || cfun->machine->func_type != TYPE_NORMAL)
    {
      for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
	if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
	  {
	    insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
					use_ppx));
	    RTX_FRAME_RELATED_P (insn) = 1;
	  }
    }
  else
    {
      /* With PUSH2, pair up saves: collect up to two pending registers
	 in REGNO_LIST and emit one 16-byte PUSH2 per pair, starting
	 only once the stack pointer is 16-byte aligned.  */
      int regno_list[2];
      regno_list[0] = regno_list[1] = -1;
      int loaded_regnum = 0;
      bool aligned = cfun->machine->fs.sp_offset % 16 == 0;

      for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
	if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
	  {
	    if (aligned)
	      {
		regno_list[loaded_regnum++] = regno;
		if (loaded_regnum == 2)
		  {
		    gcc_assert (regno_list[0] != -1
				&& regno_list[1] != -1
				&& regno_list[0] != regno_list[1]);
		    const int offset = UNITS_PER_WORD * 2;
		    rtx mem = gen_rtx_MEM (TImode,
					   gen_rtx_PRE_DEC (Pmode,
							    stack_pointer_rtx));
		    insn = emit_insn (gen_push2 (mem,
						 gen_rtx_REG (word_mode,
							      regno_list[0]),
						 gen_rtx_REG (word_mode,
							      regno_list[1]),
						 use_ppx));
		    RTX_FRAME_RELATED_P (insn) = 1;
		    /* PUSH2 stores both registers with a single 16-byte
		       pre-decrement.  Describe it to the unwinder via a
		       REG_FRAME_RELATED_EXPR as two word-sized saves
		       plus the stack-pointer adjustment.  */
		    rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3));

		    for (int i = 0; i < 2; i++)
		      {
			rtx dwarf_reg = gen_rtx_REG (word_mode,
						     regno_list[i]);
			/* Slot 1 - i words above the new stack pointer:
			   the first-listed register ends up higher.  */
			rtx sp_offset = plus_constant (Pmode,
						       stack_pointer_rtx,
						       + UNITS_PER_WORD
							 * (1 - i));
			rtx tmp = gen_rtx_SET (gen_frame_mem (DImode,
							      sp_offset),
					       dwarf_reg);
			RTX_FRAME_RELATED_P (tmp) = 1;
			XVECEXP (dwarf, 0, i + 1) = tmp;
		      }
		    rtx sp_tmp = gen_rtx_SET (stack_pointer_rtx,
					      plus_constant (Pmode,
							     stack_pointer_rtx,
							     -offset));
		    RTX_FRAME_RELATED_P (sp_tmp) = 1;
		    XVECEXP (dwarf, 0, 0) = sp_tmp;
		    add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);

		    /* Pair emitted; start collecting the next one.  */
		    loaded_regnum = 0;
		    regno_list[0] = regno_list[1] = -1;
		  }
	      }
	    else
	      {
		/* Stack pointer not yet 16-byte aligned: emit a single
		   word push, after which pairing can begin.  */
		insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno),
					    use_ppx));
		RTX_FRAME_RELATED_P (insn) = 1;
		aligned = true;
	      }
	  }
      /* An odd register left over has no partner; push it alone.  */
      if (loaded_regnum == 1)
	{
	  insn = emit_insn (gen_push (gen_rtx_REG (word_mode,
						   regno_list[0]),
				      use_ppx));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
    7659              : 
    7660              : /* Emit a single register save at CFA - CFA_OFFSET.  */
    7661              : 
static void
ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
			      HOST_WIDE_INT cfa_offset)
{
  struct machine_function *m = cfun->machine;
  rtx reg = gen_rtx_REG (mode, regno);
  rtx mem, addr, base, insn;
  unsigned int align = GET_MODE_ALIGNMENT (mode);

  /* Prefer a base register providing MODE's alignment; on return ALIGN
     holds the alignment actually supplied by the chosen base.  */
  addr = choose_baseaddr (cfa_offset, &align);
  mem = gen_frame_mem (mode, addr);

  /* The location alignment depends upon the base register.  */
  align = MIN (GET_MODE_ALIGNMENT (mode), align);
  gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
  set_mem_align (mem, align);

  insn = emit_insn (gen_rtx_SET (mem, reg));
  RTX_FRAME_RELATED_P (insn) = 1;

  /* Strip any offset to recover the bare base register.  */
  base = addr;
  if (GET_CODE (base) == PLUS)
    base = XEXP (base, 0);
  gcc_checking_assert (REG_P (base));

  /* When saving registers into a re-aligned local stack frame, avoid
     any tricky guessing by dwarf2out.  */
  if (m->fs.realigned)
    {
      gcc_checking_assert (stack_realign_drap);

      if (regno == REGNO (crtl->drap_reg))
	{
	  /* A bit of a hack.  We force the DRAP register to be saved in
	     the re-aligned stack frame, which provides us with a copy
	     of the CFA that will last past the prologue.  Install it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
	}
      else
	{
	  /* The frame pointer is a stable reference within the
	     aligned frame.  Use it.  */
	  gcc_checking_assert (cfun->machine->fs.fp_valid);
	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
				cfun->machine->fs.fp_offset - cfa_offset);
	  mem = gen_rtx_MEM (mode, addr);
	  add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
	}
    }

  /* Saves relative to a realigned stack pointer also need an explicit
     expression note rather than dwarf2out's guess.  */
  else if (base == stack_pointer_rtx && m->fs.sp_realigned
	   && cfa_offset >= m->fs.sp_realigned_offset)
    {
      gcc_checking_assert (stack_realign_fp);
      add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
    }

  /* The memory may not be relative to the current CFA register,
     which means that we may need to generate a new pattern for
     use by the unwind info.  */
  else if (base != m->fs.cfa_reg)
    {
      addr = plus_constant (Pmode, m->fs.cfa_reg,
			    m->fs.cfa_offset - cfa_offset);
      mem = gen_rtx_MEM (mode, addr);
      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
    }
}
    7734              : 
    7735              : /* Emit code to save registers using MOV insns.
    7736              :    First register is stored at CFA - CFA_OFFSET.  */
    7737              : static void
    7738        45826 : ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
    7739              : {
    7740        45826 :   unsigned int regno;
    7741              : 
    7742      4261818 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    7743      4215992 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
    7744              :       {
    7745              :         /* Skip registers, already processed by shrink wrap separate.  */
    7746       192698 :         if (!cfun->machine->reg_is_wrapped_separately[regno])
    7747        85015 :           ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
    7748       207148 :         cfa_offset -= UNITS_PER_WORD;
    7749              :       }
    7750        45826 : }
    7751              : 
    7752              : /* Emit code to save SSE registers using MOV insns.
    7753              :    First register is stored at CFA - CFA_OFFSET.  */
    7754              : static void
    7755        33353 : ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
    7756              : {
    7757        33353 :   unsigned int regno;
    7758              : 
    7759      3101829 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    7760      3068476 :     if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true, true))
    7761              :       {
    7762       333557 :         ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
    7763       333557 :         cfa_offset -= GET_MODE_SIZE (V4SFmode);
    7764              :       }
    7765        33353 : }
    7766              : 
    7767              : static GTY(()) rtx queued_cfa_restores;
    7768              : 
    7769              : /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
    7770              :    manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
    7771              :    Don't add the note if the previously saved value will be left untouched
    7772              :    within stack red-zone till return, as unwinders can find the same value
    7773              :    in the register and on the stack.  */
    7774              : 
    7775              : static void
    7776      2274698 : ix86_add_cfa_restore_note (rtx_insn *insn, rtx reg, HOST_WIDE_INT cfa_offset)
    7777              : {
    7778      2274698 :   if (!crtl->shrink_wrapped
    7779      2255484 :       && cfa_offset <= cfun->machine->fs.red_zone_offset)
    7780              :     return;
    7781              : 
    7782       770598 :   if (insn)
    7783              :     {
    7784       359760 :       add_reg_note (insn, REG_CFA_RESTORE, reg);
    7785       359760 :       RTX_FRAME_RELATED_P (insn) = 1;
    7786              :     }
    7787              :   else
    7788       410838 :     queued_cfa_restores
    7789       410838 :       = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
    7790              : }
    7791              : 
    7792              : /* Add queued REG_CFA_RESTORE notes if any to INSN.  */
    7793              : 
    7794              : static void
    7795      2537226 : ix86_add_queued_cfa_restore_notes (rtx insn)
    7796              : {
    7797      2537226 :   rtx last;
    7798      2537226 :   if (!queued_cfa_restores)
    7799              :     return;
    7800       410838 :   for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
    7801              :     ;
    7802        53154 :   XEXP (last, 1) = REG_NOTES (insn);
    7803        53154 :   REG_NOTES (insn) = queued_cfa_restores;
    7804        53154 :   queued_cfa_restores = NULL_RTX;
    7805        53154 :   RTX_FRAME_RELATED_P (insn) = 1;
    7806              : }
    7807              : 
    7808              : /* Expand prologue or epilogue stack adjustment.
    7809              :    The pattern exist to put a dependency on all ebp-based memory accesses.
    7810              :    STYLE should be negative if instructions should be marked as frame related,
    7811              :    zero if %r11 register is live and cannot be freely used and positive
    7812              :    otherwise.  */
    7813              : 
static rtx
pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
			   int style, bool set_cfa)
{
  struct machine_function *m = cfun->machine;
  rtx addend = offset;
  rtx insn;
  bool add_frame_related_expr = false;

  /* If OFFSET is not usable as an immediate operand, load it into a
     register first: r11 when it is free (STYLE != 0), otherwise the
     hard frame pointer, which the assert shows is not otherwise
     involved in this adjustment.  */
  if (!x86_64_immediate_operand (offset, Pmode))
    {
      /* r11 is used by indirect sibcall return as well, set before the
	 epilogue and used after the epilogue.  */
      if (style)
	addend = gen_rtx_REG (Pmode, R11_REG);
      else
	{
	  gcc_assert (src != hard_frame_pointer_rtx
		      && dest != hard_frame_pointer_rtx);
	  addend = hard_frame_pointer_rtx;
	}
      emit_insn (gen_rtx_SET (addend, offset));
      if (style < 0)
	add_frame_related_expr = true;
    }

  /*  Shrink wrap separate may insert prologue between TEST and JMP.  In order
      not to affect EFlags, emit add without reg clobbering.  */
  if (crtl->shrink_wrapped_separate)
    insn = emit_insn (gen_pro_epilogue_adjust_stack_add_nocc
		      (Pmode, dest, src, addend));
  else
    insn = emit_insn (gen_pro_epilogue_adjust_stack_add
		      (Pmode, dest, src, addend));

  /* For epilogue-style adjustments, this insn is the place to attach
     any queued REG_CFA_RESTORE notes.  */
  if (style >= 0)
    ix86_add_queued_cfa_restore_notes (insn);

  /* When this adjustment moves the CFA, record the new CFA register
     and offset, and describe the move to the unwinder.  */
  if (set_cfa)
    {
      rtx r;

      gcc_assert (m->fs.cfa_reg == src);
      m->fs.cfa_offset += INTVAL (offset);
      m->fs.cfa_reg = dest;

      r = gen_rtx_PLUS (Pmode, src, offset);
      r = gen_rtx_SET (dest, r);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (style < 0)
    {
      RTX_FRAME_RELATED_P (insn) = 1;
      /* If the offset had to go through a scratch register, give the
	 unwinder the underlying SRC + OFFSET expression explicitly.  */
      if (add_frame_related_expr)
	{
	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
	  r = gen_rtx_SET (dest, r);
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
	}
    }

  /* Keep the tracked frame state in sync: writing the stack pointer
     inherits validity/offset from whichever source register was used.  */
  if (dest == stack_pointer_rtx)
    {
      HOST_WIDE_INT ooffset = m->fs.sp_offset;
      bool valid = m->fs.sp_valid;
      bool realigned = m->fs.sp_realigned;

      if (src == hard_frame_pointer_rtx)
	{
	  valid = m->fs.fp_valid;
	  realigned = false;
	  ooffset = m->fs.fp_offset;
	}
      else if (src == crtl->drap_reg)
	{
	  valid = m->fs.drap_valid;
	  realigned = false;
	  ooffset = 0;
	}
      else
	{
	  /* Else there are two possibilities: SP itself, which we set
	     up as the default above.  Or EH_RETURN_STACKADJ_RTX, which is
	     taken care of this by hand along the eh_return path.  */
	  gcc_checking_assert (src == stack_pointer_rtx
			       || offset == const0_rtx);
	}

      m->fs.sp_offset = ooffset - INTVAL (offset);
      m->fs.sp_valid = valid;
      m->fs.sp_realigned = realigned;
    }
  return insn;
}
    7909              : 
    7910              : /* Find an available register to be used as dynamic realign argument
   pointer register.  Such a register will be written in the prologue and
   used at the beginning of the function body, so it must not be
    7913              :         1. parameter passing register.
    7914              :         2. GOT pointer.
    7915              :    We reuse static-chain register if it is available.  Otherwise, we
    7916              :    use DI for i386 and R13 for x86-64.  We chose R13 since it has
    7917              :    shorter encoding.
    7918              : 
    7919              :    Return: the regno of chosen register.  */
    7920              : 
    7921              : static unsigned int
    7922         7280 : find_drap_reg (void)
    7923              : {
    7924         7280 :   tree decl = cfun->decl;
    7925              : 
    7926              :   /* Always use callee-saved register if there are no caller-saved
    7927              :      registers.  */
    7928         7280 :   if (TARGET_64BIT)
    7929              :     {
    7930              :       /* Use R13 for nested function or function need static chain.
    7931              :          Since function with tail call may use any caller-saved
    7932              :          registers in epilogue, DRAP must not use caller-saved
    7933              :          register in such case.  */
    7934         6995 :       if (DECL_STATIC_CHAIN (decl)
    7935         6953 :           || (cfun->machine->call_saved_registers
    7936         6953 :               == TYPE_NO_CALLER_SAVED_REGISTERS)
    7937        13948 :           || crtl->tail_call_emit)
    7938          190 :         return R13_REG;
    7939              : 
    7940              :       return R10_REG;
    7941              :     }
    7942              :   else
    7943              :     {
    7944              :       /* Use DI for nested function or function need static chain.
    7945              :          Since function with tail call may use any caller-saved
    7946              :          registers in epilogue, DRAP must not use caller-saved
    7947              :          register in such case.  */
    7948          285 :       if (DECL_STATIC_CHAIN (decl)
    7949          285 :           || (cfun->machine->call_saved_registers
    7950          285 :               == TYPE_NO_CALLER_SAVED_REGISTERS)
    7951          285 :           || crtl->tail_call_emit
    7952          550 :           || crtl->calls_eh_return)
    7953              :         return DI_REG;
    7954              : 
    7955              :       /* Reuse static chain register if it isn't used for parameter
    7956              :          passing.  */
    7957          265 :       if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
    7958              :         {
    7959          265 :           unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
    7960          265 :           if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
    7961              :             return CX_REG;
    7962              :         }
    7963            0 :       return DI_REG;
    7964              :     }
    7965              : }
    7966              : 
    7967              : /* Return minimum incoming stack alignment.  */
    7968              : 
    7969              : static unsigned int
    7970      1606624 : ix86_minimum_incoming_stack_boundary (bool sibcall)
    7971              : {
    7972      1606624 :   unsigned int incoming_stack_boundary;
    7973              : 
    7974              :   /* Stack of interrupt handler is aligned to 128 bits in 64bit mode.  */
    7975      1606624 :   if (cfun->machine->func_type != TYPE_NORMAL)
    7976          120 :     incoming_stack_boundary = TARGET_64BIT ? 128 : MIN_STACK_BOUNDARY;
    7977              :   /* Prefer the one specified at command line. */
    7978      1606504 :   else if (ix86_user_incoming_stack_boundary)
    7979              :     incoming_stack_boundary = ix86_user_incoming_stack_boundary;
    7980              :   /* In 32bit, use MIN_STACK_BOUNDARY for incoming stack boundary
    7981              :      if -mstackrealign is used, it isn't used for sibcall check and
    7982              :      estimated stack alignment is 128bit.  */
    7983      1606482 :   else if (!sibcall
    7984      1472003 :            && ix86_force_align_arg_pointer
    7985         4574 :            && crtl->stack_alignment_estimated == 128)
    7986          596 :     incoming_stack_boundary = MIN_STACK_BOUNDARY;
    7987              :   else
    7988      1605886 :     incoming_stack_boundary = ix86_default_incoming_stack_boundary;
    7989              : 
    7990              :   /* Incoming stack alignment can be changed on individual functions
    7991              :      via force_align_arg_pointer attribute.  We use the smallest
    7992              :      incoming stack boundary.  */
    7993      1606624 :   if (incoming_stack_boundary > MIN_STACK_BOUNDARY
    7994      3212642 :       && lookup_attribute ("force_align_arg_pointer",
    7995      1606018 :                            TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    7996         5708 :     incoming_stack_boundary = MIN_STACK_BOUNDARY;
    7997              : 
    7998              :   /* The incoming stack frame has to be aligned at least at
    7999              :      parm_stack_boundary.  */
    8000      1606624 :   if (incoming_stack_boundary < crtl->parm_stack_boundary)
    8001              :     incoming_stack_boundary = crtl->parm_stack_boundary;
    8002              : 
    8003              :   /* Stack at entrance of main is aligned by runtime.  We use the
    8004              :      smallest incoming stack boundary. */
    8005      1606624 :   if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
    8006       140508 :       && DECL_NAME (current_function_decl)
    8007       140508 :       && MAIN_NAME_P (DECL_NAME (current_function_decl))
    8008      1609106 :       && DECL_FILE_SCOPE_P (current_function_decl))
    8009         2482 :     incoming_stack_boundary = MAIN_STACK_BOUNDARY;
    8010              : 
    8011      1606624 :   return incoming_stack_boundary;
    8012              : }
    8013              : 
    8014              : /* Update incoming stack boundary and estimated stack alignment.  */
    8015              : 
    8016              : static void
    8017      1472140 : ix86_update_stack_boundary (void)
    8018              : {
    8019      1472140 :   ix86_incoming_stack_boundary
    8020      1472140 :     = ix86_minimum_incoming_stack_boundary (false);
    8021              : 
    8022              :   /* x86_64 vararg needs 16byte stack alignment for register save area.  */
    8023      1472140 :   if (TARGET_64BIT
    8024      1345859 :       && cfun->stdarg
    8025        21359 :       && crtl->stack_alignment_estimated < 128)
    8026        10170 :     crtl->stack_alignment_estimated = 128;
    8027              : 
    8028              :   /* __tls_get_addr needs to be called with 16-byte aligned stack.  */
    8029      1472140 :   if (ix86_tls_descriptor_calls_expanded_in_cfun
    8030         1072 :       && crtl->preferred_stack_boundary < 128)
    8031          745 :     crtl->preferred_stack_boundary = 128;
    8032              : 
    8033              :   /* For 32-bit MS ABI, both the incoming and preferred stack boundaries
    8034              :      are 32 bits, but if force_align_arg_pointer is specified, it should
    8035              :      prefer 128 bits for a backward-compatibility reason, which is also
    8036              :      what the doc suggests.  */
    8037      1472140 :   if (lookup_attribute ("force_align_arg_pointer",
    8038      1472140 :                         TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))
    8039      1472140 :       && crtl->preferred_stack_boundary < 128)
    8040            4 :     crtl->preferred_stack_boundary = 128;
    8041      1472140 : }
    8042              : 
/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
   needed or an rtx for DRAP otherwise.  */

static rtx
ix86_get_drap_rtx (void)
{
  /* We must use DRAP if there are outgoing arguments on stack or
     the stack pointer register is clobbered by asm statement and
     ACCUMULATE_OUTGOING_ARGS is false.  */
  if (ix86_force_drap
      || ((cfun->machine->outgoing_args_on_stack
           || crtl->sp_is_clobbered_by_asm)
          && !ACCUMULATE_OUTGOING_ARGS))
    crtl->need_drap = true;

  if (stack_realign_drap)
    {
      /* Assign DRAP to vDRAP and returns vDRAP */
      unsigned int regno = find_drap_reg ();
      rtx drap_vreg;
      rtx arg_ptr;
      rtx_insn *seq, *insn;

      /* The DRAP lives in the hard register chosen by find_drap_reg;
         record it in crtl->drap_reg.  */
      arg_ptr = gen_rtx_REG (Pmode, regno);
      crtl->drap_reg = arg_ptr;

      /* Copy the DRAP into a pseudo (vDRAP) in a detached sequence...  */
      start_sequence ();
      drap_vreg = copy_to_reg (arg_ptr);
      seq = end_sequence ();

      /* ...and emit that copy right after the function entry insn.  */
      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
      if (!optimize)
        {
          /* Without optimization, tell the unwinder where the vDRAP
             value lives via a REG_CFA_SET_VDRAP note.  */
          add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      return drap_vreg;
    }
  else
    return NULL;
}
    8084              : 
    8085              : /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
    8086              : 
    8087              : static rtx
    8088      1472141 : ix86_internal_arg_pointer (void)
    8089              : {
    8090      1472141 :   return virtual_incoming_args_rtx;
    8091              : }
    8092              : 
/* A scratch register handed out by get_scratch_register_on_entry.  */
struct scratch_reg {
  rtx reg;	/* The scratch hard register.  */
  bool saved;	/* True if REG had to be pushed to free it, and so must
		   be restored by release_scratch_register_on_entry.  */
};
    8097              : 
/* Return a short-lived scratch register for use on function entry.
   In 32-bit mode, it is valid only after the registers are saved
   in the prologue.  This register must be released by means of
   release_scratch_register_on_entry once it is dead.

   In 32-bit mode the candidates are tried in a fixed order, skipping
   registers used for argument passing, the static chain or the DRAP;
   if none is free, eax (or edx) is spilled with a push and SR->saved
   is set.  */

static void
get_scratch_register_on_entry (struct scratch_reg *sr)
{
  int regno;

  sr->saved = false;

  if (TARGET_64BIT)
    {
      /* We always use R11 in 64-bit mode.  */
      regno = R11_REG;
    }
  else
    {
      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
      bool fastcall_p
        = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool thiscall_p
        = lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
      bool static_chain_p = DECL_STATIC_CHAIN (decl);
      int regparm = ix86_function_regparm (fntype, decl);
      int drap_regno
        = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;

      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
          for the static chain register.  */
      if ((regparm < 1 || (fastcall_p && !static_chain_p))
          && drap_regno != AX_REG)
        regno = AX_REG;
      /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
          for the static chain register.  */
      else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
        regno = AX_REG;
      else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
        regno = DX_REG;
      /* ecx is the static chain register.  */
      else if (regparm < 3 && !fastcall_p && !thiscall_p
               && !static_chain_p
               && drap_regno != CX_REG)
        regno = CX_REG;
      /* Otherwise fall back to a callee-saved register that the
         prologue saves anyway (hence already reloadable).  */
      else if (ix86_save_reg (BX_REG, true, false))
        regno = BX_REG;
      /* esi is the static chain register.  */
      else if (!(regparm == 3 && static_chain_p)
               && ix86_save_reg (SI_REG, true, false))
        regno = SI_REG;
      else if (ix86_save_reg (DI_REG, true, false))
        regno = DI_REG;
      else
        {
          /* No register is free: spill eax, or edx when eax holds
             the DRAP.  */
          regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
          sr->saved = true;
        }
    }

  sr->reg = gen_rtx_REG (Pmode, regno);
  if (sr->saved)
    {
      rtx_insn *insn = emit_insn (gen_push (sr->reg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
    8165              : 
/* Release a scratch register obtained from the preceding function.

   If RELEASE_VIA_POP is true, we just pop the register off the stack
   to release it.  This is what non-Linux systems use with -fstack-check.

   Otherwise we use OFFSET to locate the saved register and the
   allocated stack space becomes part of the local frame and is
   deallocated by the epilogue.  */

static void
release_scratch_register_on_entry (struct scratch_reg *sr, HOST_WIDE_INT offset,
                                   bool release_via_pop)
{
  if (sr->saved)
    {
      if (release_via_pop)
        {
          struct machine_function *m = cfun->machine;
          rtx x, insn = emit_insn (gen_pop (sr->reg));

          /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop,
             so describe the stack adjustment explicitly with a
             REG_FRAME_RELATED_EXPR note.  */
          RTX_FRAME_RELATED_P (insn) = 1;
          x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
          x = gen_rtx_SET (stack_pointer_rtx, x);
          add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
          /* Keep the tracked frame state in sync with the pop.  */
          m->fs.sp_offset -= UNITS_PER_WORD;
        }
      else
        {
          /* Reload the saved value from its slot at SP + OFFSET; the
             slot itself is freed later by the epilogue.  */
          rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
          x = gen_rtx_SET (sr->reg, gen_rtx_MEM (word_mode, x));
          emit_insn (x);
        }
    }
}
    8201              : 
/* Emit code to adjust the stack pointer by SIZE bytes while probing it.

   If INT_REGISTERS_SAVED is true, then integer registers have already been
   pushed on the stack.

   If PROTECTION AREA is true, then probe PROBE_INTERVAL plus a small dope
   beyond SIZE bytes.

   This assumes no knowledge of the current probing state, i.e. it is never
   allowed to allocate more than PROBE_INTERVAL bytes of stack space without
   a suitable probe.  */

static void
ix86_adjust_stack_and_probe (HOST_WIDE_INT size,
                             const bool int_registers_saved,
                             const bool protection_area)
{
  struct machine_function *m = cfun->machine;

  /* If this function does not statically allocate stack space, then
     no probes are needed.  */
  if (!size)
    {
      /* However, the allocation of space via pushes for register
         saves could be viewed as allocating space, but without the
         need to probe.  */
      if (m->frame.nregs || m->frame.nsseregs || frame_pointer_needed)
        dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      else
        dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
      return;
    }

  /* If we are a noreturn function, then we have to consider the
     possibility that we're called via a jump rather than a call.

     Thus we don't have the implicit probe generated by saving the
     return address into the stack at the call.  Thus, the stack
     pointer could be anywhere in the guard page.  The safe thing
     to do is emit a probe now.

     The probe can be avoided if we have already emitted any callee
     register saves into the stack or have a frame pointer (which will
     have been saved as well).  Those saves will function as implicit
     probes.

     ?!? This should be revamped to work like aarch64 and s390 where
     we track the offset from the most recent probe.  Normally that
     offset would be zero.  For a noreturn function we would reset
     it to PROBE_INTERVAL - (STACK_BOUNDARY / BITS_PER_UNIT).   Then
     we just probe when we cross PROBE_INTERVAL.  */
  if (TREE_THIS_VOLATILE (cfun->decl)
      && !(m->frame.nregs || m->frame.nsseregs || frame_pointer_needed))
    {
      /* We can safely use any register here since we're just going to push
         its value and immediately pop it back.  But we do try and avoid
         argument passing registers so as not to introduce dependencies in
         the pipeline.  For 32 bit we use %esi and for 64 bit we use %rax.  */
      rtx dummy_reg = gen_rtx_REG (word_mode, TARGET_64BIT ? AX_REG : SI_REG);
      rtx_insn *insn_push = emit_insn (gen_push (dummy_reg));
      rtx_insn *insn_pop = emit_insn (gen_pop (dummy_reg));
      m->fs.sp_offset -= UNITS_PER_WORD;
      if (m->fs.cfa_reg == stack_pointer_rtx)
        {
          /* Annotate the push/pop pair with CFA adjustments so the
             unwind info tracks the momentary SP change.  */
          m->fs.cfa_offset -= UNITS_PER_WORD;
          rtx x = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
          x = gen_rtx_SET (stack_pointer_rtx, x);
          add_reg_note (insn_push, REG_CFA_ADJUST_CFA, x);
          RTX_FRAME_RELATED_P (insn_push) = 1;
          x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
          x = gen_rtx_SET (stack_pointer_rtx, x);
          add_reg_note (insn_pop, REG_CFA_ADJUST_CFA, x);
          RTX_FRAME_RELATED_P (insn_pop) = 1;
        }
      emit_insn (gen_blockage ());
    }

  const HOST_WIDE_INT probe_interval = get_probe_interval ();
  const int dope = 4 * UNITS_PER_WORD;

  /* If there is protection area, take it into account in the size.  */
  if (protection_area)
    size += probe_interval + dope;

  /* If we allocate less than the size of the guard statically,
     then no probing is necessary, but we do need to allocate
     the stack.  */
  else if (size < (1 << param_stack_clash_protection_guard_size))
    {
      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                 GEN_INT (-size), -1,
                                 m->fs.cfa_reg == stack_pointer_rtx);
      dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      return;
    }

  /* We're allocating a large enough stack frame that we need to
     emit probes.  Either emit them inline or in a loop depending
     on the size.  */
  if (size <= 4 * probe_interval)
    {
      HOST_WIDE_INT i;
      for (i = probe_interval; i <= size; i += probe_interval)
        {
          /* Allocate PROBE_INTERVAL bytes.  */
          rtx insn
            = pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                         GEN_INT (-probe_interval), -1,
                                         m->fs.cfa_reg == stack_pointer_rtx);
          add_reg_note (insn, REG_STACK_CHECK, const0_rtx);

          /* And probe at *sp.  */
          emit_stack_probe (stack_pointer_rtx);
          emit_insn (gen_blockage ());
        }

      /* We need to allocate space for the residual, but we do not need
         to probe the residual...  */
      HOST_WIDE_INT residual = (i - probe_interval - size);
      if (residual)
        {
          /* RESIDUAL is negative here, so the GEN_INT adjustment below
             allocates the remaining |RESIDUAL| bytes.  */
          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     GEN_INT (residual), -1,
                                     m->fs.cfa_reg == stack_pointer_rtx);

          /* ...except if there is a protection area to maintain.  */
          if (protection_area)
            emit_stack_probe (stack_pointer_rtx);
        }

      dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
    }
  else
    {
      /* We expect the GP registers to be saved when probes are used
         as the probing sequences might need a scratch register and
         the routine to allocate one assumes the integer registers
         have already been saved.  */
      gcc_assert (int_registers_saved);

      struct scratch_reg sr;
      get_scratch_register_on_entry (&sr);

      /* If we needed to save a register, then account for any space
         that was pushed (we are not going to pop the register when
         we do the restore).  */
      if (sr.saved)
        size -= UNITS_PER_WORD;

      /* Step 1: round SIZE down to a multiple of the interval.  */
      HOST_WIDE_INT rounded_size = size & -probe_interval;

      /* Step 2: compute final value of the loop counter.  Use lea if
         possible.  */
      rtx addr = plus_constant (Pmode, stack_pointer_rtx, -rounded_size);
      rtx insn;
      if (address_no_seg_operand (addr, Pmode))
        insn = emit_insn (gen_rtx_SET (sr.reg, addr));
      else
        {
          emit_move_insn (sr.reg, GEN_INT (-rounded_size));
          insn = emit_insn (gen_rtx_SET (sr.reg,
                                         gen_rtx_PLUS (Pmode, sr.reg,
                                                       stack_pointer_rtx)));
        }
      if (m->fs.cfa_reg == stack_pointer_rtx)
        {
          /* The scratch register now defines the CFA while the loop
             moves SP.  */
          add_reg_note (insn, REG_CFA_DEF_CFA,
                        plus_constant (Pmode, sr.reg,
                                       m->fs.cfa_offset + rounded_size));
          RTX_FRAME_RELATED_P (insn) = 1;
        }

      /* Step 3: the loop.  */
      rtx size_rtx = GEN_INT (rounded_size);
      insn = emit_insn (gen_adjust_stack_and_probe (Pmode, sr.reg, sr.reg,
                                                    size_rtx));
      if (m->fs.cfa_reg == stack_pointer_rtx)
        {
          /* Hand the CFA back to SP once the loop is done.  */
          m->fs.cfa_offset += rounded_size;
          add_reg_note (insn, REG_CFA_DEF_CFA,
                        plus_constant (Pmode, stack_pointer_rtx,
                                       m->fs.cfa_offset));
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      m->fs.sp_offset += rounded_size;
      emit_insn (gen_blockage ());

      /* Step 4: adjust SP if we cannot assert at compile-time that SIZE
         is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        {
          pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                     GEN_INT (rounded_size - size), -1,
                                     m->fs.cfa_reg == stack_pointer_rtx);

          if (protection_area)
            emit_stack_probe (stack_pointer_rtx);
        }

      dump_stack_clash_frame_info (PROBE_LOOP, size != rounded_size);

      /* This does not deallocate the space reserved for the scratch
         register.  That will be deallocated in the epilogue.  */
      release_scratch_register_on_entry (&sr, size, false);
    }

  /* Adjust back to account for the protection area.  */
  if (protection_area)
    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (probe_interval + dope), -1,
                               m->fs.cfa_reg == stack_pointer_rtx);

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
    8419              : 
/* Adjust the stack pointer up to REG while probing it.  Emits the
   assembly for the loop body of the adjust_stack_and_probe pattern:
   repeatedly allocate PROBE_INTERVAL bytes and touch the new bottom
   of the stack until SP reaches REG.  */

const char *
output_adjust_stack_and_probe (rtx reg)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* SP = SP - PROBE_INTERVAL.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = GEN_INT (get_probe_interval ());
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at SP.  */
  xops[1] = const0_rtx;
  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);

  /* Test if SP == LAST_ADDR.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);

  /* Branch.  */
  fputs ("\tjne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
    8455              : 
/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.

   INT_REGISTERS_SAVED is true if integer registers have already been
   pushed on the stack.  */

static void
ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
                             const bool int_registers_saved)
{
  const HOST_WIDE_INT probe_interval = get_probe_interval ();

  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  The run-time loop is made up of 6 insns in the
     generic case while the compile-time loop is made up of n insns for n #
     of intervals.  */
  if (size <= 6 * probe_interval)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
         it exceeds SIZE.  If only one probe is needed, this will not
         generate any code.  Then probe at FIRST + SIZE.  */
      for (i = probe_interval; i < size; i += probe_interval)
        emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
                                         -(first + i)));

      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
                                       -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      /* We expect the GP registers to be saved when probes are used
         as the probing sequences might need a scratch register and
         the routine to allocate one assumes the integer registers
         have already been saved.  */
      gcc_assert (int_registers_saved);

      HOST_WIDE_INT rounded_size, last;
      struct scratch_reg sr;

      get_scratch_register_on_entry (&sr);


      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = ROUND_DOWN (size, probe_interval);


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_OFFSET = FIRST.  */
      emit_move_insn (sr.reg, GEN_INT (-first));

      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
      last = first + rounded_size;


      /* Step 3: the loop

         do
           {
             TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
             probe at TEST_ADDR
           }
         while (TEST_ADDR != LAST_ADDR)

         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
         until it is equal to ROUNDED_SIZE.  */

      emit_insn
        (gen_probe_stack_range (Pmode, sr.reg, sr.reg, GEN_INT (-last)));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
         that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
        emit_stack_probe (plus_constant (Pmode,
                                         gen_rtx_PLUS (Pmode,
                                                       stack_pointer_rtx,
                                                       sr.reg),
                                         rounded_size - size));

      /* Release via pop: a spilled scratch register is popped right
         here rather than being reloaded by the epilogue.  */
      release_scratch_register_on_entry (&sr, size, true);
    }

  /* Make sure nothing is scheduled before we are done.  */
  emit_insn (gen_blockage ());
}
    8552              : 
/* Probe a range of stack addresses from REG to END, inclusive.  These are
   offsets from the current stack pointer.  REG counts downward: the caller
   initializes it to a negative offset, so stepping toward END is a
   subtraction.  Returns the empty string (all output is emitted here).  */

const char *
output_probe_stack_range (rtx reg, rtx end)
{
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[3];

  /* Generate a unique local label for this probe loop.  */
  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg;
  xops[1] = GEN_INT (get_probe_interval ());
  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);

  /* Probe at TEST_ADDR: OR the word at SP+REG with zero, which touches
     the page without changing its contents.  */
  xops[0] = stack_pointer_rtx;
  xops[1] = reg;
  xops[2] = const0_rtx;
  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);

  /* Test if TEST_ADDR == LAST_ADDR.  */
  xops[0] = reg;
  xops[1] = end;
  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);

  /* Branch back while more probes remain.  */
  fputs ("\tjne\t", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
    8591              : 
/* Data passed to ix86_update_stack_alignment.  */
struct stack_access_data
{
  /* The stack access register.  */
  const_rtx reg;
  /* Pointer to the running maximum stack slot alignment, in bits;
     updated in place by ix86_update_stack_alignment.  */
  unsigned int *stack_alignment;
};
    8600              : 
/* Update the maximum stack slot alignment from memory alignment in PAT.
   Callback for note_stores; DATA points to a stack_access_data whose REG
   is the stack access register and whose STACK_ALIGNMENT is the running
   maximum, in bits.  */

static void
ix86_update_stack_alignment (rtx, const_rtx pat, void *data)
{
  /* This insn may reference stack slot.  Update the maximum stack slot
     alignment if the memory is referenced by the stack access register. */
  stack_access_data *p = (stack_access_data *) data;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, pat, ALL)
    {
      auto op = *iter;
      if (MEM_P (op))
        {
          if (reg_mentioned_p (p->reg, XEXP (op, 0)))
            {
              unsigned int alignment = MEM_ALIGN (op);

              if (alignment > *p->stack_alignment)
                *p->stack_alignment = alignment;
              /* Only the first MEM whose address mentions REG is
                 considered for this pattern.  */
              break;
            }
          else
            /* Address doesn't involve REG; don't look inside it.  */
            iter.skip_subrtxes ();
        }
    }
}
    8629              : 
/* Helper function for ix86_find_all_reg_uses.  If SET copies a value
   mentioning register REGNO into a Pmode hard register not yet in
   REGSET, add that destination register to REGSET and to WORKLIST so
   its own uses get scanned in turn.  */

static void
ix86_find_all_reg_uses_1 (HARD_REG_SET &regset,
                          rtx set, unsigned int regno,
                          auto_bitmap &worklist)
{
  rtx dest = SET_DEST (set);

  if (!REG_P (dest))
    return;

  /* Reject non-Pmode modes.  */
  if (GET_MODE (dest) != Pmode)
    return;

  unsigned int dst_regno = REGNO (dest);

  /* Destination already known; nothing to propagate.  */
  if (TEST_HARD_REG_BIT (regset, dst_regno))
    return;

  const_rtx src = SET_SRC (set);

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, src, ALL)
    {
      auto op = *iter;

      /* A register appearing only inside a memory address does not
         make the destination a stack address; skip MEM operands.  */
      if (MEM_P (op))
        iter.skip_subrtxes ();

      if (REG_P (op) && REGNO (op) == regno)
        {
          /* Add this register to register set.  */
          add_to_hard_reg_set (&regset, Pmode, dst_regno);
          bitmap_set_bit (worklist, dst_regno);
          break;
        }
    }
}
    8670              : 
/* Find all registers defined with register REGNO.  Walk REGNO's DF use
   chain and feed every SET -- either a single_set or each SET inside a
   PARALLEL -- to ix86_find_all_reg_uses_1, which accumulates derived
   registers into REGSET and WORKLIST.  */

static void
ix86_find_all_reg_uses (HARD_REG_SET &regset,
                        unsigned int regno, auto_bitmap &worklist)
{
  for (df_ref ref = DF_REG_USE_CHAIN (regno);
       ref != NULL;
       ref = DF_REF_NEXT_REG (ref))
    {
      /* Artificial refs have no insn to inspect.  */
      if (DF_REF_IS_ARTIFICIAL (ref))
        continue;

      rtx_insn *insn = DF_REF_INSN (ref);

      if (!NONJUMP_INSN_P (insn))
        continue;

      unsigned int ref_regno = DF_REF_REGNO (ref);

      rtx set = single_set (insn);
      if (set)
        {
          ix86_find_all_reg_uses_1 (regset, set,
                                    ref_regno, worklist);
          continue;
        }

      rtx pat = PATTERN (insn);
      if (GET_CODE (pat) != PARALLEL)
        continue;

      /* Multi-set insn: examine each SET element of the PARALLEL.  */
      for (int i = 0; i < XVECLEN (pat, 0); i++)
        {
          rtx exp = XVECEXP (pat, 0, i);

          if (GET_CODE (exp) == SET)
            ix86_find_all_reg_uses_1 (regset, exp,
                                      ref_regno, worklist);
        }
    }
}
    8713              : 
/* Return true if the hard register REGNO used for a stack access is
   defined in a basic block that dominates the block where it is used.
   BB is the block of the use; SET_UP_BY_PROLOGUE and PROLOGUE_USED are
   passed through to requires_stack_frame_p.  */

static bool
ix86_access_stack_p (unsigned int regno, basic_block bb,
                     HARD_REG_SET &set_up_by_prologue,
                     HARD_REG_SET &prologue_used)
{
  /* Get all BBs which set REGNO and dominate the current BB from all
     DEFs of REGNO.  */
  for (df_ref def = DF_REG_DEF_CHAIN (regno);
       def;
       def = DF_REF_NEXT_REG (def))
    /* Skip artificial defs and clobbers -- only real definitions can
       make REGNO a stack address.  */
    if (!DF_REF_IS_ARTIFICIAL (def)
        && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER)
        && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER))
      {
        basic_block set_bb = DF_REF_BB (def);
        if (dominated_by_p (CDI_DOMINATORS, bb, set_bb))
          {
            rtx_insn *insn = DF_REF_INSN (def);
            /* Return true if INSN requires stack.  */
            if (requires_stack_frame_p (insn, prologue_used,
                                        set_up_by_prologue))
              return true;
          }
      }

  return false;
}
    8744              : 
/* Set stack_frame_required to false if stack frame isn't required.
   Update STACK_ALIGNMENT to the largest alignment, in bits, of stack
   slot used if stack frame is required and CHECK_STACK_SLOT is true.  */

static void
ix86_find_max_used_stack_alignment (unsigned int &stack_alignment,
                                    bool check_stack_slot)
{
  HARD_REG_SET set_up_by_prologue, prologue_used;
  basic_block bb;

  CLEAR_HARD_REG_SET (prologue_used);
  CLEAR_HARD_REG_SET (set_up_by_prologue);
  add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
  add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
  add_to_hard_reg_set (&set_up_by_prologue, Pmode,
                       HARD_FRAME_POINTER_REGNUM);

  /* Pass 1: decide whether any insn in the function needs a stack
     frame at all.  */
  bool require_stack_frame = false;

  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      FOR_BB_INSNS (bb, insn)
        if (NONDEBUG_INSN_P (insn)
            && requires_stack_frame_p (insn, prologue_used,
                                       set_up_by_prologue))
          {
            require_stack_frame = true;
            break;
          }
    }

  cfun->machine->stack_frame_required = require_stack_frame;

  /* Stop if we don't need to check stack slot.  */
  if (!check_stack_slot)
    return;

  /* The preferred stack alignment is the minimum stack alignment.  */
  if (stack_alignment > crtl->preferred_stack_boundary)
    stack_alignment = crtl->preferred_stack_boundary;

  HARD_REG_SET stack_slot_access;
  CLEAR_HARD_REG_SET (stack_slot_access);

  /* Stack slot can be accessed by stack pointer, frame pointer or
     registers defined by stack pointer or frame pointer.  */
  auto_bitmap worklist;

  add_to_hard_reg_set (&stack_slot_access, Pmode, STACK_POINTER_REGNUM);
  bitmap_set_bit (worklist, STACK_POINTER_REGNUM);

  if (frame_pointer_needed)
    {
      add_to_hard_reg_set (&stack_slot_access, Pmode,
                           HARD_FRAME_POINTER_REGNUM);
      bitmap_set_bit (worklist, HARD_FRAME_POINTER_REGNUM);
    }

  /* Remember the seed registers (SP and possibly HFP): uses of those
     are always treated as stack accesses below, without needing the
     dominance check.  */
  HARD_REG_SET hard_stack_slot_access = stack_slot_access;

  calculate_dominance_info (CDI_DOMINATORS);

  unsigned int regno;

  /* Pass 2: compute the transitive closure of registers derived from
     SP/HFP via Pmode copies.  */
  do
    {
      regno = bitmap_clear_first_set_bit (worklist);
      ix86_find_all_reg_uses (stack_slot_access, regno, worklist);
    }
  while (!bitmap_empty_p (worklist));

  hard_reg_set_iterator hrsi;
  stack_access_data data;

  data.stack_alignment = &stack_alignment;

  /* Pass 3: for every use of every stack-address register, record the
     alignment of the memory it accesses.  */
  EXECUTE_IF_SET_IN_HARD_REG_SET (stack_slot_access, 0, regno, hrsi)
    for (df_ref ref = DF_REG_USE_CHAIN (regno);
         ref != NULL;
         ref = DF_REF_NEXT_REG (ref))
      {
        if (DF_REF_IS_ARTIFICIAL (ref))
          continue;

        rtx_insn *insn = DF_REF_INSN (ref);

        if (!NONJUMP_INSN_P (insn))
          continue;

        if (TEST_HARD_REG_BIT (hard_stack_slot_access, regno)
            || ix86_access_stack_p (regno, BLOCK_FOR_INSN (insn),
                                    set_up_by_prologue, prologue_used))
          {
            /* Update stack alignment if REGNO is used for stack
               access.  */
            data.reg = DF_REF_REG (ref);
            note_stores (insn, ix86_update_stack_alignment, &data);
            continue;
          }
      }

  free_dominance_info (CDI_DOMINATORS);
}
    8850              : 
/* Finalize stack_realign_needed and frame_pointer_needed flags, which
   will guide prologue/epilogue to be generated in correct form.  */

static void
ix86_finalize_stack_frame_flags (void)
{
  /* Check if stack realign is really needed after reload, and
     store the result in cfun.  */
  unsigned int incoming_stack_boundary
    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
  unsigned int stack_alignment
    = (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
       ? crtl->max_used_stack_slot_alignment
       : crtl->stack_alignment_needed);
  unsigned int stack_realign
    = (incoming_stack_boundary < stack_alignment);
  bool recompute_frame_layout_p = false;

  if (crtl->stack_realign_finalized)
    {
      /* After stack_realign_needed is finalized, we can no longer
         change it.  */
      gcc_assert (crtl->stack_realign_needed == stack_realign);
      return;
    }

  /* It is always safe to compute max_used_stack_alignment.  We
     compute it only if 128-bit aligned load/store may be generated
     on misaligned stack slot which will lead to segfault. */
  bool check_stack_slot
    = (stack_realign || crtl->max_used_stack_slot_alignment >= 128);
  ix86_find_max_used_stack_alignment (stack_alignment,
                                      check_stack_slot);

  /* If the only reason for frame_pointer_needed is that we conservatively
     assumed stack realignment might be needed or -fno-omit-frame-pointer
     is used, but in the end nothing that needed the stack alignment had
     been spilled nor stack access, clear frame_pointer_needed and say we
     don't need stack realignment.

     When vector register is used for piecewise move and store, we don't
     increase stack_alignment_needed as there is no register spill for
     piecewise move and store.  Since stack_realign_needed is set to true
     by checking stack_alignment_estimated which is updated by pseudo
     vector register usage, we also need to check stack_realign_needed to
     eliminate frame pointer.  */
  if ((stack_realign
       || (!flag_omit_frame_pointer && optimize)
       || crtl->stack_realign_needed)
      && frame_pointer_needed
      && crtl->is_leaf
      && crtl->sp_is_unchanging
      && !ix86_current_function_calls_tls_descriptor
      && !crtl->accesses_prior_frames
      && !cfun->calls_alloca
      && !crtl->calls_eh_return
      /* See ira_setup_eliminable_regset for the rationale.  */
      && !(STACK_CHECK_MOVING_SP
           && flag_stack_check
           && flag_exceptions
           && cfun->can_throw_non_call_exceptions)
      && !ix86_frame_pointer_required ()
      && ix86_get_frame_size () == 0
      && ix86_nsaved_sseregs () == 0
      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
    {
      if (cfun->machine->stack_frame_required)
        {
          /* Stack frame is required.  If stack alignment needed is less
             than incoming stack boundary, don't realign stack.  */
          stack_realign = incoming_stack_boundary < stack_alignment;
          if (!stack_realign)
            {
              crtl->max_used_stack_slot_alignment
                = incoming_stack_boundary;
              crtl->stack_alignment_needed
                = incoming_stack_boundary;
              /* Also update preferred_stack_boundary for leaf
                 functions.  */
              crtl->preferred_stack_boundary
                = incoming_stack_boundary;
            }
        }
      else
        {
          /* If drap has been set, but it actually isn't live at the
             start of the function, there is no reason to set it up.  */
          if (crtl->drap_reg)
            {
              basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
              if (! REGNO_REG_SET_P (DF_LR_IN (bb),
                                     REGNO (crtl->drap_reg)))
                {
                  crtl->drap_reg = NULL_RTX;
                  crtl->need_drap = false;
                }
            }
          else
            cfun->machine->no_drap_save_restore = true;

          /* No frame, no realignment; reset every related boundary to
             the incoming one and recompute dataflow from scratch.  */
          frame_pointer_needed = false;
          stack_realign = false;
          crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
          crtl->stack_alignment_needed = incoming_stack_boundary;
          crtl->stack_alignment_estimated = incoming_stack_boundary;
          if (crtl->preferred_stack_boundary > incoming_stack_boundary)
            crtl->preferred_stack_boundary = incoming_stack_boundary;
          df_finish_pass (true);
          df_scan_alloc (NULL);
          df_scan_blocks ();
          df_compute_regs_ever_live (true);
          df_analyze ();

          if (flag_var_tracking)
            {
              /* Since frame pointer is no longer available, replace it with
                 stack pointer - UNITS_PER_WORD in debug insns.  */
              df_ref ref, next;
              for (ref = DF_REG_USE_CHAIN (HARD_FRAME_POINTER_REGNUM);
                   ref; ref = next)
                {
                  next = DF_REF_NEXT_REG (ref);
                  if (!DF_REF_INSN_INFO (ref))
                    continue;

                  /* Make sure the next ref is for a different instruction,
                     so that we're not affected by the rescan.  */
                  rtx_insn *insn = DF_REF_INSN (ref);
                  while (next && DF_REF_INSN (next) == insn)
                    next = DF_REF_NEXT_REG (next);

                  if (DEBUG_INSN_P (insn))
                    {
                      bool changed = false;
                      for (; ref != next; ref = DF_REF_NEXT_REG (ref))
                        {
                          rtx *loc = DF_REF_LOC (ref);
                          if (*loc == hard_frame_pointer_rtx)
                            {
                              *loc = plus_constant (Pmode,
                                                    stack_pointer_rtx,
                                                    -UNITS_PER_WORD);
                              changed = true;
                            }
                        }
                      if (changed)
                        df_insn_rescan (insn);
                    }
                }
            }

          recompute_frame_layout_p = true;
        }
    }
  else if (crtl->max_used_stack_slot_alignment >= 128
           && cfun->machine->stack_frame_required)
    {
      /* We don't need to realign stack.  max_used_stack_alignment is
         used to decide how stack frame should be aligned.  This is
         independent of any psABIs nor 32-bit vs 64-bit.  */
      cfun->machine->max_used_stack_alignment
        = stack_alignment / BITS_PER_UNIT;
    }

  if (crtl->stack_realign_needed != stack_realign)
    recompute_frame_layout_p = true;
  crtl->stack_realign_needed = stack_realign;
  crtl->stack_realign_finalized = true;
  if (recompute_frame_layout_p)
    ix86_compute_frame_layout ();
}
    9023              : 
    9024              : /* Delete SET_GOT right after entry block if it is allocated to reg.  */
    9025              : 
    9026              : static void
    9027            0 : ix86_elim_entry_set_got (rtx reg)
    9028              : {
    9029            0 :   basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
    9030            0 :   rtx_insn *c_insn = BB_HEAD (bb);
    9031            0 :   if (!NONDEBUG_INSN_P (c_insn))
    9032            0 :     c_insn = next_nonnote_nondebug_insn (c_insn);
    9033            0 :   if (c_insn && NONJUMP_INSN_P (c_insn))
    9034              :     {
    9035            0 :       rtx pat = PATTERN (c_insn);
    9036            0 :       if (GET_CODE (pat) == PARALLEL)
    9037              :         {
    9038            0 :           rtx set = XVECEXP (pat, 0, 0);
    9039            0 :           if (GET_CODE (set) == SET
    9040            0 :               && GET_CODE (SET_SRC (set)) == UNSPEC
    9041            0 :               && XINT (SET_SRC (set), 1) == UNSPEC_SET_GOT
    9042            0 :               && REGNO (SET_DEST (set)) == REGNO (reg))
    9043            0 :             delete_insn (c_insn);
    9044              :         }
    9045              :     }
    9046            0 : }
    9047              : 
    9048              : static rtx
    9049       193166 : gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
    9050              : {
    9051       193166 :   rtx addr, mem;
    9052              : 
    9053       193166 :   if (offset)
    9054       184480 :     addr = plus_constant (Pmode, frame_reg, offset);
    9055       193166 :   mem = gen_frame_mem (GET_MODE (reg), offset ? addr : frame_reg);
    9056       193166 :   return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
    9057              : }
    9058              : 
/* Return a SET rtx that loads REG from the frame slot at
   FRAME_REG + OFFSET.  */

static inline rtx
gen_frame_load (rtx reg, rtx frame_reg, int offset)
{
  return gen_frame_set (reg, frame_reg, offset, false);
}
    9064              : 
/* Return a SET rtx that stores REG into the frame slot at
   FRAME_REG + OFFSET.  */

static inline rtx
gen_frame_store (rtx reg, rtx frame_reg, int offset)
{
  return gen_frame_set (reg, frame_reg, offset, true);
}
    9070              : 
/* Emit the call to the outlined xlogue stub that saves the registers
   clobbered in an ms_abi-to-sysv transition.  FRAME describes the
   current frame layout.  The emitted insn is a PARALLEL of a USE of
   the stub symbol plus one frame store per saved register, marked
   frame-related for CFI.  */

static void
ix86_emit_outlined_ms2sysv_save (const struct ix86_frame &frame)
{
  struct machine_function *m = cfun->machine;
  const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
                          + m->call_ms2sysv_extra_regs;
  rtvec v = rtvec_alloc (ncregs + 1);
  unsigned int align, i, vi = 0;
  rtx_insn *insn;
  rtx sym, addr;
  rtx rax = gen_rtx_REG (word_mode, AX_REG);
  const class xlogue_layout &xlogue = xlogue_layout::get_instance ();

  /* AL should only be live with sysv_abi.  */
  gcc_assert (!ix86_eax_live_at_start_p ());
  gcc_assert (m->fs.sp_offset >= frame.sse_reg_save_offset);

  /* Setup RAX as the stub's base pointer.  We use stack_realign_offset
     regardless of whether we've actually realigned the stack or not.  */
  align = GET_MODE_ALIGNMENT (V4SFmode);
  addr = choose_baseaddr (frame.stack_realign_offset
                          + xlogue.get_stub_ptr_offset (), &align, AX_REG);
  gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));

  emit_insn (gen_rtx_SET (rax, addr));

  /* Get the stub symbol.  */
  sym = xlogue.get_stub_rtx (frame_pointer_needed ? XLOGUE_STUB_SAVE_HFP
                                                  : XLOGUE_STUB_SAVE);
  RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);

  /* One frame store per clobbered register, at negative offsets from
     the stub pointer in RAX.  */
  for (i = 0; i < ncregs; ++i)
    {
      const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
      rtx reg = gen_rtx_REG ((SSE_REGNO_P (r.regno) ? V4SFmode : word_mode),
                             r.regno);
      RTVEC_ELT (v, vi++) = gen_frame_store (reg, rax, -r.offset);
    }

  gcc_assert (vi == (unsigned)GET_NUM_ELEM (v));

  insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, v));
  RTX_FRAME_RELATED_P (insn) = true;
}
    9115              : 
    9116              : /* Generate and return an insn body to AND X with Y.  */
    9117              : 
    9118              : static rtx_insn *
    9119        31704 : gen_and2_insn (rtx x, rtx y)
    9120              : {
    9121        31704 :   enum insn_code icode = optab_handler (and_optab, GET_MODE (x));
    9122              : 
    9123        31704 :   gcc_assert (insn_operand_matches (icode, 0, x));
    9124        31704 :   gcc_assert (insn_operand_matches (icode, 1, x));
    9125        31704 :   gcc_assert (insn_operand_matches (icode, 2, y));
    9126              : 
    9127        31704 :   return GEN_FCN (icode) (x, x, y);
    9128              : }
    9129              : 
    9130              : /* Expand the prologue into a bunch of separate insns.  */
    9131              : 
    9132              : void
    9133      1517128 : ix86_expand_prologue (void)
    9134              : {
    9135      1517128 :   struct machine_function *m = cfun->machine;
    9136      1517128 :   rtx insn, t;
    9137      1517128 :   HOST_WIDE_INT allocate;
    9138      1517128 :   bool int_registers_saved;
    9139      1517128 :   bool sse_registers_saved;
    9140      1517128 :   bool save_stub_call_needed;
    9141      1517128 :   rtx static_chain = NULL_RTX;
    9142              : 
    9143      1517128 :   ix86_last_zero_store_uid = 0;
    9144      1517128 :   if (ix86_function_naked (current_function_decl))
    9145              :     {
    9146           74 :       if (flag_stack_usage_info)
    9147            0 :         current_function_static_stack_size = 0;
    9148           74 :       return;
    9149              :     }
    9150              : 
    9151      1517054 :   ix86_finalize_stack_frame_flags ();
    9152              : 
    9153              :   /* DRAP should not coexist with stack_realign_fp */
    9154      1517054 :   gcc_assert (!(crtl->drap_reg && stack_realign_fp));
    9155              : 
    9156      1517054 :   memset (&m->fs, 0, sizeof (m->fs));
    9157              : 
    9158              :   /* Initialize CFA state for before the prologue.  */
    9159      1517054 :   m->fs.cfa_reg = stack_pointer_rtx;
    9160      1517054 :   m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
    9161              : 
    9162              :   /* Track SP offset to the CFA.  We continue tracking this after we've
    9163              :      swapped the CFA register away from SP.  In the case of re-alignment
    9164              :      this is fudged; we're interested to offsets within the local frame.  */
    9165      1517054 :   m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
    9166      1517054 :   m->fs.sp_valid = true;
    9167      1517054 :   m->fs.sp_realigned = false;
    9168              : 
    9169      1517054 :   const struct ix86_frame &frame = cfun->machine->frame;
    9170              : 
    9171      1517054 :   if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
    9172              :     {
    9173              :       /* We should have already generated an error for any use of
    9174              :          ms_hook on a nested function.  */
    9175            0 :       gcc_checking_assert (!ix86_static_chain_on_stack);
    9176              : 
    9177              :       /* Check if profiling is active and we shall use profiling before
    9178              :          prologue variant. If so sorry.  */
    9179            0 :       if (crtl->profile && flag_fentry != 0)
    9180            0 :         sorry ("%<ms_hook_prologue%> attribute is not compatible "
    9181              :                "with %<-mfentry%> for 32-bit");
    9182              : 
    9183              :       /* In ix86_asm_output_function_label we emitted:
    9184              :          8b ff     movl.s %edi,%edi
    9185              :          55        push   %ebp
    9186              :          8b ec     movl.s %esp,%ebp
    9187              : 
    9188              :          This matches the hookable function prologue in Win32 API
    9189              :          functions in Microsoft Windows XP Service Pack 2 and newer.
    9190              :          Wine uses this to enable Windows apps to hook the Win32 API
    9191              :          functions provided by Wine.
    9192              : 
    9193              :          What that means is that we've already set up the frame pointer.  */
    9194              : 
    9195            0 :       if (frame_pointer_needed
    9196            0 :           && !(crtl->drap_reg && crtl->stack_realign_needed))
    9197              :         {
    9198            0 :           rtx push, mov;
    9199              : 
    9200              :           /* We've decided to use the frame pointer already set up.
    9201              :              Describe this to the unwinder by pretending that both
    9202              :              push and mov insns happen right here.
    9203              : 
    9204              :              Putting the unwind info here at the end of the ms_hook
    9205              :              is done so that we can make absolutely certain we get
    9206              :              the required byte sequence at the start of the function,
    9207              :              rather than relying on an assembler that can produce
    9208              :              the exact encoding required.
    9209              : 
    9210              :              However it does mean (in the unpatched case) that we have
    9211              :              a 1 insn window where the asynchronous unwind info is
    9212              :              incorrect.  However, if we placed the unwind info at
    9213              :              its correct location we would have incorrect unwind info
    9214              :              in the patched case.  Which is probably all moot since
    9215              :              I don't expect Wine generates dwarf2 unwind info for the
    9216              :              system libraries that use this feature.  */
    9217              : 
    9218            0 :           insn = emit_insn (gen_blockage ());
    9219              : 
    9220            0 :           push = gen_push (hard_frame_pointer_rtx);
    9221            0 :           mov = gen_rtx_SET (hard_frame_pointer_rtx,
    9222              :                              stack_pointer_rtx);
    9223            0 :           RTX_FRAME_RELATED_P (push) = 1;
    9224            0 :           RTX_FRAME_RELATED_P (mov) = 1;
    9225              : 
    9226            0 :           RTX_FRAME_RELATED_P (insn) = 1;
    9227            0 :           add_reg_note (insn, REG_FRAME_RELATED_EXPR,
    9228              :                         gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
    9229              : 
    9230              :           /* Note that gen_push incremented m->fs.cfa_offset, even
    9231              :              though we didn't emit the push insn here.  */
    9232            0 :           m->fs.cfa_reg = hard_frame_pointer_rtx;
    9233            0 :           m->fs.fp_offset = m->fs.cfa_offset;
    9234            0 :           m->fs.fp_valid = true;
    9235            0 :         }
    9236              :       else
    9237              :         {
    9238              :           /* The frame pointer is not needed so pop %ebp again.
    9239              :              This leaves us with a pristine state.  */
    9240            0 :           emit_insn (gen_pop (hard_frame_pointer_rtx));
    9241              :         }
    9242              :     }
    9243              : 
    9244              :   /* The first insn of a function that accepts its static chain on the
    9245              :      stack is to push the register that would be filled in by a direct
    9246              :      call.  This insn will be skipped by the trampoline.  */
    9247      1517054 :   else if (ix86_static_chain_on_stack)
    9248              :     {
    9249            0 :       static_chain = ix86_static_chain (cfun->decl, false);
    9250            0 :       insn = emit_insn (gen_push (static_chain));
    9251            0 :       emit_insn (gen_blockage ());
    9252              : 
    9253              :       /* We don't want to interpret this push insn as a register save,
    9254              :          only as a stack adjustment.  The real copy of the register as
    9255              :          a save will be done later, if needed.  */
    9256            0 :       t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
    9257            0 :       t = gen_rtx_SET (stack_pointer_rtx, t);
    9258            0 :       add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
    9259            0 :       RTX_FRAME_RELATED_P (insn) = 1;
    9260              :     }
    9261              : 
    9262              :   /* Emit prologue code to adjust stack alignment and setup DRAP, in case
    9263              :      of DRAP is needed and stack realignment is really needed after reload */
    9264      1517054 :   if (stack_realign_drap)
    9265              :     {
    9266         7065 :       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
    9267              : 
    9268              :       /* Can't use DRAP in interrupt function.  */
    9269         7065 :       if (cfun->machine->func_type != TYPE_NORMAL)
    9270            0 :         sorry ("Dynamic Realign Argument Pointer (DRAP) not supported "
    9271              :                "in interrupt service routine.  This may be worked "
    9272              :                "around by avoiding functions with aggregate return.");
    9273              : 
    9274              :       /* Only need to push parameter pointer reg if it is caller saved.  */
    9275         7065 :       if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
    9276              :         {
    9277              :           /* Push arg pointer reg */
    9278          136 :           insn = emit_insn (gen_push (crtl->drap_reg));
    9279          136 :           RTX_FRAME_RELATED_P (insn) = 1;
    9280              :         }
    9281              : 
    9282              :       /* Grab the argument pointer.  */
    9283         7350 :       t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
    9284         7065 :       insn = emit_insn (gen_rtx_SET (crtl->drap_reg, t));
    9285         7065 :       RTX_FRAME_RELATED_P (insn) = 1;
    9286         7065 :       m->fs.cfa_reg = crtl->drap_reg;
    9287         7065 :       m->fs.cfa_offset = 0;
    9288              : 
    9289              :       /* Align the stack.  */
    9290         7065 :       insn = emit_insn (gen_and2_insn (stack_pointer_rtx,
    9291         7065 :                                        GEN_INT (-align_bytes)));
    9292         7065 :       RTX_FRAME_RELATED_P (insn) = 1;
    9293              : 
    9294              :       /* Replicate the return address on the stack so that return
    9295              :          address can be reached via (argp - 1) slot.  This is needed
    9296              :          to implement macro RETURN_ADDR_RTX and intrinsic function
    9297              :          expand_builtin_return_addr etc.  */
    9298         7635 :       t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
    9299         7065 :       t = gen_frame_mem (word_mode, t);
    9300         7065 :       insn = emit_insn (gen_push (t));
    9301         7065 :       RTX_FRAME_RELATED_P (insn) = 1;
    9302              : 
    9303              :       /* For the purposes of frame and register save area addressing,
    9304              :          we've started over with a new frame.  */
    9305         7065 :       m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
    9306         7065 :       m->fs.realigned = true;
    9307              : 
    9308         7065 :       if (static_chain)
    9309              :         {
    9310              :           /* Replicate static chain on the stack so that static chain
    9311              :              can be reached via (argp - 2) slot.  This is needed for
    9312              :              nested function with stack realignment.  */
    9313            0 :           insn = emit_insn (gen_push (static_chain));
    9314            0 :           RTX_FRAME_RELATED_P (insn) = 1;
    9315              :         }
    9316              :     }
    9317              : 
    9318      1517054 :   int_registers_saved = (frame.nregs == 0);
    9319      1517054 :   sse_registers_saved = (frame.nsseregs == 0);
    9320      1517054 :   save_stub_call_needed = (m->call_ms2sysv);
    9321      1517054 :   gcc_assert (sse_registers_saved || !save_stub_call_needed);
    9322              : 
    9323      1517054 :   if (frame_pointer_needed && !m->fs.fp_valid)
    9324              :     {
    9325              :       /* Note: AT&T enter does NOT have reversed args.  Enter is probably
    9326              :          slower on all targets.  Also sdb didn't like it.  */
    9327       473088 :       insn = emit_insn (gen_push (hard_frame_pointer_rtx));
    9328       473088 :       RTX_FRAME_RELATED_P (insn) = 1;
    9329              : 
    9330       473088 :       if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
    9331              :         {
    9332       473088 :           insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
    9333       473088 :           RTX_FRAME_RELATED_P (insn) = 1;
    9334              : 
    9335       473088 :           if (m->fs.cfa_reg == stack_pointer_rtx)
    9336       466023 :             m->fs.cfa_reg = hard_frame_pointer_rtx;
    9337       473088 :           m->fs.fp_offset = m->fs.sp_offset;
    9338       473088 :           m->fs.fp_valid = true;
    9339              :         }
    9340              :     }
    9341              : 
    9342      1517054 :   if (!int_registers_saved)
    9343              :     {
    9344              :       /* If saving registers via PUSH, do so now.  */
    9345       472559 :       if (!frame.save_regs_using_mov)
    9346              :         {
    9347       426733 :           ix86_emit_save_regs ();
    9348       426733 :           m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return;
    9349       426733 :           int_registers_saved = true;
    9350       426733 :           gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
    9351              :         }
    9352              : 
    9353              :       /* When using red zone we may start register saving before allocating
    9354              :          the stack frame saving one cycle of the prologue.  However, avoid
    9355              :          doing this if we have to probe the stack; at least on x86_64 the
    9356              :          stack probe can turn into a call that clobbers a red zone location. */
    9357        45826 :       else if (ix86_using_red_zone ()
    9358        45826 :                 && (! TARGET_STACK_PROBE
    9359            0 :                     || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
    9360              :         {
    9361        41446 :           HOST_WIDE_INT allocate_offset;
    9362        41446 :           if (crtl->shrink_wrapped_separate)
    9363              :             {
    9364        41390 :               allocate_offset = m->fs.sp_offset - frame.stack_pointer_offset;
    9365              : 
    9366              :               /* Adjust the total offset at the beginning of the function.  */
    9367        41390 :               pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    9368              :                                          GEN_INT (allocate_offset), -1,
    9369        41390 :                                          m->fs.cfa_reg == stack_pointer_rtx);
    9370        41390 :               m->fs.sp_offset = cfun->machine->frame.stack_pointer_offset;
    9371              :             }
    9372              : 
    9373        41446 :           ix86_emit_save_regs_using_mov (frame.reg_save_offset);
    9374        41446 :           int_registers_saved = true;
    9375              :         }
    9376              :     }
    9377              : 
    9378      1517054 :   if (frame.red_zone_size != 0)
    9379       137627 :     cfun->machine->red_zone_used = true;
    9380              : 
    9381      1517054 :   if (stack_realign_fp)
    9382              :     {
    9383        24639 :       int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
    9384        24991 :       gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
    9385              : 
    9386              :       /* Record last valid frame pointer offset.  */
    9387        24639 :       m->fs.sp_realigned_fp_last = frame.reg_save_offset;
    9388              : 
    9389              :       /* The computation of the size of the re-aligned stack frame means
    9390              :          that we must allocate the size of the register save area before
    9391              :          performing the actual alignment.  Otherwise we cannot guarantee
    9392              :          that there's enough storage above the realignment point.  */
    9393        24639 :       allocate = frame.reg_save_offset - m->fs.sp_offset
    9394        24639 :                  + frame.stack_realign_allocate;
    9395        24639 :       if (allocate)
    9396         2691 :         pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    9397              :                                    GEN_INT (-allocate), -1, false);
    9398              : 
    9399              :       /* Align the stack.  */
    9400        24639 :       emit_insn (gen_and2_insn (stack_pointer_rtx, GEN_INT (-align_bytes)));
    9401        24639 :       m->fs.sp_offset = ROUND_UP (m->fs.sp_offset, align_bytes);
    9402        24639 :       m->fs.sp_realigned_offset = m->fs.sp_offset
    9403        24639 :                                               - frame.stack_realign_allocate;
    9404              :       /* The stack pointer may no longer be equal to CFA - m->fs.sp_offset.
    9405              :          Beyond this point, stack access should be done via choose_baseaddr or
    9406              :          by using sp_valid_at and fp_valid_at to determine the correct base
    9407              :          register.  Henceforth, any CFA offset should be thought of as logical
    9408              :          and not physical.  */
    9409        24639 :       gcc_assert (m->fs.sp_realigned_offset >= m->fs.sp_realigned_fp_last);
    9410        24639 :       gcc_assert (m->fs.sp_realigned_offset == frame.stack_realign_offset);
    9411        24639 :       m->fs.sp_realigned = true;
    9412              : 
    9413              :       /* SEH unwind emit doesn't currently support REG_CFA_EXPRESSION, which
    9414              :          is needed to describe where a register is saved using a realigned
    9415              :          stack pointer, so we need to invalidate the stack pointer for that
    9416              :          target.  */
    9417        24639 :       if (TARGET_SEH)
    9418              :         m->fs.sp_valid = false;
    9419              : 
    9420              :       /* If SP offset is non-immediate after allocation of the stack frame,
    9421              :          then emit SSE saves or stub call prior to allocating the rest of the
    9422              :          stack frame.  This is less efficient for the out-of-line stub because
    9423              :          we can't combine allocations across the call barrier, but it's better
    9424              :          than using a scratch register.  */
    9425        24639 :       else if (!x86_64_immediate_operand (GEN_INT (frame.stack_pointer_offset
    9426              :                                                    - m->fs.sp_realigned_offset),
    9427        24639 :                                           Pmode))
    9428              :         {
    9429            3 :           if (!sse_registers_saved)
    9430              :             {
    9431            1 :               ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
    9432            1 :               sse_registers_saved = true;
    9433              :             }
    9434            2 :           else if (save_stub_call_needed)
    9435              :             {
    9436            1 :               ix86_emit_outlined_ms2sysv_save (frame);
    9437            1 :               save_stub_call_needed = false;
    9438              :             }
    9439              :         }
    9440              :     }
    9441              : 
    9442      1517054 :   allocate = frame.stack_pointer_offset - m->fs.sp_offset;
    9443              : 
    9444      1517054 :   if (flag_stack_usage_info)
    9445              :     {
    9446              :       /* We start to count from ARG_POINTER.  */
    9447          355 :       HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
    9448              : 
    9449              :       /* If it was realigned, take into account the fake frame.  */
    9450          355 :       if (stack_realign_drap)
    9451              :         {
    9452            1 :           if (ix86_static_chain_on_stack)
    9453            0 :             stack_size += UNITS_PER_WORD;
    9454              : 
    9455            1 :           if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
    9456            0 :             stack_size += UNITS_PER_WORD;
    9457              : 
    9458              :           /* This over-estimates by 1 minimal-stack-alignment-unit but
    9459              :              mitigates that by counting in the new return address slot.  */
    9460            1 :           current_function_dynamic_stack_size
    9461            1 :             += crtl->stack_alignment_needed / BITS_PER_UNIT;
    9462              :         }
    9463              : 
    9464          355 :       current_function_static_stack_size = stack_size;
    9465              :     }
    9466              : 
    9467              :   /* On SEH target with very large frame size, allocate an area to save
    9468              :      SSE registers (as the very large allocation won't be described).  */
    9469      1517054 :   if (TARGET_SEH
    9470              :       && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
    9471              :       && !sse_registers_saved)
    9472              :     {
    9473              :       HOST_WIDE_INT sse_size
    9474              :         = frame.sse_reg_save_offset - frame.reg_save_offset;
    9475              : 
    9476              :       gcc_assert (int_registers_saved);
    9477              : 
    9478              :       /* No need to do stack checking as the area will be immediately
    9479              :          written.  */
    9480              :       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    9481              :                                  GEN_INT (-sse_size), -1,
    9482              :                                  m->fs.cfa_reg == stack_pointer_rtx);
    9483              :       allocate -= sse_size;
    9484              :       ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
    9485              :       sse_registers_saved = true;
    9486              :     }
    9487              : 
    9488              :   /* If stack clash protection is requested, then probe the stack, unless it
    9489              :      is already probed on the target.  */
    9490      1517054 :   if (allocate >= 0
    9491      1517050 :       && flag_stack_clash_protection
    9492      1517151 :       && !ix86_target_stack_probe ())
    9493              :     {
    9494           97 :       ix86_adjust_stack_and_probe (allocate, int_registers_saved, false);
    9495           97 :       allocate = 0;
    9496              :     }
    9497              : 
    9498              :   /* The stack has already been decremented by the instruction calling us
    9499              :      so probe if the size is non-negative to preserve the protection area.  */
    9500      1516957 :   else if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
    9501              :     {
    9502           46 :       const HOST_WIDE_INT probe_interval = get_probe_interval ();
    9503              : 
    9504           46 :       if (STACK_CHECK_MOVING_SP)
    9505              :         {
    9506           46 :           if (crtl->is_leaf
    9507           18 :               && !cfun->calls_alloca
    9508           18 :               && allocate <= probe_interval)
    9509              :             ;
    9510              : 
    9511              :           else
    9512              :             {
    9513           29 :               ix86_adjust_stack_and_probe (allocate, int_registers_saved, true);
    9514           29 :               allocate = 0;
    9515              :             }
    9516              :         }
    9517              : 
    9518              :       else
    9519              :         {
    9520              :           HOST_WIDE_INT size = allocate;
    9521              : 
    9522              :           if (TARGET_64BIT && size >= HOST_WIDE_INT_C (0x80000000))
    9523              :             size = 0x80000000 - get_stack_check_protect () - 1;
    9524              : 
    9525              :           if (TARGET_STACK_PROBE)
    9526              :             {
    9527              :               if (crtl->is_leaf && !cfun->calls_alloca)
    9528              :                 {
    9529              :                   if (size > probe_interval)
    9530              :                     ix86_emit_probe_stack_range (0, size, int_registers_saved);
    9531              :                 }
    9532              :               else
    9533              :                 ix86_emit_probe_stack_range (0,
    9534              :                                              size + get_stack_check_protect (),
    9535              :                                              int_registers_saved);
    9536              :             }
    9537              :           else
    9538              :             {
    9539              :               if (crtl->is_leaf && !cfun->calls_alloca)
    9540              :                 {
    9541              :                   if (size > probe_interval
    9542              :                       && size > get_stack_check_protect ())
    9543              :                     ix86_emit_probe_stack_range (get_stack_check_protect (),
    9544              :                                                  (size
    9545              :                                                   - get_stack_check_protect ()),
    9546              :                                                  int_registers_saved);
    9547              :                 }
    9548              :               else
    9549              :                 ix86_emit_probe_stack_range (get_stack_check_protect (), size,
    9550              :                                              int_registers_saved);
    9551              :             }
    9552              :         }
    9553              :     }
    9554              : 
    9555      1517050 :   if (allocate == 0)
    9556              :     ;
    9557       837255 :   else if (!ix86_target_stack_probe ()
    9558       837255 :            || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
    9559              :     {
    9560       837210 :       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
    9561              :                                  GEN_INT (-allocate), -1,
    9562       837210 :                                  m->fs.cfa_reg == stack_pointer_rtx);
    9563              :     }
    9564              :   else
    9565              :     {
    9566           45 :       rtx eax = gen_rtx_REG (Pmode, AX_REG);
    9567           45 :       rtx r10 = NULL;
    9568           45 :       const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
    9569           45 :       bool eax_live = ix86_eax_live_at_start_p ();
    9570           45 :       bool r10_live = false;
    9571              : 
    9572           45 :       if (TARGET_64BIT)
    9573           45 :         r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
    9574              : 
    9575           45 :       if (eax_live)
    9576              :         {
    9577            0 :           insn = emit_insn (gen_push (eax));
    9578            0 :           allocate -= UNITS_PER_WORD;
    9579              :           /* Note that SEH directives need to continue tracking the stack
    9580              :              pointer even after the frame pointer has been set up.  */
    9581            0 :           if (sp_is_cfa_reg || TARGET_SEH)
    9582              :             {
    9583            0 :               if (sp_is_cfa_reg)
    9584            0 :                 m->fs.cfa_offset += UNITS_PER_WORD;
    9585            0 :               RTX_FRAME_RELATED_P (insn) = 1;
    9586            0 :               add_reg_note (insn, REG_FRAME_RELATED_EXPR,
    9587            0 :                             gen_rtx_SET (stack_pointer_rtx,
    9588              :                                          plus_constant (Pmode,
    9589              :                                                         stack_pointer_rtx,
    9590              :                                                         -UNITS_PER_WORD)));
    9591              :             }
    9592              :         }
    9593              : 
    9594           45 :       if (r10_live)
    9595              :         {
    9596            0 :           r10 = gen_rtx_REG (Pmode, R10_REG);
    9597            0 :           insn = emit_insn (gen_push (r10));
    9598            0 :           allocate -= UNITS_PER_WORD;
    9599            0 :           if (sp_is_cfa_reg || TARGET_SEH)
    9600              :             {
    9601            0 :               if (sp_is_cfa_reg)
    9602            0 :                 m->fs.cfa_offset += UNITS_PER_WORD;
    9603            0 :               RTX_FRAME_RELATED_P (insn) = 1;
    9604            0 :               add_reg_note (insn, REG_FRAME_RELATED_EXPR,
    9605            0 :                             gen_rtx_SET (stack_pointer_rtx,
    9606              :                                          plus_constant (Pmode,
    9607              :                                                         stack_pointer_rtx,
    9608              :                                                         -UNITS_PER_WORD)));
    9609              :             }
    9610              :         }
    9611              : 
    9612           45 :       emit_move_insn (eax, GEN_INT (allocate));
    9613           45 :       emit_insn (gen_allocate_stack_worker_probe (Pmode, eax, eax));
    9614              : 
    9615              :       /* Use the fact that AX still contains ALLOCATE.  */
    9616           45 :       insn = emit_insn (gen_pro_epilogue_adjust_stack_sub
    9617           45 :                         (Pmode, stack_pointer_rtx, stack_pointer_rtx, eax));
    9618              : 
    9619           45 :       if (sp_is_cfa_reg || TARGET_SEH)
    9620              :         {
    9621           37 :           if (sp_is_cfa_reg)
    9622           37 :             m->fs.cfa_offset += allocate;
    9623           37 :           RTX_FRAME_RELATED_P (insn) = 1;
    9624           37 :           add_reg_note (insn, REG_FRAME_RELATED_EXPR,
    9625           37 :                         gen_rtx_SET (stack_pointer_rtx,
    9626              :                                      plus_constant (Pmode, stack_pointer_rtx,
    9627              :                                                     -allocate)));
    9628              :         }
    9629           45 :       m->fs.sp_offset += allocate;
    9630              : 
    9631              :       /* Use stack_pointer_rtx for relative addressing so that code works for
    9632              :          realigned stack.  But this means that we need a blockage to prevent
    9633              :          stores based on the frame pointer from being scheduled before.  */
    9634           45 :       if (r10_live && eax_live)
    9635              :         {
    9636            0 :           t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
    9637            0 :           emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
    9638              :                           gen_frame_mem (word_mode, t));
    9639            0 :           t = plus_constant (Pmode, t, UNITS_PER_WORD);
    9640            0 :           emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
    9641              :                           gen_frame_mem (word_mode, t));
    9642            0 :           emit_insn (gen_memory_blockage ());
    9643              :         }
    9644           45 :       else if (eax_live || r10_live)
    9645              :         {
    9646            0 :           t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
    9647            0 :           emit_move_insn (gen_rtx_REG (word_mode,
    9648              :                                        (eax_live ? AX_REG : R10_REG)),
    9649              :                           gen_frame_mem (word_mode, t));
    9650            0 :           emit_insn (gen_memory_blockage ());
    9651              :         }
    9652              :     }
    9653      1517054 :   gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
    9654              : 
     9655              :   /* If we haven't already set up the frame pointer, do so now.  */
    9656      1517054 :   if (frame_pointer_needed && !m->fs.fp_valid)
    9657              :     {
    9658            0 :       insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx,
    9659            0 :                             GEN_INT (frame.stack_pointer_offset
    9660              :                                      - frame.hard_frame_pointer_offset));
    9661            0 :       insn = emit_insn (insn);
    9662            0 :       RTX_FRAME_RELATED_P (insn) = 1;
    9663            0 :       add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
    9664              : 
    9665            0 :       if (m->fs.cfa_reg == stack_pointer_rtx)
    9666            0 :         m->fs.cfa_reg = hard_frame_pointer_rtx;
    9667            0 :       m->fs.fp_offset = frame.hard_frame_pointer_offset;
    9668            0 :       m->fs.fp_valid = true;
    9669              :     }
    9670              : 
    9671      1517054 :   if (!int_registers_saved)
    9672         4380 :     ix86_emit_save_regs_using_mov (frame.reg_save_offset);
    9673      1517054 :   if (!sse_registers_saved)
    9674        33352 :     ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
    9675      1483702 :   else if (save_stub_call_needed)
    9676         7044 :     ix86_emit_outlined_ms2sysv_save (frame);
    9677              : 
    9678              :   /* For the mcount profiling on 32 bit PIC mode we need to emit SET_GOT
    9679              :      in PROLOGUE.  */
    9680      1517054 :   if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
    9681              :     {
    9682            0 :       rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
    9683            0 :       insn = emit_insn (gen_set_got (pic));
    9684            0 :       RTX_FRAME_RELATED_P (insn) = 1;
    9685            0 :       add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
    9686            0 :       emit_insn (gen_prologue_use (pic));
     9687              :       /* Delete an already-emitted SET_GOT if it exists and was allocated
     9688              :          to REAL_PIC_OFFSET_TABLE_REGNUM.  */
    9689            0 :       ix86_elim_entry_set_got (pic);
    9690              :     }
    9691              : 
    9692      1517054 :   if (crtl->drap_reg && !crtl->stack_realign_needed)
    9693              :     {
    9694              :       /* vDRAP is setup but after reload it turns out stack realign
    9695              :          isn't necessary, here we will emit prologue to setup DRAP
    9696              :          without stack realign adjustment */
    9697          177 :       t = choose_baseaddr (0, NULL);
    9698          177 :       emit_insn (gen_rtx_SET (crtl->drap_reg, t));
    9699              :     }
    9700              : 
    9701              :   /* Prevent instructions from being scheduled into register save push
    9702              :      sequence when access to the redzone area is done through frame pointer.
    9703              :      The offset between the frame pointer and the stack pointer is calculated
    9704              :      relative to the value of the stack pointer at the end of the function
    9705              :      prologue, and moving instructions that access redzone area via frame
    9706              :      pointer inside push sequence violates this assumption.  */
    9707      1517054 :   if (frame_pointer_needed && frame.red_zone_size)
    9708       126623 :     emit_insn (gen_memory_blockage ());
    9709              : 
    9710              :   /* SEH requires that the prologue end within 256 bytes of the start of
    9711              :      the function.  Prevent instruction schedules that would extend that.
    9712              :      Further, prevent alloca modifications to the stack pointer from being
    9713              :      combined with prologue modifications.  */
    9714              :   if (TARGET_SEH)
    9715              :     emit_insn (gen_prologue_use (stack_pointer_rtx));
    9716              : }
    9717              : 
    9718              : /* Emit code to restore REG using a POP or POPP insn.  */
    9719              : 
static void
ix86_emit_restore_reg_using_pop (rtx reg, bool ppx_p)
{
  struct machine_function *m = cfun->machine;
  /* PPX_P selects the APX POPP encoding instead of a plain POP.  */
  rtx_insn *insn = emit_insn (gen_pop (reg, ppx_p));

  /* Record the restore for the unwinder and account for the one word
     the pop removed from the stack.  */
  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
  m->fs.sp_offset -= UNITS_PER_WORD;

  if (m->fs.cfa_reg == crtl->drap_reg
      && REGNO (reg) == REGNO (crtl->drap_reg))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  /* While the stack pointer is the CFA register, the pop moved the CFA;
     emit an adjust note so the unwind info stays in sync.  */
  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= UNITS_PER_WORD;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= UNITS_PER_WORD;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
    9775              : 
    9776              : /* Emit code to restore REG using a POP2 insn.  */
static void
ix86_emit_restore_reg_using_pop2 (rtx reg1, rtx reg2, bool ppx_p = false)
{
  struct machine_function *m = cfun->machine;
  /* POP2 restores two word-sized registers in one instruction.  */
  const int offset = UNITS_PER_WORD * 2;
  rtx_insn *insn;

  /* Both words are read through a single TImode post-increment memory
     operand on the stack pointer.  */
  rtx mem = gen_rtx_MEM (TImode, gen_rtx_POST_INC (Pmode,
						   stack_pointer_rtx));

  /* PPX_P selects the APX POP2P encoding instead of POP2.  */
  if (ppx_p)
    insn = emit_insn (gen_pop2p_di (reg1, mem, reg2));
  else
    insn = emit_insn (gen_pop2_di (reg1, mem, reg2));

  RTX_FRAME_RELATED_P (insn) = 1;

  /* Attach CFA restore notes for both registers and account for the
     two words removed from the stack.  */
  rtx dwarf = NULL_RTX;
  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg1, dwarf);
  dwarf = alloc_reg_note (REG_CFA_RESTORE, reg2, dwarf);
  REG_NOTES (insn) = dwarf;
  m->fs.sp_offset -= offset;

  if (m->fs.cfa_reg == crtl->drap_reg
      && (REGNO (reg1) == REGNO (crtl->drap_reg)
	  || REGNO (reg2) == REGNO (crtl->drap_reg)))
    {
      /* Previously we'd represented the CFA as an expression
	 like *(%ebp - 8).  We've just popped that value from
	 the stack, which means we need to reset the CFA to
	 the drap register.  This will remain until we restore
	 the stack pointer.  */
      add_reg_note (insn, REG_CFA_DEF_CFA,
		    REGNO (reg1) == REGNO (crtl->drap_reg) ? reg1 : reg2);
      RTX_FRAME_RELATED_P (insn) = 1;

      /* This means that the DRAP register is valid for addressing too.  */
      m->fs.drap_valid = true;
      return;
    }

  /* While the stack pointer is the CFA register, the pop moved the CFA;
     emit an adjust note so the unwind info stays in sync.  */
  if (m->fs.cfa_reg == stack_pointer_rtx)
    {
      rtx x = plus_constant (Pmode, stack_pointer_rtx, offset);
      x = gen_rtx_SET (stack_pointer_rtx, x);
      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
      RTX_FRAME_RELATED_P (insn) = 1;

      m->fs.cfa_offset -= offset;
    }

  /* When the frame pointer is the CFA, and we pop it, we are
     swapping back to the stack pointer as the CFA.  This happens
     for stack frames that don't allocate other data, so we assume
     the stack pointer is now pointing at the return address, i.e.
     the function entry state, which makes the offset be 1 word.  */
  if (reg1 == hard_frame_pointer_rtx || reg2 == hard_frame_pointer_rtx)
    {
      m->fs.fp_valid = false;
      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
	{
	  m->fs.cfa_reg = stack_pointer_rtx;
	  m->fs.cfa_offset -= offset;

	  add_reg_note (insn, REG_CFA_DEF_CFA,
			plus_constant (Pmode, stack_pointer_rtx,
				       m->fs.cfa_offset));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}
    9848              : 
    9849              : /* Emit code to restore saved registers using POP insns.  */
    9850              : 
    9851              : static void
    9852      1348230 : ix86_emit_restore_regs_using_pop (bool ppx_p)
    9853              : {
    9854      1348230 :   unsigned int regno;
    9855              : 
    9856    125385390 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    9857    124037160 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
    9858      1222873 :       ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno), ppx_p);
    9859      1348230 : }
    9860              : 
    9861              : /* Emit code to restore saved registers using POP2 insns.  */
    9862              : 
    9863              : static void
    9864          558 : ix86_emit_restore_regs_using_pop2 (void)
    9865              : {
    9866          558 :   int regno;
    9867          558 :   int regno_list[2];
    9868          558 :   regno_list[0] = regno_list[1] = -1;
    9869          558 :   int loaded_regnum = 0;
    9870          558 :   bool aligned = cfun->machine->fs.sp_offset % 16 == 0;
    9871              : 
    9872        51894 :   for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    9873        51336 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, false, true))
    9874              :       {
    9875          127 :         if (aligned)
    9876              :           {
    9877          120 :             regno_list[loaded_regnum++] = regno;
    9878          120 :             if (loaded_regnum == 2)
    9879              :               {
    9880           19 :                 gcc_assert (regno_list[0] != -1
    9881              :                             && regno_list[1] != -1
    9882              :                             && regno_list[0] != regno_list[1]);
    9883              : 
    9884           19 :                 ix86_emit_restore_reg_using_pop2 (gen_rtx_REG (word_mode,
    9885              :                                                                regno_list[0]),
    9886              :                                                   gen_rtx_REG (word_mode,
    9887              :                                                                regno_list[1]),
    9888           19 :                                                   TARGET_APX_PPX);
    9889           19 :                 loaded_regnum = 0;
    9890           19 :                 regno_list[0] = regno_list[1] = -1;
    9891              :               }
    9892              :           }
    9893              :         else
    9894              :           {
    9895           14 :             ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno),
    9896            7 :                                              TARGET_APX_PPX);
    9897            7 :             aligned = true;
    9898              :           }
    9899              :       }
    9900              : 
    9901          558 :   if (loaded_regnum == 1)
    9902           82 :     ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno_list[0]),
    9903           82 :                                      TARGET_APX_PPX);
    9904          558 : }
    9905              : 
    9906              : /* Emit code and notes for the LEAVE instruction.  If insn is non-null,
    9907              :    omits the emit and only attaches the notes.  */
    9908              : 
static void
ix86_emit_leave (rtx_insn *insn)
{
  struct machine_function *m = cfun->machine;

  /* If the caller already emitted the (parallel) insn, only attach the
     notes and update the tracked frame state.  */
  if (!insn)
    insn = emit_insn (gen_leave (word_mode));

  ix86_add_queued_cfa_restore_notes (insn);

  /* LEAVE restores the stack pointer from the frame pointer and pops
     the saved frame pointer, so SP becomes valid one word below the
     old FP position and FP ceases to be valid.  */
  gcc_assert (m->fs.fp_valid);
  m->fs.sp_valid = true;
  m->fs.sp_realigned = false;
  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
  m->fs.fp_valid = false;

  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
    {
      /* The CFA was based on the frame pointer; rebase it on the stack
	 pointer and tell the unwinder via a REG_CFA_DEF_CFA note.  */
      m->fs.cfa_reg = stack_pointer_rtx;
      m->fs.cfa_offset = m->fs.sp_offset;

      add_reg_note (insn, REG_CFA_DEF_CFA,
		    plus_constant (Pmode, stack_pointer_rtx,
				   m->fs.sp_offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
			     m->fs.fp_offset);
}
    9938              : 
    9939              : /* Emit code to restore saved registers using MOV insns.
    9940              :    First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
				  bool maybe_eh_return)
{
  struct machine_function *m = cfun->machine;
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
      {

	/* Skip registers, already processed by shrink wrap separate.  */
	if (!cfun->machine->reg_is_wrapped_separately[regno])
	  {
	    rtx reg = gen_rtx_REG (word_mode, regno);
	    rtx mem;
	    rtx_insn *insn;

	    /* Load the register from its save slot, addressed relative
	       to whichever base register is currently valid.  */
	    mem = choose_baseaddr (cfa_offset, NULL);
	    mem = gen_frame_mem (word_mode, mem);
	    insn = emit_move_insn (reg, mem);

	    if (m->fs.cfa_reg == crtl->drap_reg
		&& regno == REGNO (crtl->drap_reg))
	      {
		/* Previously we'd represented the CFA as an expression
		   like *(%ebp - 8).  We've just popped that value from
		   the stack, which means we need to reset the CFA to
		   the drap register.  This will remain until we restore
		   the stack pointer.  */
		add_reg_note (insn, REG_CFA_DEF_CFA, reg);
		RTX_FRAME_RELATED_P (insn) = 1;

		/* DRAP register is valid for addressing.  */
		m->fs.drap_valid = true;
	      }
	    else
	      ix86_add_cfa_restore_note (NULL, reg, cfa_offset);
	  }
	/* The save slot is consumed even when the restore itself was
	   shrink-wrapped out, so always advance to the next slot.  */
	cfa_offset -= UNITS_PER_WORD;
      }
}
    9983              : 
    9984              : /* Emit code to restore saved registers using MOV insns.
    9985              :    First register is restored from CFA - CFA_OFFSET.  */
static void
ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
				      bool maybe_eh_return)
{
  unsigned int regno;

  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return, true))
      {
	rtx reg = gen_rtx_REG (V4SFmode, regno);
	rtx mem;
	unsigned int align = GET_MODE_ALIGNMENT (V4SFmode);

	/* choose_baseaddr may lower ALIGN to reflect what the chosen
	   base register can guarantee.  */
	mem = choose_baseaddr (cfa_offset, &align);
	mem = gen_rtx_MEM (V4SFmode, mem);

	/* The location alignment depends upon the base register.  */
	align = MIN (GET_MODE_ALIGNMENT (V4SFmode), align);
	gcc_assert (! (cfa_offset & (align / BITS_PER_UNIT - 1)));
	set_mem_align (mem, align);
	emit_insn (gen_rtx_SET (reg, mem));

	ix86_add_cfa_restore_note (NULL, reg, cfa_offset);

	/* SSE save slots are 16 bytes, unlike the word-sized GPR slots.  */
	cfa_offset -= GET_MODE_SIZE (V4SFmode);
      }
}
   10013              : 
   10014              : static void
   10015         7621 : ix86_emit_outlined_ms2sysv_restore (const struct ix86_frame &frame,
   10016              :                                   bool use_call, int style)
   10017              : {
   10018         7621 :   struct machine_function *m = cfun->machine;
   10019         7621 :   const unsigned ncregs = NUM_X86_64_MS_CLOBBERED_REGS
   10020         7621 :                           + m->call_ms2sysv_extra_regs;
   10021         7621 :   rtvec v;
   10022         7621 :   unsigned int elems_needed, align, i, vi = 0;
   10023         7621 :   rtx_insn *insn;
   10024         7621 :   rtx sym, tmp;
   10025         7621 :   rtx rsi = gen_rtx_REG (word_mode, SI_REG);
   10026         7621 :   rtx r10 = NULL_RTX;
   10027         7621 :   const class xlogue_layout &xlogue = xlogue_layout::get_instance ();
   10028         7621 :   HOST_WIDE_INT stub_ptr_offset = xlogue.get_stub_ptr_offset ();
   10029         7621 :   HOST_WIDE_INT rsi_offset = frame.stack_realign_offset + stub_ptr_offset;
   10030         7621 :   rtx rsi_frame_load = NULL_RTX;
   10031         7621 :   HOST_WIDE_INT rsi_restore_offset = (HOST_WIDE_INT)-1;
   10032         7621 :   enum xlogue_stub stub;
   10033              : 
   10034         7621 :   gcc_assert (!m->fs.fp_valid || frame_pointer_needed);
   10035              : 
   10036              :   /* If using a realigned stack, we should never start with padding.  */
   10037         7621 :   gcc_assert (!stack_realign_fp || !xlogue.get_stack_align_off_in ());
   10038              : 
   10039              :   /* Setup RSI as the stub's base pointer.  */
   10040         7621 :   align = GET_MODE_ALIGNMENT (V4SFmode);
   10041         7621 :   tmp = choose_baseaddr (rsi_offset, &align, SI_REG);
   10042         7621 :   gcc_assert (align >= GET_MODE_ALIGNMENT (V4SFmode));
   10043              : 
   10044         7621 :   emit_insn (gen_rtx_SET (rsi, tmp));
   10045              : 
   10046              :   /* Get a symbol for the stub.  */
   10047         7621 :   if (frame_pointer_needed)
   10048         5955 :     stub = use_call ? XLOGUE_STUB_RESTORE_HFP
   10049              :                     : XLOGUE_STUB_RESTORE_HFP_TAIL;
   10050              :   else
   10051         1666 :     stub = use_call ? XLOGUE_STUB_RESTORE
   10052              :                     : XLOGUE_STUB_RESTORE_TAIL;
   10053         7621 :   sym = xlogue.get_stub_rtx (stub);
   10054              : 
   10055         7621 :   elems_needed = ncregs;
   10056         7621 :   if (use_call)
   10057         6498 :     elems_needed += 1;
   10058              :   else
   10059         1275 :     elems_needed += frame_pointer_needed ? 5 : 3;
   10060         7621 :   v = rtvec_alloc (elems_needed);
   10061              : 
   10062              :   /* We call the epilogue stub when we need to pop incoming args or we are
   10063              :      doing a sibling call as the tail.  Otherwise, we will emit a jmp to the
   10064              :      epilogue stub and it is the tail-call.  */
   10065         7621 :   if (use_call)
   10066         6498 :       RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
   10067              :   else
   10068              :     {
   10069         1123 :       RTVEC_ELT (v, vi++) = ret_rtx;
   10070         1123 :       RTVEC_ELT (v, vi++) = gen_rtx_USE (VOIDmode, sym);
   10071         1123 :       if (frame_pointer_needed)
   10072              :         {
   10073          971 :           rtx rbp = gen_rtx_REG (DImode, BP_REG);
   10074          971 :           gcc_assert (m->fs.fp_valid);
   10075          971 :           gcc_assert (m->fs.cfa_reg == hard_frame_pointer_rtx);
   10076              : 
   10077          971 :           tmp = plus_constant (DImode, rbp, 8);
   10078          971 :           RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, tmp);
   10079          971 :           RTVEC_ELT (v, vi++) = gen_rtx_SET (rbp, gen_rtx_MEM (DImode, rbp));
   10080          971 :           tmp = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
   10081          971 :           RTVEC_ELT (v, vi++) = gen_rtx_CLOBBER (VOIDmode, tmp);
   10082              :         }
   10083              :       else
   10084              :         {
   10085              :           /* If no hard frame pointer, we set R10 to the SP restore value.  */
   10086          152 :           gcc_assert (!m->fs.fp_valid);
   10087          152 :           gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
   10088          152 :           gcc_assert (m->fs.sp_valid);
   10089              : 
   10090          152 :           r10 = gen_rtx_REG (DImode, R10_REG);
   10091          152 :           tmp = plus_constant (Pmode, rsi, stub_ptr_offset);
   10092          152 :           emit_insn (gen_rtx_SET (r10, tmp));
   10093              : 
   10094          152 :           RTVEC_ELT (v, vi++) = gen_rtx_SET (stack_pointer_rtx, r10);
   10095              :         }
   10096              :     }
   10097              : 
   10098              :   /* Generate frame load insns and restore notes.  */
   10099       107954 :   for (i = 0; i < ncregs; ++i)
   10100              :     {
   10101       100333 :       const xlogue_layout::reginfo &r = xlogue.get_reginfo (i);
   10102       100333 :       machine_mode mode = SSE_REGNO_P (r.regno) ? V4SFmode : word_mode;
   10103       100333 :       rtx reg, frame_load;
   10104              : 
   10105       100333 :       reg = gen_rtx_REG (mode, r.regno);
   10106       100333 :       frame_load = gen_frame_load (reg, rsi, r.offset);
   10107              : 
   10108              :       /* Save RSI frame load insn & note to add last.  */
   10109       100333 :       if (r.regno == SI_REG)
   10110              :         {
   10111         7621 :           gcc_assert (!rsi_frame_load);
   10112         7621 :           rsi_frame_load = frame_load;
   10113         7621 :           rsi_restore_offset = r.offset;
   10114              :         }
   10115              :       else
   10116              :         {
   10117        92712 :           RTVEC_ELT (v, vi++) = frame_load;
   10118        92712 :           ix86_add_cfa_restore_note (NULL, reg, r.offset);
   10119              :         }
   10120              :     }
   10121              : 
   10122              :   /* Add RSI frame load & restore note at the end.  */
   10123         7621 :   gcc_assert (rsi_frame_load);
   10124         7621 :   gcc_assert (rsi_restore_offset != (HOST_WIDE_INT)-1);
   10125         7621 :   RTVEC_ELT (v, vi++) = rsi_frame_load;
   10126         7621 :   ix86_add_cfa_restore_note (NULL, gen_rtx_REG (DImode, SI_REG),
   10127              :                              rsi_restore_offset);
   10128              : 
   10129              :   /* Finally, for tail-call w/o a hard frame pointer, set SP to R10.  */
   10130         7621 :   if (!use_call && !frame_pointer_needed)
   10131              :     {
   10132          152 :       gcc_assert (m->fs.sp_valid);
   10133          152 :       gcc_assert (!m->fs.sp_realigned);
   10134              : 
   10135              :       /* At this point, R10 should point to frame.stack_realign_offset.  */
   10136          152 :       if (m->fs.cfa_reg == stack_pointer_rtx)
   10137          152 :         m->fs.cfa_offset += m->fs.sp_offset - frame.stack_realign_offset;
   10138          152 :       m->fs.sp_offset = frame.stack_realign_offset;
   10139              :     }
   10140              : 
   10141         7621 :   gcc_assert (vi == (unsigned int)GET_NUM_ELEM (v));
   10142         7621 :   tmp = gen_rtx_PARALLEL (VOIDmode, v);
   10143         7621 :   if (use_call)
   10144         6498 :       insn = emit_insn (tmp);
   10145              :   else
   10146              :     {
   10147         1123 :       insn = emit_jump_insn (tmp);
   10148         1123 :       JUMP_LABEL (insn) = ret_rtx;
   10149              : 
   10150         1123 :       if (frame_pointer_needed)
   10151          971 :         ix86_emit_leave (insn);
   10152              :       else
   10153              :         {
   10154              :           /* Need CFA adjust note.  */
   10155          152 :           tmp = gen_rtx_SET (stack_pointer_rtx, r10);
   10156          152 :           add_reg_note (insn, REG_CFA_ADJUST_CFA, tmp);
   10157              :         }
   10158              :     }
   10159              : 
   10160         7621 :   RTX_FRAME_RELATED_P (insn) = true;
   10161         7621 :   ix86_add_queued_cfa_restore_notes (insn);
   10162              : 
   10163              :   /* If we're not doing a tail-call, we need to adjust the stack.  */
   10164         7621 :   if (use_call && m->fs.sp_valid)
   10165              :     {
   10166         3706 :       HOST_WIDE_INT dealloc = m->fs.sp_offset - frame.stack_realign_offset;
   10167         3706 :       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   10168              :                                 GEN_INT (dealloc), style,
   10169         3706 :                                 m->fs.cfa_reg == stack_pointer_rtx);
   10170              :     }
   10171         7621 : }
   10172              : 
   10173              : /* Restore function stack, frame, and registers -- expand the epilogue
                      :    RTL for the current function.
                      : 
                      :    STYLE selects the flavor of epilogue:
                      :      0 - sibcall epilogue: registers and stack are restored but no
                      :          return instruction is emitted, and the saved frame state is
                      :          reinstated before returning;
                      :      2 - eh_return epilogue: EH_RETURN_STACKADJ_RTX is added to the
                      :          stack pointer, and with control-flow instrumentation the
                      :          return is an indirect jump through %ecx;
                      :      otherwise - a normal return epilogue.  */
   10174              : 
   10175              : void
   10176      1642516 : ix86_expand_epilogue (int style)
   10177              : {
   10178      1642516 :   struct machine_function *m = cfun->machine;
                      :   /* Snapshot the incoming frame state; it is assigned back to m->fs
                      :      before every return below so that a subsequent epilogue expansion
                      :      (for another exit edge) starts from the prologue's state again.  */
   10179      1642516 :   struct machine_frame_state frame_state_save = m->fs;
   10180      1642516 :   bool restore_regs_via_mov;
   10181      1642516 :   bool using_drap;
   10182      1642516 :   bool restore_stub_is_tail = false;
   10183              : 
   10184      1642516 :   if (ix86_function_naked (current_function_decl))
   10185              :     {
   10186              :       /* The program should not reach this point.  */
   10187           74 :       emit_insn (gen_ud2 ());
   10188       125507 :       return;
   10189              :     }
   10190              : 
   10191      1642442 :   ix86_finalize_stack_frame_flags ();
   10192      1642442 :   const struct ix86_frame &frame = cfun->machine->frame;
   10193              : 
   10194      1642442 :   m->fs.sp_realigned = stack_realign_fp;
   10195        31844 :   m->fs.sp_valid = stack_realign_fp
   10196      1617757 :                    || !frame_pointer_needed
   10197      2092171 :                    || crtl->sp_is_unchanging;
   10198      1642442 :   gcc_assert (!m->fs.sp_valid
   10199              :               || m->fs.sp_offset == frame.stack_pointer_offset);
   10200              : 
   10201              :   /* The FP must be valid if the frame pointer is present.  */
   10202      1642442 :   gcc_assert (frame_pointer_needed == m->fs.fp_valid);
   10203      1642442 :   gcc_assert (!m->fs.fp_valid
   10204              :               || m->fs.fp_offset == frame.hard_frame_pointer_offset);
   10205              : 
   10206              :   /* We must have *some* valid pointer to the stack frame.  */
   10207      1642442 :   gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
   10208              : 
   10209              :   /* The DRAP is never valid at this point.  */
   10210      1642442 :   gcc_assert (!m->fs.drap_valid);
   10211              : 
   10212              :   /* See the comment about red zone and frame
   10213              :      pointer usage in ix86_expand_prologue.  */
   10214      1642442 :   if (frame_pointer_needed && frame.red_zone_size)
   10215       126656 :     emit_insn (gen_memory_blockage ());
   10216              : 
   10217      1642442 :   using_drap = crtl->drap_reg && crtl->stack_realign_needed;
   10218         7159 :   gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
   10219              : 
   10220              :   /* Determine the CFA offset of the end of the red-zone.  */
   10221      1642442 :   m->fs.red_zone_offset = 0;
   10222      1642442 :   if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
   10223              :     {
   10224              :       /* The red-zone begins below return address and error code in
   10225              :          exception handler.  */
   10226      1465368 :       m->fs.red_zone_offset = RED_ZONE_SIZE + INCOMING_FRAME_SP_OFFSET;
   10227              : 
   10228              :       /* When the register save area is in the aligned portion of
   10229              :          the stack, determine the maximum runtime displacement that
   10230              :          matches up with the aligned frame.  */
   10231      1465368 :       if (stack_realign_drap)
   10232         8588 :         m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
   10233         4294 :                                   + UNITS_PER_WORD);
   10234              :     }
   10235              : 
   10236      1642442 :   HOST_WIDE_INT reg_save_offset = frame.reg_save_offset;
   10237              : 
   10238              :   /* Special care must be taken for the normal return case of a function
   10239              :      using eh_return: the eax and edx registers are marked as saved, but
   10240              :      not restored along this path.  Adjust the save location to match.  */
   10241      1642442 :   if (crtl->calls_eh_return && style != 2)
   10242           37 :     reg_save_offset -= 2 * UNITS_PER_WORD;
   10243              : 
   10244              :   /* EH_RETURN requires the use of moves to function properly.  */
   10245      1642442 :   if (crtl->calls_eh_return)
   10246              :     restore_regs_via_mov = true;
   10247              :   /* SEH requires the use of pops to identify the epilogue.  */
   10248      1642384 :   else if (TARGET_SEH)
   10249              :     restore_regs_via_mov = false;
   10250              :   /* If we already saved regs with pushp (APX PPX), don't use moves at epilogue.  */
   10251      1642384 :   else if (m->fs.apx_ppx_used)
   10252              :     restore_regs_via_mov = false;
   10253              :   /* If we're only restoring one register and sp cannot be used, then
   10254              :      use a move instruction to restore the register, since it's
   10255              :      less work than reloading sp and popping the register.  */
   10256      1642297 :   else if (!sp_valid_at (frame.hfp_save_offset) && frame.nregs <= 1)
   10257              :     restore_regs_via_mov = true;
   10258      1581497 :   else if (crtl->shrink_wrapped_separate
   10259      1527658 :            || (TARGET_EPILOGUE_USING_MOVE
   10260        56735 :                && cfun->machine->use_fast_prologue_epilogue
   10261        56679 :                && (frame.nregs > 1
   10262        56666 :                    || m->fs.sp_offset != reg_save_offset)))
   10263              :     restore_regs_via_mov = true;
   10264      1527423 :   else if (frame_pointer_needed
   10265       411007 :            && !frame.nregs
   10266       316609 :            && m->fs.sp_offset != reg_save_offset)
   10267              :     restore_regs_via_mov = true;
   10268      1378061 :   else if (frame_pointer_needed
   10269       261645 :            && TARGET_USE_LEAVE
   10270       261570 :            && cfun->machine->use_fast_prologue_epilogue
   10271       204984 :            && frame.nregs == 1)
   10272              :     restore_regs_via_mov = true;
   10273              :   else
   10274      1642442 :     restore_regs_via_mov = false;
   10275              : 
   10276      1642442 :   if (crtl->shrink_wrapped_separate)
   10277        53870 :     gcc_assert (restore_regs_via_mov);
   10278              : 
   10279      1588572 :   if (restore_regs_via_mov || frame.nsseregs)
   10280              :     {
   10281              :       /* Ensure that the entire register save area is addressable via
   10282              :          the stack pointer, if we will restore SSE regs via sp.  */
   10283       327229 :       if (TARGET_64BIT
   10284       314691 :           && m->fs.sp_offset > 0x7fffffff
   10285           23 :           && sp_valid_at (frame.stack_realign_offset + 1)
   10286       327251 :           && (frame.nsseregs + frame.nregs) != 0)
   10287              :         {
   10288            6 :           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   10289            6 :                                      GEN_INT (m->fs.sp_offset
   10290              :                                               - frame.sse_reg_save_offset),
   10291              :                                      style,
   10292            6 :                                      m->fs.cfa_reg == stack_pointer_rtx);
   10293              :         }
   10294              :     }
   10295              : 
   10296              :   /* If there are any SSE registers to restore, then we have to do it
   10297              :      via moves, since there's obviously no pop for SSE regs.  */
   10298      1642442 :   if (frame.nsseregs)
   10299        33929 :     ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
   10300              :                                           style == 2);
   10301              : 
   10302      1642442 :   if (m->call_ms2sysv)
   10303              :     {
   10304         7621 :       int pop_incoming_args = crtl->args.pops_args && crtl->args.size;
   10305              : 
   10306              :       /* We cannot use a tail-call for the stub if:
   10307              :          1. We have to pop incoming args,
   10308              :          2. We have additional int regs to restore, or
   10309              :          3. A sibling call will be the tail-call, or
   10310              :          4. We are emitting an eh_return_internal epilogue.
   10311              : 
   10312              :          TODO: Item 4 has not yet been tested!
   10313              : 
   10314              :          If any of the above are true, we will call the stub rather than
   10315              :          jump to it.  */
   10316         7621 :       restore_stub_is_tail = !(pop_incoming_args || frame.nregs || style != 1);
   10317         7621 :       ix86_emit_outlined_ms2sysv_restore (frame, !restore_stub_is_tail, style);
   10318              :     }
   10319              : 
   10320              :   /* If using out-of-line stub that is a tail-call, then...*/
   10321      1642442 :   if (m->call_ms2sysv && restore_stub_is_tail)
   10322              :     {
   10323              :       /* TODO: paranoid tests. (remove eventually)  */
   10324         1123 :       gcc_assert (m->fs.sp_valid);
   10325         1123 :       gcc_assert (!m->fs.sp_realigned);
   10326         1123 :       gcc_assert (!m->fs.fp_valid);
   10327         1123 :       gcc_assert (!m->fs.realigned);
   10328         1123 :       gcc_assert (m->fs.sp_offset == UNITS_PER_WORD);
   10329         1123 :       gcc_assert (!crtl->drap_reg);
   10330         1123 :       gcc_assert (!frame.nregs);
   10331         1123 :       gcc_assert (!crtl->shrink_wrapped_separate);
   10332              :     }
   10333      1641319 :   else if (restore_regs_via_mov)
   10334              :     {
   10335       292531 :       rtx t;
   10336              : 
   10337       292531 :       if (frame.nregs)
   10338        97162 :         ix86_emit_restore_regs_using_mov (reg_save_offset, style == 2);
   10339              : 
   10340              :       /* eh_return epilogues need %ecx added to the stack pointer.  */
   10341       292531 :       if (style == 2)
   10342              :         {
   10343           37 :           rtx sa = EH_RETURN_STACKADJ_RTX;
   10344           29 :           rtx_insn *insn;
   10345              : 
   10346           29 :           gcc_assert (!crtl->shrink_wrapped_separate);
   10347              : 
   10348              :           /* Stack realignment doesn't work with eh_return.  */
   10349           29 :           if (crtl->stack_realign_needed)
   10350            0 :             sorry ("Stack realignment not supported with "
   10351              :                    "%<__builtin_eh_return%>");
   10352              : 
   10353              :           /* regparm nested functions don't work with eh_return.  */
   10354           29 :           if (ix86_static_chain_on_stack)
   10355            0 :             sorry ("regparm nested function not supported with "
   10356              :                    "%<__builtin_eh_return%>");
   10357              : 
   10358           29 :           if (frame_pointer_needed)
   10359              :             {
   10360           35 :               t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
   10361           43 :               t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
   10362           27 :               emit_insn (gen_rtx_SET (sa, t));
   10363              : 
   10364              :               /* NB: eh_return epilogues must restore the frame pointer
   10365              :                  in word_mode since the upper 32 bits of RBP register
   10366              :                  can have any values.  */
   10367           27 :               t = gen_frame_mem (word_mode, hard_frame_pointer_rtx);
   10368           27 :               rtx frame_reg = gen_rtx_REG (word_mode,
   10369              :                                            HARD_FRAME_POINTER_REGNUM);
   10370           27 :               insn = emit_move_insn (frame_reg, t);
   10371              : 
   10372              :               /* Note that we use SA as a temporary CFA, as the return
   10373              :                  address is at the proper place relative to it.  We
   10374              :                  pretend this happens at the FP restore insn because
   10375              :                  prior to this insn the FP would be stored at the wrong
   10376              :                  offset relative to SA, and after this insn we have no
   10377              :                  other reasonable register to use for the CFA.  We don't
   10378              :                  bother resetting the CFA to the SP for the duration of
   10379              :                  the return insn, unless the control flow instrumentation
   10380              :                  is done.  In this case the SP is used later and we have
   10381              :                  to reset CFA to SP.  */
   10382           27 :               add_reg_note (insn, REG_CFA_DEF_CFA,
   10383           35 :                             plus_constant (Pmode, sa, UNITS_PER_WORD));
   10384           27 :               ix86_add_queued_cfa_restore_notes (insn);
   10385           27 :               add_reg_note (insn, REG_CFA_RESTORE, frame_reg);
   10386           27 :               RTX_FRAME_RELATED_P (insn) = 1;
   10387              : 
   10388           27 :               m->fs.cfa_reg = sa;
   10389           27 :               m->fs.cfa_offset = UNITS_PER_WORD;
   10390           27 :               m->fs.fp_valid = false;
   10391              : 
   10392           27 :               pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
   10393              :                                          const0_rtx, style,
   10394           27 :                                          flag_cf_protection);
   10395              :             }
   10396              :           else
   10397              :             {
   10398            2 :               t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
   10399            2 :               t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
   10400            2 :               insn = emit_insn (gen_rtx_SET (stack_pointer_rtx, t));
   10401            2 :               ix86_add_queued_cfa_restore_notes (insn);
   10402              : 
   10403            2 :               gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
   10404            2 :               if (m->fs.cfa_offset != UNITS_PER_WORD)
   10405              :                 {
   10406            2 :                   m->fs.cfa_offset = UNITS_PER_WORD;
   10407            2 :                   add_reg_note (insn, REG_CFA_DEF_CFA,
   10408            2 :                                 plus_constant (Pmode, stack_pointer_rtx,
   10409            2 :                                                UNITS_PER_WORD));
   10410            2 :                   RTX_FRAME_RELATED_P (insn) = 1;
   10411              :                 }
   10412              :             }
   10413           29 :           m->fs.sp_offset = UNITS_PER_WORD;
   10414           29 :           m->fs.sp_valid = true;
   10415           29 :           m->fs.sp_realigned = false;
   10416              :         }
   10417              :     }
   10418              :   else
   10419              :     {
   10420              :       /* SEH requires that the function end with (1) a stack adjustment
   10421              :          if necessary, (2) a sequence of pops, and (3) a return or
   10422              :          jump instruction.  Prevent insns from the function body from
   10423              :          being scheduled into this sequence.  */
   10424      1348788 :       if (TARGET_SEH)
   10425              :         {
   10426              :           /* Prevent a catch region from being adjacent to the standard
   10427              :              epilogue sequence.  Unfortunately neither crtl->uses_eh_lsda
   10428              :              nor several other flags that would be interesting to test are
   10429              :              set up yet.  */
   10430              :           if (flag_non_call_exceptions)
   10431              :             emit_insn (gen_nops (const1_rtx));
   10432              :           else
   10433              :             emit_insn (gen_blockage ());
   10434              :         }
   10435              : 
   10436              :       /* First step is to deallocate the stack frame so that we can
   10437              :          pop the registers.  If the stack pointer was realigned, it needs
   10438              :          to be restored now.  Also do it on SEH target for very large
   10439              :          frame as the emitted instructions aren't allowed by the ABI
   10440              :          in epilogues.  */
   10441      1348788 :       if (!m->fs.sp_valid || m->fs.sp_realigned
   10442              :           || (TARGET_SEH
   10443              :               && (m->fs.sp_offset - reg_save_offset
   10444              :                   >= SEH_MAX_FRAME_SIZE)))
   10445              :         {
   10446        29762 :           pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
   10447        29762 :                                      GEN_INT (m->fs.fp_offset
   10448              :                                               - reg_save_offset),
   10449              :                                      style, false);
   10450              :         }
   10451      1319026 :       else if (m->fs.sp_offset != reg_save_offset)
   10452              :         {
   10453       611767 :           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   10454              :                                      GEN_INT (m->fs.sp_offset
   10455              :                                               - reg_save_offset),
   10456              :                                      style,
   10457       611767 :                                      m->fs.cfa_reg == stack_pointer_rtx);
   10458              :         }
   10459              : 
   10460      1348788 :       if (TARGET_APX_PUSH2POP2
   10461          561 :           && ix86_can_use_push2pop2 ()
   10462      1349347 :           && m->func_type == TYPE_NORMAL)
   10463          558 :         ix86_emit_restore_regs_using_pop2 ();
   10464              :       else
   10465      1348230 :         ix86_emit_restore_regs_using_pop (TARGET_APX_PPX);
   10466              :     }
   10467              : 
   10468              :   /* If we used a frame pointer and haven't already got rid of it,
   10469              :      then do so now.  */
   10470      1642442 :   if (m->fs.fp_valid)
   10471              :     {
   10472              :       /* If the stack pointer is valid and pointing at the frame
   10473              :          pointer store address, then we only need a pop.  */
   10474       473416 :       if (sp_valid_at (frame.hfp_save_offset)
   10475       473416 :           && m->fs.sp_offset == frame.hfp_save_offset)
   10476       232521 :         ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
   10477              :       /* Leave results in shorter dependency chains on CPUs that are
   10478              :          able to grok it fast.  */
   10479       240895 :       else if (TARGET_USE_LEAVE
   10480           12 :                || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
   10481       240907 :                || !cfun->machine->use_fast_prologue_epilogue)
   10482       240883 :         ix86_emit_leave (NULL);
   10483              :       else
   10484              :         {
   10485           12 :           pro_epilogue_adjust_stack (stack_pointer_rtx,
   10486              :                                      hard_frame_pointer_rtx,
   10487           12 :                                      const0_rtx, style, !using_drap);
   10488           12 :           ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
   10489              :         }
   10490              :     }
   10491              : 
   10492      1642442 :   if (using_drap)
   10493              :     {
   10494         7159 :       int param_ptr_offset = UNITS_PER_WORD;
   10495         7159 :       rtx_insn *insn;
   10496              : 
   10497         7159 :       gcc_assert (stack_realign_drap);
   10498              : 
   10499         7159 :       if (ix86_static_chain_on_stack)
   10500            0 :         param_ptr_offset += UNITS_PER_WORD;
   10501         7159 :       if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
   10502          230 :         param_ptr_offset += UNITS_PER_WORD;
   10503              : 
   10504         7464 :       insn = emit_insn (gen_rtx_SET
   10505              :                         (stack_pointer_rtx,
   10506              :                          plus_constant (Pmode, crtl->drap_reg,
   10507              :                                         -param_ptr_offset)));
   10508         7159 :       m->fs.cfa_reg = stack_pointer_rtx;
   10509         7159 :       m->fs.cfa_offset = param_ptr_offset;
   10510         7159 :       m->fs.sp_offset = param_ptr_offset;
   10511         7159 :       m->fs.realigned = false;
   10512              : 
   10513         7464 :       add_reg_note (insn, REG_CFA_DEF_CFA,
   10514         7159 :                     plus_constant (Pmode, stack_pointer_rtx,
   10515         7159 :                                    param_ptr_offset));
   10516         7159 :       RTX_FRAME_RELATED_P (insn) = 1;
   10517              : 
   10518         7159 :       if (!call_used_or_fixed_reg_p (REGNO (crtl->drap_reg)))
   10519          230 :         ix86_emit_restore_reg_using_pop (crtl->drap_reg);
   10520              :     }
   10521              : 
   10522              :   /* At this point the stack pointer must be valid, and we must have
   10523              :      restored all of the registers.  We may not have deallocated the
   10524              :      entire stack frame.  We've delayed this until now because it may
   10525              :      be possible to merge the local stack deallocation with the
   10526              :      deallocation forced by ix86_static_chain_on_stack.   */
   10527      1642442 :   gcc_assert (m->fs.sp_valid);
   10528      1642442 :   gcc_assert (!m->fs.sp_realigned);
   10529      1642442 :   gcc_assert (!m->fs.fp_valid);
   10530      1642442 :   gcc_assert (!m->fs.realigned);
   10531      1777800 :   if (m->fs.sp_offset != UNITS_PER_WORD)
   10532              :     {
   10533        51593 :       pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   10534              :                                  GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
   10535              :                                  style, true);
   10536              :     }
   10537              :   else
   10538      1590849 :     ix86_add_queued_cfa_restore_notes (get_last_insn ());
   10539              : 
   10540              :   /* Sibcall epilogues don't want a return instruction.  */
   10541      1642442 :   if (style == 0)
   10542              :     {
   10543       125359 :       m->fs = frame_state_save;
   10544       125359 :       return;
   10545              :     }
   10546              : 
   10547      1517083 :   if (cfun->machine->func_type != TYPE_NORMAL)
   10548          120 :     emit_jump_insn (gen_interrupt_return ());
   10549      1516963 :   else if (crtl->args.pops_args && crtl->args.size)
   10550              :     {
   10551        25896 :       rtx popc = GEN_INT (crtl->args.pops_args);
   10552              : 
   10553              :       /* i386 can only pop 64K bytes.  If asked to pop more, pop return
   10554              :          address, do explicit add, and jump indirectly to the caller.  */
   10555              : 
   10556        25896 :       if (crtl->args.pops_args >= 65536)
   10557              :         {
   10558            0 :           rtx ecx = gen_rtx_REG (SImode, CX_REG);
   10559            0 :           rtx_insn *insn;
   10560              : 
   10561              :           /* There is no "pascal" calling convention in any 64bit ABI.  */
   10562            0 :           gcc_assert (!TARGET_64BIT);
   10563              : 
   10564            0 :           insn = emit_insn (gen_pop (ecx));
   10565            0 :           m->fs.cfa_offset -= UNITS_PER_WORD;
   10566            0 :           m->fs.sp_offset -= UNITS_PER_WORD;
   10567              : 
   10568            0 :           rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
   10569            0 :           x = gen_rtx_SET (stack_pointer_rtx, x);
   10570            0 :           add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
   10571            0 :           add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
   10572            0 :           RTX_FRAME_RELATED_P (insn) = 1;
   10573              : 
   10574            0 :           pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
   10575              :                                      popc, -1, true);
   10576            0 :           emit_jump_insn (gen_simple_return_indirect_internal (ecx));
   10577              :         }
   10578              :       else
   10579        25896 :         emit_jump_insn (gen_simple_return_pop_internal (popc));
   10580              :     }
   10581      1491067 :   else if (!m->call_ms2sysv || !restore_stub_is_tail)
   10582              :     {
   10583              :       /* In case of return from EH a simple return cannot be used
   10584              :          as a return address will be compared with a shadow stack
   10585              :          return address.  Use indirect jump instead.  */
   10586      1489944 :       if (style == 2 && flag_cf_protection)
   10587              :         {
   10588              :           /* Register used in indirect jump must be in word_mode.  But
   10589              :              Pmode may not be the same as word_mode for x32.  */
   10590           17 :           rtx ecx = gen_rtx_REG (word_mode, CX_REG);
   10591           17 :           rtx_insn *insn;
   10592              : 
   10593           17 :           insn = emit_insn (gen_pop (ecx));
   10594           17 :           m->fs.cfa_offset -= UNITS_PER_WORD;
   10595           17 :           m->fs.sp_offset -= UNITS_PER_WORD;
   10596              : 
   10597           33 :           rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
   10598           17 :           x = gen_rtx_SET (stack_pointer_rtx, x);
   10599           17 :           add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
   10600           17 :           add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (ecx, pc_rtx));
   10601           17 :           RTX_FRAME_RELATED_P (insn) = 1;
   10602              : 
   10603           17 :           emit_jump_insn (gen_simple_return_indirect_internal (ecx));
   10604           17 :         }
   10605              :       else
   10606      1489927 :         emit_jump_insn (gen_simple_return_internal ());
   10607              :     }
   10608              : 
   10609              :   /* Restore the state back to the state from the prologue,
   10610              :      so that it's correct for the next epilogue.  */
   10611      1517083 :   m->fs = frame_state_save;
   10612              : }
   10613              : 
/* Reset from the function's potential modifications.  Implements
   TARGET_ASM_FUNCTION_EPILOGUE; called after the epilogue assembly of
   the current function has been written to FILE.  */

static void
ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
{
  /* If the PIC register is a hard register, restore its canonical
     hard register number for the next function.  */
  if (pic_offset_table_rtx
      && !ix86_use_pseudo_pic_reg ())
    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);

  if (TARGET_MACHO)
    {
      rtx_insn *insn = get_last_insn ();
      rtx_insn *deleted_debug_label = NULL;

      /* Mach-O doesn't support labels at the end of objects, so if
         it looks like we might want one, take special action.
        First, collect any sequence of deleted debug labels.  */
      while (insn
             && NOTE_P (insn)
             && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
        {
          /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
             notes only, instead set their CODE_LABEL_NUMBER to -1,
             otherwise there would be code generation differences
             in between -g and -g0.  */
          if (NOTE_P (insn) && NOTE_KIND (insn)
              == NOTE_INSN_DELETED_DEBUG_LABEL)
            deleted_debug_label = insn;
          insn = PREV_INSN (insn);
        }

      /* If we have:
         label:
            barrier
          then this needs to be detected, so skip past the barrier.  */

      if (insn && BARRIER_P (insn))
        insn = PREV_INSN (insn);

      /* Up to now we've only seen notes or barriers.  */
      if (insn)
        {
          if (LABEL_P (insn)
              || (NOTE_P (insn)
                  && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
            /* Trailing label.  */
            fputs ("\tnop\n", file);
          else if (cfun && ! cfun->is_thunk)
            {
              /* See if we have a completely empty function body, skipping
                 the special case of the picbase thunk emitted as asm.  */
              while (insn && ! INSN_P (insn))
                insn = PREV_INSN (insn);
              /* If we don't find any insns, we've got an empty function body;
                 I.e. completely empty - without a return or branch.  This is
                 taken as the case where a function body has been removed
                 because it contains an inline __builtin_unreachable().  GCC
                 declares that reaching __builtin_unreachable() means UB so
                 we're not obliged to do anything special; however, we want
                 non-zero-sized function bodies.  To meet this, and help the
                 user out, let's trap the case.  */
              if (insn == NULL)
                fputs ("\tud2\n", file);
            }
        }
      /* Only deleted debug labels were found; neutralize their label
         numbers so -g and -g0 produce identical code.  */
      else if (deleted_debug_label)
        for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
          if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
            CODE_LABEL_NUMBER (insn) = -1;
    }
}
   10685              : 
   10686              : /* Implement TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY.  */
   10687              : 
   10688              : void
   10689           59 : ix86_print_patchable_function_entry (FILE *file,
   10690              :                                      unsigned HOST_WIDE_INT patch_area_size,
   10691              :                                      bool record_p)
   10692              : {
   10693           59 :   if (cfun->machine->function_label_emitted)
   10694              :     {
   10695              :       /* NB: When ix86_print_patchable_function_entry is called after
   10696              :          function table has been emitted, we have inserted or queued
   10697              :          a pseudo UNSPECV_PATCHABLE_AREA instruction at the proper
   10698              :          place.  There is nothing to do here.  */
   10699              :       return;
   10700              :     }
   10701              : 
   10702            8 :   default_print_patchable_function_entry (file, patch_area_size,
   10703              :                                           record_p);
   10704              : }
   10705              : 
   10706              : /* Output patchable area.  NB: default_print_patchable_function_entry
   10707              :    isn't available in i386.md.  */
   10708              : 
   10709              : void
   10710           51 : ix86_output_patchable_area (unsigned int patch_area_size,
   10711              :                             bool record_p)
   10712              : {
   10713           51 :   default_print_patchable_function_entry (asm_out_file,
   10714              :                                           patch_area_size,
   10715              :                                           record_p);
   10716           51 : }
   10717              : 
   10718              : /* Return a scratch register to use in the split stack prologue.  The
   10719              :    split stack prologue is used for -fsplit-stack.  It is the first
   10720              :    instructions in the function, even before the regular prologue.
   10721              :    The scratch register can be any caller-saved register which is not
   10722              :    used for parameters or for the static chain.  */
   10723              : 
   10724              : static unsigned int
   10725        24609 : split_stack_prologue_scratch_regno (void)
   10726              : {
   10727        24609 :   if (TARGET_64BIT)
   10728              :     return R11_REG;
   10729              :   else
   10730              :     {
   10731         6949 :       bool is_fastcall, is_thiscall;
   10732         6949 :       int regparm;
   10733              : 
   10734         6949 :       is_fastcall = (lookup_attribute ("fastcall",
   10735         6949 :                                        TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
   10736              :                      != NULL);
   10737         6949 :       is_thiscall = (lookup_attribute ("thiscall",
   10738         6949 :                                        TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
   10739              :                      != NULL);
   10740         6949 :       regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
   10741              : 
   10742         6949 :       if (is_fastcall)
   10743              :         {
   10744            0 :           if (DECL_STATIC_CHAIN (cfun->decl))
   10745              :             {
   10746            0 :               sorry ("%<-fsplit-stack%> does not support fastcall with "
   10747              :                      "nested function");
   10748            0 :               return INVALID_REGNUM;
   10749              :             }
   10750              :           return AX_REG;
   10751              :         }
   10752         6949 :       else if (is_thiscall)
   10753              :         {
   10754            0 :           if (!DECL_STATIC_CHAIN (cfun->decl))
   10755              :             return DX_REG;
   10756            0 :           return AX_REG;
   10757              :         }
   10758         6949 :       else if (regparm < 3)
   10759              :         {
   10760         6949 :           if (!DECL_STATIC_CHAIN (cfun->decl))
   10761              :             return CX_REG;
   10762              :           else
   10763              :             {
   10764          459 :               if (regparm >= 2)
   10765              :                 {
   10766            0 :                   sorry ("%<-fsplit-stack%> does not support 2 register "
   10767              :                          "parameters for a nested function");
   10768            0 :                   return INVALID_REGNUM;
   10769              :                 }
   10770              :               return DX_REG;
   10771              :             }
   10772              :         }
   10773              :       else
   10774              :         {
   10775              :           /* FIXME: We could make this work by pushing a register
   10776              :              around the addition and comparison.  */
   10777            0 :           sorry ("%<-fsplit-stack%> does not support 3 register parameters");
   10778            0 :           return INVALID_REGNUM;
   10779              :         }
   10780              :     }
   10781              : }
   10782              : 
/* A SYMBOL_REF for the function which allocates new stackspace for
   -fsplit-stack.  Created lazily and cached across functions; the
   GTY marker keeps it alive across garbage collection.  */

static GTY(()) rtx split_stack_fn;

/* A SYMBOL_REF for the more stack function when using the large model.
   Also lazily created and GC-protected.  */

static GTY(()) rtx split_stack_fn_large;
   10791              : 
   10792              : /* Return location of the stack guard value in the TLS block.  */
   10793              : 
   10794              : rtx
   10795       259942 : ix86_split_stack_guard (void)
   10796              : {
   10797       259942 :   int offset;
   10798       259942 :   addr_space_t as = DEFAULT_TLS_SEG_REG;
   10799       259942 :   rtx r;
   10800              : 
   10801       259942 :   gcc_assert (flag_split_stack);
   10802              : 
   10803              : #ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
   10804       259942 :   offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
   10805              : #else
   10806              :   gcc_unreachable ();
   10807              : #endif
   10808              : 
   10809       259942 :   r = GEN_INT (offset);
   10810       357899 :   r = gen_const_mem (Pmode, r);
   10811       259942 :   set_mem_addr_space (r, as);
   10812              : 
   10813       259942 :   return r;
   10814              : }
   10815              : 
/* Handle -fsplit-stack.  These are the first instructions in the
   function, even before the regular prologue.  Emits a stack-limit
   check against the TCB guard and, on the slow path, a call to
   __morestack (or __morestack_large_model) to grow the stack.  */

void
ix86_expand_split_stack_prologue (void)
{
  HOST_WIDE_INT allocate;
  unsigned HOST_WIDE_INT args_size;
  rtx_code_label *label;
  rtx limit, current, allocate_rtx, call_fusage;
  rtx_insn *call_insn;
  unsigned int scratch_regno = INVALID_REGNUM;
  rtx scratch_reg = NULL_RTX;
  rtx_code_label *varargs_label = NULL;
  rtx fn;

  gcc_assert (flag_split_stack && reload_completed);

  ix86_finalize_stack_frame_flags ();
  struct ix86_frame &frame = cfun->machine->frame;
  /* Bytes of stack this function needs below the incoming SP.  */
  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;

  /* This is the label we will branch to if we have enough stack
     space.  We expect the basic block reordering pass to reverse this
     branch if optimizing, so that we branch in the unlikely case.  */
  label = gen_label_rtx ();

  /* We need to compare the stack pointer minus the frame size with
     the stack boundary in the TCB.  The stack boundary always gives
     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
     can compare directly.  Otherwise we need to do an addition.  */

  limit = ix86_split_stack_guard ();

  /* A scratch register is needed either to form SP - allocate, or to
     hold the callee address for a forced indirect call.  */
  if (allocate >= SPLIT_STACK_AVAILABLE
      || flag_force_indirect_call)
    {
      scratch_regno = split_stack_prologue_scratch_regno ();
      if (scratch_regno == INVALID_REGNUM)
        return;
    }

  if (allocate >= SPLIT_STACK_AVAILABLE)
    {
      rtx offset;

      /* We need a scratch register to hold the stack pointer minus
         the required frame size.  Since this is the very start of the
         function, the scratch register can be any caller-saved
         register which is not used for parameters.  */
      offset = GEN_INT (- allocate);

      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
        {
          /* We don't use gen_add in this case because it will
             want to split to lea, but when not optimizing the insn
             will not be split after this point.  */
          emit_insn (gen_rtx_SET (scratch_reg,
                                  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                                offset)));
        }
      else
        {
          /* Offset doesn't fit in a sign-extended 32-bit immediate;
             materialize it first, then add SP.  */
          emit_move_insn (scratch_reg, offset);
          emit_insn (gen_add2_insn (scratch_reg, stack_pointer_rtx));
        }
      current = scratch_reg;
    }
  else
    current = stack_pointer_rtx;

  /* If CURRENT >= LIMIT there is enough stack: jump to LABEL.  */
  ix86_expand_branch (GEU, current, limit, label);
  rtx_insn *jump_insn = get_last_insn ();
  JUMP_LABEL (jump_insn) = label;

  /* Mark the jump as very likely to be taken.  */
  add_reg_br_prob_note (jump_insn, profile_probability::very_likely ());

  if (split_stack_fn == NULL_RTX)
    {
      split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
      SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
    }
  fn = split_stack_fn;

  /* Get more stack space.  We pass in the desired stack space and the
     size of the arguments to copy to the new stack.  In 32-bit mode
     we push the parameters; __morestack will return on a new stack
     anyhow.  In 64-bit mode we pass the parameters in r10 and
     r11.  */
  allocate_rtx = GEN_INT (allocate);
  args_size = crtl->args.size >= 0 ? (HOST_WIDE_INT) crtl->args.size : 0;
  call_fusage = NULL_RTX;
  rtx pop = NULL_RTX;
  if (TARGET_64BIT)
    {
      rtx reg10, reg11;

      reg10 = gen_rtx_REG (DImode, R10_REG);
      reg11 = gen_rtx_REG (DImode, R11_REG);

      /* If this function uses a static chain, it will be in %r10.
         Preserve it across the call to __morestack.  */
      if (DECL_STATIC_CHAIN (cfun->decl))
        {
          rtx rax;

          rax = gen_rtx_REG (word_mode, AX_REG);
          emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
          use_reg (&call_fusage, rax);
        }

      if (flag_force_indirect_call
          || ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
        {
          HOST_WIDE_INT argval;

          if (split_stack_fn_large == NULL_RTX)
            {
              split_stack_fn_large
                = gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
              SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
            }

          fn = split_stack_fn_large;

          if (ix86_cmodel == CM_LARGE_PIC)
            {
              rtx_code_label *label;
              rtx x;

              gcc_assert (Pmode == DImode);

              /* Compute the GOT base (label address plus GOT offset)
                 in r10, then load the function address from its GOT
                 slot into r11.  */
              label = gen_label_rtx ();
              emit_label (label);
              LABEL_PRESERVE_P (label) = 1;
              emit_insn (gen_set_rip_rex64 (reg10, label));
              emit_insn (gen_set_got_offset_rex64 (reg11, label));
              emit_insn (gen_add2_insn (reg10, reg11));
              x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fn), UNSPEC_GOT);
              x = gen_rtx_CONST (Pmode, x);
              emit_move_insn (reg11, x);
              x = gen_rtx_PLUS (Pmode, reg10, reg11);
              x = gen_const_mem (Pmode, x);
              fn = copy_to_suggested_reg (x, reg11, Pmode);
            }
          else if (ix86_cmodel == CM_LARGE)
            fn = copy_to_suggested_reg (fn, reg11, Pmode);

          /* When using the large model we need to load the address
             into a register, and we've run out of registers.  So we
             switch to a different calling convention, and we call a
             different function: __morestack_large.  We pass the
             argument size in the upper 32 bits of r10 and pass the
             frame size in the lower 32 bits.  */
          gcc_assert ((allocate & HOST_WIDE_INT_C (0xffffffff)) == allocate);
          gcc_assert ((args_size & 0xffffffff) == args_size);

          argval = ((args_size << 16) << 16) + allocate;
          emit_move_insn (reg10, GEN_INT (argval));
        }
      else
        {
          /* Normal 64-bit convention: frame size in r10, argument
             size in r11.  */
          emit_move_insn (reg10, allocate_rtx);
          emit_move_insn (reg11, GEN_INT (args_size));
          use_reg (&call_fusage, reg11);
        }

      use_reg (&call_fusage, reg10);
    }
  else
    {
      if (flag_force_indirect_call && flag_pic)
        {
          rtx x;

          gcc_assert (Pmode == SImode);

          scratch_reg = gen_rtx_REG (Pmode, scratch_regno);

          /* Load __morestack's address from the GOT: set up the GOT
             pointer in the scratch register, then dereference the
             GOT slot.  */
          emit_insn (gen_set_got (scratch_reg));
          x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn),
                              UNSPEC_GOT);
          x = gen_rtx_CONST (Pmode, x);
          x = gen_rtx_PLUS (Pmode, scratch_reg, x);
          x = gen_const_mem (Pmode, x);
          fn = copy_to_suggested_reg (x, scratch_reg, Pmode);
        }

      /* 32-bit: push args_size and allocate as stack parameters,
         annotating each push with its running REG_ARGS_SIZE and
         arranging for the call to pop both words.  */
      rtx_insn *insn = emit_insn (gen_push (GEN_INT (args_size)));
      add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (UNITS_PER_WORD));
      insn = emit_insn (gen_push (allocate_rtx));
      add_reg_note (insn, REG_ARGS_SIZE, GEN_INT (2 * UNITS_PER_WORD));
      pop = GEN_INT (2 * UNITS_PER_WORD);
    }

  if (flag_force_indirect_call && !register_operand (fn, VOIDmode))
    {
      scratch_reg = gen_rtx_REG (word_mode, scratch_regno);

      if (GET_MODE (fn) != word_mode)
        fn = gen_rtx_ZERO_EXTEND (word_mode, fn);

      fn = copy_to_suggested_reg (fn, scratch_reg, word_mode);
    }

  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
                                GEN_INT (UNITS_PER_WORD), constm1_rtx,
                                pop, false);
  add_function_usage_to (call_insn, call_fusage);
  if (!TARGET_64BIT)
    add_reg_note (call_insn, REG_ARGS_SIZE, GEN_INT (0));
  /* Indicate that this function can't jump to non-local gotos.  */
  make_reg_eh_region_note_nothrow_nononlocal (call_insn);

  /* In order to make call/return prediction work right, we now need
     to execute a return instruction.  See
     libgcc/config/i386/morestack.S for the details on how this works.

     For flow purposes gcc must not see this as a return
     instruction--we need control flow to continue at the subsequent
     label.  Therefore, we use an unspec.  */
  gcc_assert (crtl->args.pops_args < 65536);
  rtx_insn *ret_insn
    = emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));

  if ((flag_cf_protection & CF_BRANCH))
    {
      /* Insert ENDBR since __morestack will jump back here via indirect
         call.  */
      rtx cet_eb = gen_nop_endbr ();
      emit_insn_after (cet_eb, ret_insn);
    }

  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling _morestack.  */
  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
                    gen_rtx_REG (word_mode, AX_REG));

  /* If this function calls va_start, we need to store a pointer to
     the arguments on the old stack, because they may not have been
     all copied to the new stack.  At this point the old stack can be
     found at the frame pointer value used by __morestack, because
     __morestack has set that up before calling back to us.  Here we
     store that pointer in a scratch register, and in
     ix86_expand_prologue we store the scratch register in a stack
     slot.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      rtx frame_reg;
      int words;

      scratch_regno = split_stack_prologue_scratch_regno ();
      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
      frame_reg = gen_rtx_REG (Pmode, BP_REG);

      /* 64-bit:
         fp -> old fp value
               return address within this function
               return address of caller of this function
               stack arguments
         So we add three words to get to the stack arguments.

         32-bit:
         fp -> old fp value
               return address within this function
               first argument to __morestack
               second argument to __morestack
               return address of caller of this function
               stack arguments
         So we add five words to get to the stack arguments.
      */
      words = TARGET_64BIT ? 3 : 5;
      emit_insn (gen_rtx_SET (scratch_reg,
                              plus_constant (Pmode, frame_reg,
                                             words * UNITS_PER_WORD)));

      /* Skip the fast-path code below; it will recompute the scratch
         register from the (unchanged) stack pointer.  */
      varargs_label = gen_label_rtx ();
      emit_jump_insn (gen_jump (varargs_label));
      JUMP_LABEL (get_last_insn ()) = varargs_label;

      emit_barrier ();
    }

  emit_label (label);
  LABEL_NUSES (label) = 1;

  /* If this function calls va_start, we now have to set the scratch
     register for the case where we do not call __morestack.  In this
     case we need to set it based on the stack pointer.  */
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      emit_insn (gen_rtx_SET (scratch_reg,
                              plus_constant (Pmode, stack_pointer_rtx,
                                             UNITS_PER_WORD)));

      emit_label (varargs_label);
      LABEL_NUSES (varargs_label) = 1;
    }
}
   11118              : 
   11119              : /* We may have to tell the dataflow pass that the split stack prologue
   11120              :    is initializing a scratch register.  */
   11121              : 
   11122              : static void
   11123     15780298 : ix86_live_on_entry (bitmap regs)
   11124              : {
   11125     15780298 :   if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
   11126              :     {
   11127          124 :       gcc_assert (flag_split_stack);
   11128          124 :       bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
   11129              :     }
   11130     15780298 : }
   11131              : 
   11132              : /* Extract the parts of an RTL expression that is a valid memory address
   11133              :    for an instruction.  Return false if the structure of the address is
   11134              :    grossly off.  */
   11135              : 
   11136              : bool
   11137   4325279305 : ix86_decompose_address (rtx addr, struct ix86_address *out)
   11138              : {
   11139   4325279305 :   rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
   11140   4325279305 :   rtx base_reg, index_reg;
   11141   4325279305 :   HOST_WIDE_INT scale = 1;
   11142   4325279305 :   rtx scale_rtx = NULL_RTX;
   11143   4325279305 :   rtx tmp;
   11144   4325279305 :   addr_space_t seg = ADDR_SPACE_GENERIC;
   11145              : 
   11146              :   /* Allow zero-extended SImode addresses,
   11147              :      they will be emitted with addr32 prefix.  */
   11148   4325279305 :   if (TARGET_64BIT && GET_MODE (addr) == DImode)
   11149              :     {
   11150   2285274221 :       if (GET_CODE (addr) == ZERO_EXTEND
   11151      2290237 :           && GET_MODE (XEXP (addr, 0)) == SImode)
   11152              :         {
   11153      2194579 :           addr = XEXP (addr, 0);
   11154      2194579 :           if (CONST_INT_P (addr))
   11155              :             return false;
   11156              :         }
   11157   2283079642 :       else if (GET_CODE (addr) == AND)
   11158              :         {
   11159      2773186 :           rtx mask = XEXP (addr, 1);
   11160      2773186 :           rtx shift_val;
   11161              : 
   11162      2773186 :           if (const_32bit_mask (mask, DImode)
   11163              :               /* For ASHIFT inside AND, combine will not generate
   11164              :                  canonical zero-extend. Merge mask for AND and shift_count
   11165              :                  to check if it is canonical zero-extend.  */
   11166      2773186 :               || (CONST_INT_P (mask)
   11167      1781341 :                   && GET_CODE (XEXP (addr, 0)) == ASHIFT
   11168       138858 :                   && CONST_INT_P (shift_val = XEXP (XEXP (addr, 0), 1))
   11169       135770 :                   && ((UINTVAL (mask)
   11170       135770 :                        | ((HOST_WIDE_INT_1U << INTVAL (shift_val)) - 1))
   11171              :                       == HOST_WIDE_INT_UC (0xffffffff))))
   11172              :             {
   11173        81296 :               addr = lowpart_subreg (SImode, XEXP (addr, 0), DImode);
   11174        81296 :               if (addr == NULL_RTX)
   11175              :                 return false;
   11176              : 
   11177        81296 :               if (CONST_INT_P (addr))
   11178              :                 return false;
   11179              :             }
   11180              :         }
   11181              :     }
   11182              : 
   11183              :   /* Allow SImode subregs of DImode addresses,
   11184              :      they will be emitted with addr32 prefix.  */
   11185   4325279305 :   if (TARGET_64BIT && GET_MODE (addr) == SImode)
   11186              :     {
   11187     17790579 :       if (SUBREG_P (addr)
   11188       215679 :           && GET_MODE (SUBREG_REG (addr)) == DImode)
   11189              :         {
   11190       194130 :           addr = SUBREG_REG (addr);
   11191       194130 :           if (CONST_INT_P (addr))
   11192              :             return false;
   11193              :         }
   11194              :     }
   11195              : 
   11196   4325279305 :   if (REG_P (addr))
   11197              :     base = addr;
   11198              :   else if (SUBREG_P (addr))
   11199              :     {
   11200       454510 :       if (REG_P (SUBREG_REG (addr)))
   11201              :         base = addr;
   11202              :       else
   11203              :         return false;
   11204              :     }
   11205              :   else if (GET_CODE (addr) == PLUS)
   11206              :     {
   11207              :       rtx addends[4], op;
   11208              :       int n = 0, i;
   11209              : 
   11210              :       op = addr;
   11211   3157802711 :       do
   11212              :         {
   11213   3157802711 :           if (n >= 4)
   11214    642926351 :             return false;
   11215   3157796553 :           addends[n++] = XEXP (op, 1);
   11216   3157796553 :           op = XEXP (op, 0);
   11217              :         }
   11218   3157796553 :       while (GET_CODE (op) == PLUS);
   11219   3093832611 :       if (n >= 4)
   11220              :         return false;
   11221   3093824416 :       addends[n] = op;
   11222              : 
   11223   8062677401 :       for (i = n; i >= 0; --i)
   11224              :         {
   11225   5611764983 :           op = addends[i];
   11226   5611764983 :           switch (GET_CODE (op))
   11227              :             {
   11228     61401076 :             case MULT:
   11229     61401076 :               if (index)
   11230              :                 return false;
   11231     61359414 :               index = XEXP (op, 0);
   11232     61359414 :               scale_rtx = XEXP (op, 1);
   11233     61359414 :               break;
   11234              : 
   11235     12917743 :             case ASHIFT:
   11236     12917743 :               if (index)
   11237              :                 return false;
   11238     12846600 :               index = XEXP (op, 0);
   11239     12846600 :               tmp = XEXP (op, 1);
   11240     12846600 :               if (!CONST_INT_P (tmp))
   11241              :                 return false;
   11242     12832015 :               scale = INTVAL (tmp);
   11243     12832015 :               if ((unsigned HOST_WIDE_INT) scale > 3)
   11244              :                 return false;
   11245     12417538 :               scale = 1 << scale;
   11246     12417538 :               break;
   11247              : 
   11248      1110683 :             case ZERO_EXTEND:
   11249      1110683 :               op = XEXP (op, 0);
   11250      1110683 :               if (GET_CODE (op) != UNSPEC)
   11251              :                 return false;
   11252              :               /* FALLTHRU */
   11253              : 
   11254       707661 :             case UNSPEC:
   11255       707661 :               if (XINT (op, 1) == UNSPEC_TP
   11256       699449 :                   && TARGET_TLS_DIRECT_SEG_REFS
   11257       699449 :                   && seg == ADDR_SPACE_GENERIC)
   11258       699449 :                 seg = DEFAULT_TLS_SEG_REG;
   11259              :               else
   11260              :                 return false;
   11261              :               break;
   11262              : 
   11263       547244 :             case SUBREG:
   11264       547244 :               if (!REG_P (SUBREG_REG (op)))
   11265              :                 return false;
   11266              :               /* FALLTHRU */
   11267              : 
   11268   2521482594 :             case REG:
   11269   2521482594 :               if (!base)
   11270              :                 base = op;
   11271     82486855 :               else if (!index)
   11272              :                 index = op;
   11273              :               else
   11274              :                 return false;
   11275              :               break;
   11276              : 
   11277   2373714550 :             case CONST:
   11278   2373714550 :             case CONST_INT:
   11279   2373714550 :             case SYMBOL_REF:
   11280   2373714550 :             case LABEL_REF:
   11281   2373714550 :               if (disp)
   11282              :                 return false;
   11283              :               disp = op;
   11284              :               break;
   11285              : 
   11286              :             default:
   11287              :               return false;
   11288              :             }
   11289              :         }
   11290              :     }
   11291              :   else if (GET_CODE (addr) == MULT)
   11292              :     {
   11293      3610331 :       index = XEXP (addr, 0);           /* index*scale */
   11294      3610331 :       scale_rtx = XEXP (addr, 1);
   11295              :     }
   11296              :   else if (GET_CODE (addr) == ASHIFT)
   11297              :     {
   11298              :       /* We're called for lea too, which implements ashift on occasion.  */
   11299      3271145 :       index = XEXP (addr, 0);
   11300      3271145 :       tmp = XEXP (addr, 1);
   11301      3271145 :       if (!CONST_INT_P (tmp))
   11302              :         return false;
   11303      2889140 :       scale = INTVAL (tmp);
   11304      2889140 :       if ((unsigned HOST_WIDE_INT) scale > 3)
   11305              :         return false;
   11306      2140672 :       scale = 1 << scale;
   11307              :     }
   11308              :   else
   11309              :     disp = addr;                        /* displacement */
   11310              : 
   11311   2456663421 :   if (index)
   11312              :     {
   11313    152296721 :       if (REG_P (index))
   11314              :         ;
   11315      4050531 :       else if (SUBREG_P (index)
   11316       289253 :                && REG_P (SUBREG_REG (index)))
   11317              :         ;
   11318              :       else
   11319              :         return false;
   11320              :     }
   11321              : 
   11322              :   /* Extract the integral value of scale.  */
   11323   3677402440 :   if (scale_rtx)
   11324              :     {
   11325     56550328 :       if (!CONST_INT_P (scale_rtx))
   11326              :         return false;
   11327     55957594 :       scale = INTVAL (scale_rtx);
   11328              :     }
   11329              : 
   11330   3676809706 :   base_reg = base && SUBREG_P (base) ? SUBREG_REG (base) : base;
   11331   3676809706 :   index_reg = index && SUBREG_P (index) ? SUBREG_REG (index) : index;
   11332              : 
   11333              :   /* Avoid useless 0 displacement.  */
   11334   3676809706 :   if (disp == const0_rtx && (base || index))
   11335   3676809706 :     disp = NULL_RTX;
   11336              : 
   11337              :   /* Allow arg pointer and stack pointer as index if there is not scaling.  */
   11338   2678400036 :   if (base_reg && index_reg && scale == 1
   11339   3757806173 :       && (REGNO (index_reg) == ARG_POINTER_REGNUM
   11340              :           || REGNO (index_reg) == FRAME_POINTER_REGNUM
   11341              :           || REGNO (index_reg) == SP_REG))
   11342              :     {
   11343              :       std::swap (base, index);
   11344              :       std::swap (base_reg, index_reg);
   11345              :     }
   11346              : 
   11347              :   /* Special case: %ebp cannot be encoded as a base without a displacement.
   11348              :      Similarly %r13.  */
   11349    323115771 :   if (!disp && base_reg
   11350   3995700383 :       && (REGNO (base_reg) == ARG_POINTER_REGNUM
   11351              :           || REGNO (base_reg) == FRAME_POINTER_REGNUM
   11352              :           || REGNO (base_reg) == BP_REG
   11353              :           || REGNO (base_reg) == R13_REG))
   11354              :     disp = const0_rtx;
   11355              : 
   11356              :   /* Special case: on K6, [%esi] makes the instruction vector decoded.
   11357              :      Avoid this by transforming to [%esi+0].
   11358              :      Reload calls address legitimization without cfun defined, so we need
   11359              :      to test cfun for being non-NULL. */
   11360            0 :   if (TARGET_CPU_P (K6) && cfun && optimize_function_for_speed_p (cfun)
   11361            0 :       && base_reg && !index_reg && !disp
   11362   3676809706 :       && REGNO (base_reg) == SI_REG)
   11363            0 :     disp = const0_rtx;
   11364              : 
   11365              :   /* Special case: encode reg+reg instead of reg*2.  */
   11366   3676809706 :   if (!base && index && scale == 2)
   11367    998409670 :     base = index, base_reg = index_reg, scale = 1;
   11368              : 
   11369              :   /* Special case: scaling cannot be encoded without base or displacement.  */
   11370    998409670 :   if (!base && !disp && index && scale != 1)
   11371      3304238 :     disp = const0_rtx;
   11372              : 
   11373   3676809706 :   out->base = base;
   11374   3676809706 :   out->index = index;
   11375   3676809706 :   out->disp = disp;
   11376   3676809706 :   out->scale = scale;
   11377   3676809706 :   out->seg = seg;
   11378              : 
   11379   3676809706 :   return true;
   11380              : }
   11381              : 
   11382              : /* Return cost of the memory address x.
   11383              :    For i386, it is better to use a complex address than let gcc copy
   11384              :    the address into a reg and make a new pseudo.  But not if the address
   11385              :    requires to two regs - that would mean more pseudos with longer
   11386              :    lifetimes.  */
   11387              : static int
   11388     10783477 : ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
   11389              : {
   11390     10783477 :   struct ix86_address parts;
   11391     10783477 :   int cost = 1;
   11392     10783477 :   int ok = ix86_decompose_address (x, &parts);
   11393              : 
   11394     10783477 :   gcc_assert (ok);
   11395              : 
   11396     10783477 :   if (parts.base && SUBREG_P (parts.base))
   11397          500 :     parts.base = SUBREG_REG (parts.base);
   11398     10783477 :   if (parts.index && SUBREG_P (parts.index))
   11399           21 :     parts.index = SUBREG_REG (parts.index);
   11400              : 
   11401              :   /* Attempt to minimize number of registers in the address by increasing
   11402              :      address cost for each used register.  We don't increase address cost
   11403              :      for "pic_offset_table_rtx".  When a memopt with "pic_offset_table_rtx"
   11404              :      is not invariant itself it most likely means that base or index is not
   11405              :      invariant.  Therefore only "pic_offset_table_rtx" could be hoisted out,
   11406              :      which is not profitable for x86.  */
   11407     10783477 :   if (parts.base
   11408      9375237 :       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
   11409     19866594 :       && (current_pass->type == GIMPLE_PASS
   11410      2748058 :           || !pic_offset_table_rtx
   11411       132665 :           || !REG_P (parts.base)
   11412       132665 :           || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
   11413              :     cost++;
   11414              : 
   11415     10783477 :   if (parts.index
   11416      5170130 :       && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
   11417     15939452 :       && (current_pass->type == GIMPLE_PASS
   11418       669497 :           || !pic_offset_table_rtx
   11419        57276 :           || !REG_P (parts.index)
   11420        57276 :           || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
   11421      5154780 :     cost++;
   11422              : 
   11423              :   /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
   11424              :      since it's predecode logic can't detect the length of instructions
   11425              :      and it degenerates to vector decoded.  Increase cost of such
   11426              :      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
   11427              :      to split such addresses or even refuse such addresses at all.
   11428              : 
   11429              :      Following addressing modes are affected:
   11430              :       [base+scale*index]
   11431              :       [scale*index+disp]
   11432              :       [base+index]
   11433              : 
   11434              :      The first and last case  may be avoidable by explicitly coding the zero in
   11435              :      memory address, but I don't have AMD-K6 machine handy to check this
   11436              :      theory.  */
   11437              : 
   11438     10783477 :   if (TARGET_CPU_P (K6)
   11439            0 :       && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
   11440            0 :           || (parts.disp && !parts.base && parts.index && parts.scale != 1)
   11441            0 :           || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
   11442            0 :     cost += 10;
   11443              : 
   11444     10783477 :   return cost;
   11445              : }
   11446              : 
   11447              : /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */
   11448              : 
   11449              : bool
   11450      1181140 : ix86_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
   11451              :                                      unsigned int align,
   11452              :                                      enum by_pieces_operation op,
   11453              :                                      bool speed_p)
   11454              : {
   11455              :   /* Return true when we are currently expanding memcpy/memset epilogue
   11456              :      with move_by_pieces or store_by_pieces.  */
   11457      1181140 :   if (cfun->machine->by_pieces_in_use)
   11458              :     return true;
   11459              : 
   11460      1179037 :   return default_use_by_pieces_infrastructure_p (size, align, op,
   11461      1179037 :                                                  speed_p);
   11462              : }
   11463              : 
   11464              : /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   11465              :    this is used for to form addresses to local data when -fPIC is in
   11466              :    use.  */
   11467              : 
   11468              : static bool
   11469            0 : darwin_local_data_pic (rtx disp)
   11470              : {
   11471            0 :   return (GET_CODE (disp) == UNSPEC
   11472            0 :           && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
   11473              : }
   11474              : 
   11475              : /* True if the function symbol operand X should be loaded from GOT.
   11476              :    If CALL_P is true, X is a call operand.
   11477              : 
   11478              :    NB: -mno-direct-extern-access doesn't force load from GOT for
   11479              :    call.
   11480              : 
   11481              :    NB: In 32-bit mode, only non-PIC is allowed in inline assembly
   11482              :    statements, since a PIC register could not be available at the
   11483              :    call site.  */
   11484              : 
   11485              : bool
   11486   1842821850 : ix86_force_load_from_GOT_p (rtx x, bool call_p)
   11487              : {
   11488     96335015 :   return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X))
   11489              :           && !TARGET_PECOFF && !TARGET_MACHO
   11490   1839967055 :           && (!flag_pic || this_is_asm_operands)
   11491   1819740314 :           && ix86_cmodel != CM_LARGE
   11492   1819734285 :           && ix86_cmodel != CM_LARGE_PIC
   11493   1819734284 :           && SYMBOL_REF_P (x)
   11494   1819734282 :           && ((!call_p
   11495   1814333208 :                && (!ix86_direct_extern_access
   11496   1814330938 :                    || (SYMBOL_REF_DECL (x)
   11497   1632308842 :                        && lookup_attribute ("nodirect_extern_access",
   11498   1632308842 :                                             DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))
   11499   1819731558 :               || (SYMBOL_REF_FUNCTION_P (x)
   11500    684972132 :                   && (!flag_plt
   11501    684967721 :                       || (SYMBOL_REF_DECL (x)
   11502    684967721 :                           && lookup_attribute ("noplt",
   11503    684967721 :                                                DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))))
   11504   1842829384 :           && !SYMBOL_REF_LOCAL_P (x));
   11505              : }
   11506              : 
/* Determine if a given RTX is a valid constant.  We already know this
   satisfies CONSTANT_P.  */

static bool
ix86_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      /* Strip the CONST wrapper and any outer "sym + offset" PLUS;
	 only a CONST_INT addend is acceptable there.  */
      x = XEXP (x, 0);

      if (GET_CODE (x) == PLUS)
	{
	  if (!CONST_INT_P (XEXP (x, 1)))
	    return false;
	  x = XEXP (x, 0);
	}

      if (TARGET_MACHO && darwin_local_data_pic (x))
	return true;

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (x) == UNSPEC)
	switch (XINT (x, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	  case UNSPEC_NTPOFF:
	    /* Local-exec TLS offsets are link-time constants.  */
	    x = XVECEXP (x, 0, 0);
	    return (SYMBOL_REF_P (x)
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_DTPOFF:
	    /* Local-dynamic TLS module offsets likewise.  */
	    x = XVECEXP (x, 0, 0);
	    return (SYMBOL_REF_P (x)
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
	  case UNSPEC_SECREL32:
	    x = XVECEXP (x, 0, 0);
	    return SYMBOL_REF_P (x);
	  default:
	    return false;
	  }

      /* We must have drilled down to a symbol.  */
      if (LABEL_REF_P (x))
	return true;
      if (!SYMBOL_REF_P (x))
	return false;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      if (SYMBOL_REF_TLS_MODEL (x))
	return false;

      /* DLLIMPORT symbols are never valid.  */
      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
	  && SYMBOL_REF_DLLIMPORT_P (x))
	return false;

#if TARGET_MACHO
      /* mdynamic-no-pic */
      if (MACHO_DYNAMIC_NO_PIC_P)
	return machopic_symbol_defined_p (x);
#endif

      /* External function address should be loaded
	 via the GOT slot to avoid PLT.  */
      if (ix86_force_load_from_GOT_p (x))
	return false;

      break;

    CASE_CONST_SCALAR_INT:
      /* Never fold an ENDBR64/ENDBR32 opcode into an immediate (CET
	 hardening).  */
      if (ix86_endbr_immediate_operand (x, VOIDmode))
	return false;

      switch (mode)
	{
	case E_TImode:
	  if (TARGET_64BIT)
	    return true;
	  /* FALLTHRU */
	case E_OImode:
	case E_XImode:
	  /* Wide integer constants are valid only if they are a
	     standard SSE constant or fit the widest vector move the
	     ISA level supports.  */
	  if (!standard_sse_constant_p (x, mode)
	      && GET_MODE_SIZE (TARGET_AVX512F
				? XImode
				: (TARGET_AVX
				   ? OImode
				   : (TARGET_SSE2
				      ? TImode : DImode))) < GET_MODE_SIZE (mode))
	    return false;
	default:
	  break;
	}
      break;

    case CONST_VECTOR:
      /* Only standard SSE constants (0, all-ones, ...) are allowed.  */
      if (!standard_sse_constant_p (x, mode))
	return false;
      break;

    case CONST_DOUBLE:
      /* BFmode has no direct constant moves.  */
      if (mode == E_BFmode)
	return false;
      /* FALLTHRU */

    default:
      break;
    }

  /* Otherwise we handle everything else in the move patterns.  */
  return true;
}
   11623              : 
   11624              : /* Determine if it's legal to put X into the constant pool.  This
   11625              :    is not possible for the address of thread-local symbols, which
   11626              :    is checked above.  */
   11627              : 
   11628              : static bool
   11629     61965965 : ix86_cannot_force_const_mem (machine_mode mode, rtx x)
   11630              : {
   11631              :   /* We can put any immediate constant in memory.  */
   11632     61965965 :   switch (GET_CODE (x))
   11633              :     {
   11634              :     CASE_CONST_ANY:
   11635              :       return false;
   11636              : 
   11637      1786299 :     default:
   11638      1786299 :       break;
   11639              :     }
   11640              : 
   11641      1786299 :   return !ix86_legitimate_constant_p (mode, x);
   11642              : }
   11643              : 
   11644              : /* Return a unique alias set for the GOT.  */
   11645              : 
   11646              : alias_set_type
   11647       188094 : ix86_GOT_alias_set (void)
   11648              : {
   11649       188094 :   static alias_set_type set = -1;
   11650       188094 :   if (set == -1)
   11651         2929 :     set = new_alias_set ();
   11652       188094 :   return set;
   11653              : }
   11654              : 
/* Nonzero if the constant value X is a legitimate general operand
   when generating PIC code.  It is given that flag_pic is on and
   that X satisfies CONSTANT_P.  */

bool
legitimate_pic_operand_p (rtx x)
{
  rtx inner;

  switch (GET_CODE (x))
    {
    case CONST:
      /* Look through "sym + const_int" to the symbolic part.  */
      inner = XEXP (x, 0);
      if (GET_CODE (inner) == PLUS
	  && CONST_INT_P (XEXP (inner, 1)))
	inner = XEXP (inner, 0);

      /* Only some unspecs are valid as "constants".  */
      if (GET_CODE (inner) == UNSPEC)
	switch (XINT (inner, 1))
	  {
	  case UNSPEC_GOT:
	  case UNSPEC_GOTOFF:
	  case UNSPEC_PLTOFF:
	    return TARGET_64BIT;
	  case UNSPEC_TPOFF:
	    /* Local-exec TLS offsets are link-time constants.  */
	    x = XVECEXP (inner, 0, 0);
	    return (SYMBOL_REF_P (x)
		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
	  case UNSPEC_SECREL32:
	    x = XVECEXP (inner, 0, 0);
	    return SYMBOL_REF_P (x);
	  case UNSPEC_MACHOPIC_OFFSET:
	    return legitimate_pic_address_disp_p (x);
	  default:
	    return false;
	  }
      /* A CONST without an UNSPEC is handled like a plain symbol;
	 note X (not INNER) is passed on below.  */
      /* FALLTHRU */

    case SYMBOL_REF:
    case LABEL_REF:
      return legitimate_pic_address_disp_p (x);

    default:
      /* Non-symbolic constants are always fine under PIC.  */
      return true;
    }
}
   11702              : 
   11703              : /* Determine if a given CONST RTX is a valid memory displacement
   11704              :    in PIC mode.  */
   11705              : 
   11706              : bool
   11707     64777509 : legitimate_pic_address_disp_p (rtx disp)
   11708              : {
   11709     64777509 :   bool saw_plus;
   11710              : 
   11711              :   /* In 64bit mode we can allow direct addresses of symbols and labels
   11712              :      when they are not dynamic symbols.  */
   11713     64777509 :   if (TARGET_64BIT)
   11714              :     {
   11715     39784200 :       rtx op0 = disp, op1;
   11716              : 
   11717     39784200 :       switch (GET_CODE (disp))
   11718              :         {
   11719              :         case LABEL_REF:
   11720              :           return true;
   11721              : 
   11722     10910779 :         case CONST:
   11723     10910779 :           if (GET_CODE (XEXP (disp, 0)) != PLUS)
   11724              :             break;
   11725      1170351 :           op0 = XEXP (XEXP (disp, 0), 0);
   11726      1170351 :           op1 = XEXP (XEXP (disp, 0), 1);
   11727      1170351 :           if (!CONST_INT_P (op1))
   11728              :             break;
   11729      1170351 :           if (GET_CODE (op0) == UNSPEC
   11730          296 :               && (XINT (op0, 1) == UNSPEC_DTPOFF
   11731          296 :                   || XINT (op0, 1) == UNSPEC_NTPOFF)
   11732      1170647 :               && trunc_int_for_mode (INTVAL (op1), SImode) == INTVAL (op1))
   11733              :             return true;
   11734      1170055 :           if (INTVAL (op1) >= 16*1024*1024
   11735      1170055 :               || INTVAL (op1) < -16*1024*1024)
   11736              :             break;
   11737      1169967 :           if (LABEL_REF_P (op0))
   11738              :             return true;
   11739      1169967 :           if (GET_CODE (op0) == CONST
   11740            0 :               && GET_CODE (XEXP (op0, 0)) == UNSPEC
   11741            0 :               && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
   11742              :             return true;
   11743      1169967 :           if (GET_CODE (op0) == UNSPEC
   11744            0 :               && XINT (op0, 1) == UNSPEC_PCREL)
   11745              :             return true;
   11746      1169967 :           if (!SYMBOL_REF_P (op0))
   11747              :             break;
   11748              :           /* FALLTHRU */
   11749              : 
   11750     29829277 :         case SYMBOL_REF:
   11751              :           /* TLS references should always be enclosed in UNSPEC.
   11752              :              The dllimported symbol needs always to be resolved.  */
   11753     29829277 :           if (SYMBOL_REF_TLS_MODEL (op0)
   11754              :               || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
   11755              :             return false;
   11756              : 
   11757     29673748 :           if (TARGET_PECOFF)
   11758              :             {
   11759              : #if TARGET_PECOFF
   11760              :               if (is_imported_p (op0))
   11761              :                 return true;
   11762              : #endif
   11763              : 
   11764              :               if (SYMBOL_REF_FAR_ADDR_P (op0) || !SYMBOL_REF_LOCAL_P (op0))
   11765              :                 break;
   11766              : 
   11767              :               /* Non-external-weak function symbols need to be resolved only
   11768              :                  for the large model.  Non-external symbols don't need to be
   11769              :                  resolved for large and medium models.  For the small model,
   11770              :                  we don't need to resolve anything here.  */
   11771              :               if ((ix86_cmodel != CM_LARGE_PIC
   11772              :                    && SYMBOL_REF_FUNCTION_P (op0)
   11773              :                    && !(SYMBOL_REF_EXTERNAL_P (op0) && SYMBOL_REF_WEAK (op0)))
   11774              :                   || !SYMBOL_REF_EXTERNAL_P (op0)
   11775              :                   || ix86_cmodel == CM_SMALL_PIC)
   11776              :                 return true;
   11777              :             }
   11778     29673748 :           else if (!SYMBOL_REF_FAR_ADDR_P (op0)
   11779     29673744 :                    && (SYMBOL_REF_LOCAL_P (op0)
   11780     18070635 :                        || ((ix86_direct_extern_access
   11781     35969960 :                             && !(SYMBOL_REF_DECL (op0)
   11782     17899488 :                                  && lookup_attribute ("nodirect_extern_access",
   11783     17899488 :                                                       DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0)))))
   11784              :                            && HAVE_LD_PIE_COPYRELOC
   11785     18070309 :                            && flag_pie
   11786        34047 :                            && !SYMBOL_REF_WEAK (op0)
   11787        33659 :                            && !SYMBOL_REF_FUNCTION_P (op0)))
   11788     41280590 :                    && ix86_cmodel != CM_LARGE_PIC)
   11789              :             return true;
   11790              :           break;
   11791              : 
   11792              :         default:
   11793              :           break;
   11794              :         }
   11795              :     }
   11796     52804437 :   if (GET_CODE (disp) != CONST)
   11797              :     return false;
   11798     14926454 :   disp = XEXP (disp, 0);
   11799              : 
   11800     14926454 :   if (TARGET_64BIT)
   11801              :     {
   11802              :       /* We are unsafe to allow PLUS expressions.  This limit allowed distance
   11803              :          of GOT tables.  We should not need these anyway.  */
   11804      9792395 :       if (GET_CODE (disp) != UNSPEC
   11805      9740428 :           || (XINT (disp, 1) != UNSPEC_GOTPCREL
   11806      9740428 :               && XINT (disp, 1) != UNSPEC_GOTOFF
   11807              :               && XINT (disp, 1) != UNSPEC_PCREL
   11808              :               && XINT (disp, 1) != UNSPEC_PLTOFF))
   11809              :         return false;
   11810              : 
   11811      9740428 :       if (!SYMBOL_REF_P (XVECEXP (disp, 0, 0))
   11812      9740428 :           && !LABEL_REF_P (XVECEXP (disp, 0, 0)))
   11813              :         return false;
   11814              :       return true;
   11815              :     }
   11816              : 
   11817      5134059 :   saw_plus = false;
   11818      5134059 :   if (GET_CODE (disp) == PLUS)
   11819              :     {
   11820       585582 :       if (!CONST_INT_P (XEXP (disp, 1)))
   11821              :         return false;
   11822       585582 :       disp = XEXP (disp, 0);
   11823       585582 :       saw_plus = true;
   11824              :     }
   11825              : 
   11826      5134059 :   if (TARGET_MACHO && darwin_local_data_pic (disp))
   11827              :     return true;
   11828              : 
   11829      5134059 :   if (GET_CODE (disp) != UNSPEC)
   11830              :     return false;
   11831              : 
   11832      4970536 :   switch (XINT (disp, 1))
   11833              :     {
   11834      2258793 :     case UNSPEC_GOT:
   11835      2258793 :       if (saw_plus)
   11836              :         return false;
   11837              :       /* We need to check for both symbols and labels because VxWorks loads
   11838              :          text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
   11839              :          details.  */
   11840      2258792 :       return (SYMBOL_REF_P (XVECEXP (disp, 0, 0))
   11841      2258792 :               || LABEL_REF_P (XVECEXP (disp, 0, 0)));
   11842      2711743 :     case UNSPEC_GOTOFF:
   11843              :       /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
   11844              :          While ABI specify also 32bit relocation but we don't produce it in
   11845              :          small PIC model at all.  */
   11846      2711743 :       if ((SYMBOL_REF_P (XVECEXP (disp, 0, 0))
   11847      2711743 :            || LABEL_REF_P (XVECEXP (disp, 0, 0)))
   11848              :           && !TARGET_64BIT)
   11849      5423486 :         return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
   11850              :       return false;
   11851            0 :     case UNSPEC_GOTTPOFF:
   11852            0 :     case UNSPEC_GOTNTPOFF:
   11853            0 :     case UNSPEC_INDNTPOFF:
   11854            0 :       if (saw_plus)
   11855              :         return false;
   11856            0 :       disp = XVECEXP (disp, 0, 0);
   11857            0 :       return (SYMBOL_REF_P (disp)
   11858            0 :               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
   11859            0 :     case UNSPEC_NTPOFF:
   11860            0 :       disp = XVECEXP (disp, 0, 0);
   11861            0 :       return (SYMBOL_REF_P (disp)
   11862            0 :               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
   11863            0 :     case UNSPEC_DTPOFF:
   11864            0 :       disp = XVECEXP (disp, 0, 0);
   11865            0 :       return (SYMBOL_REF_P (disp)
   11866            0 :               && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
   11867            0 :     case UNSPEC_SECREL32:
   11868            0 :       disp = XVECEXP (disp, 0, 0);
   11869            0 :       return SYMBOL_REF_P (disp);
   11870              :     }
   11871              : 
   11872              :   return false;
   11873              : }
   11874              : 
   11875              : /* Determine if op is suitable RTX for an address register.
   11876              :    Return naked register if a register or a register subreg is
   11877              :    found, otherwise return NULL_RTX.  */
   11878              : 
   11879              : static rtx
   11880   1368794230 : ix86_validate_address_register (rtx op)
   11881              : {
   11882   1368794230 :   machine_mode mode = GET_MODE (op);
   11883              : 
   11884              :   /* Only SImode or DImode registers can form the address.  */
   11885   1368794230 :   if (mode != SImode && mode != DImode)
   11886              :     return NULL_RTX;
   11887              : 
   11888   1368787395 :   if (REG_P (op))
   11889              :     return op;
   11890       727528 :   else if (SUBREG_P (op))
   11891              :     {
   11892       727528 :       rtx reg = SUBREG_REG (op);
   11893              : 
   11894       727528 :       if (!REG_P (reg))
   11895              :         return NULL_RTX;
   11896              : 
   11897       727528 :       mode = GET_MODE (reg);
   11898              : 
   11899              :       /* Don't allow SUBREGs that span more than a word.  It can
   11900              :          lead to spill failures when the register is one word out
   11901              :          of a two word structure.  */
   11902      1500642 :       if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   11903              :         return NULL_RTX;
   11904              : 
   11905              :       /* Allow only SUBREGs of non-eliminable hard registers.  */
   11906       250514 :       if (register_no_elim_operand (reg, mode))
   11907              :         return reg;
   11908              :     }
   11909              : 
   11910              :   /* Op is not a register.  */
   11911              :   return NULL_RTX;
   11912              : }
   11913              : 
/* Determine which memory address register set INSN can use.

   The result is an ADDR_* attribute value; lower values denote more
   limited register sets (the MIN below therefore selects the most
   restricted class across all enabled alternatives).  */

static enum attr_addr
ix86_memory_address_reg_class (rtx_insn* insn)
{
  /* LRA can do some initialization with NULL insn,
     return maximum register class in this case.  */
  enum attr_addr addr_rclass = ADDR_GPR32;

  if (!insn)
    return addr_rclass;

  /* Inline asm may use GPR32 addressing only when explicitly allowed;
     otherwise restrict it to GPR16.  */
  if (asm_noperands (PATTERN (insn)) >= 0
      || GET_CODE (PATTERN (insn)) == ASM_INPUT)
    return ix86_apx_inline_asm_use_gpr32 ? ADDR_GPR32 : ADDR_GPR16;

  /* Return maximum register class for unrecognized instructions.  */
  if (INSN_CODE (insn) < 0)
    return addr_rclass;

  /* Try to recognize the insn before calling get_attr_addr.
     Save current recog_data and current alternative.  */
  struct recog_data_d saved_recog_data = recog_data;
  int saved_alternative = which_alternative;

  /* Update recog_data for processing of alternatives.  */
  extract_insn_cached (insn);

  /* If current alternative is not set, loop through enabled
     alternatives and get the most limited register class.  */
  if (saved_alternative == -1)
    {
      alternative_mask enabled = get_enabled_alternatives (insn);

      for (int i = 0; i < recog_data.n_alternatives; i++)
        {
          if (!TEST_BIT (enabled, i))
            continue;

          which_alternative = i;
          addr_rclass = MIN (addr_rclass, get_attr_addr (insn));
        }
    }
  else
    {
      which_alternative = saved_alternative;
      addr_rclass = get_attr_addr (insn);
    }

  /* Restore the global recognition state clobbered above.  */
  recog_data = saved_recog_data;
  which_alternative = saved_alternative;

  return addr_rclass;
}
   11968              : 
   11969              : /* Return memory address register class insn can use.  */
   11970              : 
   11971              : enum reg_class
   11972    214517208 : ix86_insn_base_reg_class (rtx_insn* insn)
   11973              : {
   11974    214517208 :   switch (ix86_memory_address_reg_class (insn))
   11975              :     {
   11976              :     case ADDR_GPR8:
   11977              :       return LEGACY_GENERAL_REGS;
   11978              :     case ADDR_GPR16:
   11979              :       return GENERAL_GPR16;
   11980              :     case ADDR_GPR32:
   11981              :       break;
   11982            0 :     default:
   11983            0 :       gcc_unreachable ();
   11984              :     }
   11985              : 
   11986              :   return BASE_REG_CLASS;
   11987              : }
   11988              : 
   11989              : bool
   11990      1287243 : ix86_regno_ok_for_insn_base_p (int regno, rtx_insn* insn)
   11991              : {
   11992      1287243 :   switch (ix86_memory_address_reg_class (insn))
   11993              :     {
   11994            0 :     case ADDR_GPR8:
   11995            0 :       return LEGACY_INT_REGNO_P (regno);
   11996            0 :     case ADDR_GPR16:
   11997            0 :       return GENERAL_GPR16_REGNO_P (regno);
   11998      1287243 :     case ADDR_GPR32:
   11999      1287243 :       break;
   12000            0 :     default:
   12001            0 :       gcc_unreachable ();
   12002              :     }
   12003              : 
   12004      1287243 :   return GENERAL_REGNO_P (regno);
   12005              : }
   12006              : 
   12007              : enum reg_class
   12008     39412034 : ix86_insn_index_reg_class (rtx_insn* insn)
   12009              : {
   12010     39412034 :   switch (ix86_memory_address_reg_class (insn))
   12011              :     {
   12012              :     case ADDR_GPR8:
   12013              :       return LEGACY_INDEX_REGS;
   12014              :     case ADDR_GPR16:
   12015              :       return INDEX_GPR16;
   12016              :     case ADDR_GPR32:
   12017              :       break;
   12018            0 :     default:
   12019            0 :       gcc_unreachable ();
   12020              :     }
   12021              : 
   12022              :   return INDEX_REG_CLASS;
   12023              : }
   12024              : 
/* Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes address in canonical form.  LEGITIMIZE_ADDRESS should
   convert common non-canonical forms to canonical form so that they will
   be recognized.

   STRICT selects between the strict and non-strict REGNO_OK_FOR_*
   checks below; the mode and code_helper arguments are unused here.  */

static bool
ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
                           code_helper = ERROR_MARK)
{
  struct ix86_address parts;
  rtx base, index, disp;
  HOST_WIDE_INT scale;
  addr_space_t seg;

  if (ix86_decompose_address (addr, &parts) == 0)
    /* Decomposition failed.  */
    return false;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;
  seg = parts.seg;

  /* Validate base register.  */
  if (base)
    {
      rtx reg = ix86_validate_address_register (base);

      if (reg == NULL_RTX)
        return false;

      unsigned int regno = REGNO (reg);
      if ((strict && !REGNO_OK_FOR_BASE_P (regno))
          || (!strict && !REGNO_OK_FOR_BASE_NONSTRICT_P (regno)))
        /* Base is not valid.  */
        return false;
    }

  /* Validate index register.  */
  if (index)
    {
      rtx reg = ix86_validate_address_register (index);

      if (reg == NULL_RTX)
        return false;

      unsigned int regno = REGNO (reg);
      if ((strict && !REGNO_OK_FOR_INDEX_P (regno))
          || (!strict && !REGNO_OK_FOR_INDEX_NONSTRICT_P (regno)))
        /* Index is not valid.  */
        return false;
    }

  /* Index and base should have the same mode.  */
  if (base && index
      && GET_MODE (base) != GET_MODE (index))
    return false;

  /* Address override works only on the (%reg) part of %fs:(%reg).  */
  if (seg != ADDR_SPACE_GENERIC
      && ((base && GET_MODE (base) != word_mode)
          || (index && GET_MODE (index) != word_mode)))
    return false;

  /* Validate scale factor.  Only 1, 2, 4 and 8 are representable in
     the SIB byte.  */
  if (scale != 1)
    {
      if (!index)
        /* Scale without index.  */
        return false;

      if (scale != 2 && scale != 4 && scale != 8)
        /* Scale is not a valid multiplier.  */
        return false;
    }

  /* Validate displacement.  */
  if (disp)
    {
      if (ix86_endbr_immediate_operand (disp, VOIDmode))
        return false;

      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == UNSPEC
          && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
        switch (XINT (XEXP (disp, 0), 1))
          {
          /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
             when used.  While ABI specify also 32bit relocations, we
             don't produce them at all and use IP relative instead.
             Allow GOT in 32bit mode for both PIC and non-PIC if symbol
             should be loaded via GOT.  */
          case UNSPEC_GOT:
            if (!TARGET_64BIT
                && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
              goto is_legitimate_pic;
            /* FALLTHRU */
          case UNSPEC_GOTOFF:
            gcc_assert (flag_pic);
            if (!TARGET_64BIT)
              goto is_legitimate_pic;

            /* 64bit address unspec.  */
            return false;

          case UNSPEC_GOTPCREL:
            if (ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
              goto is_legitimate_pic;
            /* FALLTHRU */
          case UNSPEC_PCREL:
            gcc_assert (flag_pic);
            goto is_legitimate_pic;

          /* TLS-related unspecs fall through to the generic
             displacement checks below.  */
          case UNSPEC_GOTTPOFF:
          case UNSPEC_GOTNTPOFF:
          case UNSPEC_INDNTPOFF:
          case UNSPEC_NTPOFF:
          case UNSPEC_DTPOFF:
          case UNSPEC_SECREL32:
            break;

          default:
            /* Invalid address unspec.  */
            return false;
          }

      else if (SYMBOLIC_CONST (disp)
               && (flag_pic
#if TARGET_MACHO
                   || (MACHOPIC_INDIRECT
                       && !machopic_operand_p (disp))
#endif
                  ))
        {

          /* Also entered via goto from the UNSPEC switch above for
             displacements already known to be PIC-style references.  */
        is_legitimate_pic:
          if (TARGET_64BIT && (index || base))
            {
              /* foo@dtpoff(%rX) is ok.  */
              if (GET_CODE (disp) != CONST
                  || GET_CODE (XEXP (disp, 0)) != PLUS
                  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
                  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
                  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
                      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF
                      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_SECREL32))
                /* Non-constant pic memory reference.  */
                return false;
            }
          else if ((!TARGET_MACHO || flag_pic)
                    && ! legitimate_pic_address_disp_p (disp))
            /* Displacement is an invalid pic construct.  */
            return false;
#if TARGET_MACHO
          else if (MACHO_DYNAMIC_NO_PIC_P
                   && !ix86_legitimate_constant_p (Pmode, disp))
            /* Displacement must be referenced via non_lazy_pointer.  */
            return false;
#endif

          /* This code used to verify that a symbolic pic displacement
             includes the pic_offset_table_rtx register.

             While this is good idea, unfortunately these constructs may
             be created by "adds using lea" optimization for incorrect
             code like:

             int a;
             int foo(int i)
               {
                 return *(&a+i);
               }

             This code is nonsensical, but results in addressing
             GOT table with pic_offset_table_rtx base.  We can't
             just refuse it easily, since it gets matched by
             "addsi3" pattern, that later gets split to lea in the
             case output register differs from input.  While this
             can be handled by separate addsi pattern for this case
             that never results in lea, this seems to be easier and
             correct fix for crash to disable this test.  */
        }
      else if (!LABEL_REF_P (disp)
               && !CONST_INT_P (disp)
               && (GET_CODE (disp) != CONST
                   || !ix86_legitimate_constant_p (Pmode, disp))
               && (!SYMBOL_REF_P (disp)
                   || !ix86_legitimate_constant_p (Pmode, disp)))
        /* Displacement is not constant.  */
        return false;
      else if (TARGET_64BIT
               && !x86_64_immediate_operand (disp, VOIDmode))
        /* Displacement is out of range.  */
        return false;
      /* In x32 mode, constant addresses are sign extended to 64bit, so
         we have to prevent addresses from 0x80000000 to 0xffffffff.  */
      else if (TARGET_X32 && !(index || base)
               && CONST_INT_P (disp)
               && val_signbit_known_set_p (SImode, INTVAL (disp)))
        return false;
    }

  /* Everything looks valid.  */
  return true;
}
   12234              : 
   12235              : /* Determine if a given RTX is a valid constant address.  */
   12236              : 
   12237              : bool
   12238   2788630664 : constant_address_p (rtx x)
   12239              : {
   12240   2868866897 :   return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
   12241              : }
   12242              : 
   12243              : 
   12244              : /* Return a legitimate reference for ORIG (an address) using the
   12245              :    register REG.  If REG is 0, a new pseudo is generated.
   12246              : 
   12247              :    There are two types of references that must be handled:
   12248              : 
   12249              :    1. Global data references must load the address from the GOT, via
   12250              :       the PIC reg.  An insn is emitted to do this load, and the reg is
   12251              :       returned.
   12252              : 
   12253              :    2. Static data references, constant pool addresses, and code labels
   12254              :       compute the address as an offset from the GOT, whose base is in
   12255              :       the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
   12256              :       differentiate them from global data objects.  The returned
   12257              :       address is the PIC reg + an unspec constant.
   12258              : 
   12259              :    TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
   12260              :    reg also appears in the address.  */
   12261              : 
   12262              : rtx
   12263       396198 : legitimize_pic_address (rtx orig, rtx reg)
   12264              : {
   12265       396198 :   rtx addr = orig;
   12266       396198 :   rtx new_rtx = orig;
   12267              : 
   12268              : #if TARGET_MACHO
   12269              :   if (TARGET_MACHO && !TARGET_64BIT)
   12270              :     {
   12271              :       if (reg == 0)
   12272              :         reg = gen_reg_rtx (Pmode);
   12273              :       /* Use the generic Mach-O PIC machinery.  */
   12274              :       return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
   12275              :     }
   12276              : #endif
   12277              : 
   12278       396198 :   if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
   12279              :     {
   12280              : #if TARGET_PECOFF
   12281              :       rtx tmp = legitimize_pe_coff_symbol (addr, true);
   12282              :       if (tmp)
   12283              :         return tmp;
   12284              : #endif
   12285              :     }
   12286              : 
   12287       396198 :   if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
   12288              :     new_rtx = addr;
   12289       300727 :   else if ((!TARGET_64BIT
   12290       101249 :             || /* TARGET_64BIT && */ ix86_cmodel != CM_SMALL_PIC)
   12291              :            && !TARGET_PECOFF
   12292       500286 :            && gotoff_operand (addr, Pmode))
   12293              :     {
   12294              :       /* This symbol may be referenced via a displacement
   12295              :          from the PIC base address (@GOTOFF).  */
   12296        96623 :       if (GET_CODE (addr) == CONST)
   12297         2993 :         addr = XEXP (addr, 0);
   12298              : 
   12299        96623 :       if (GET_CODE (addr) == PLUS)
   12300              :           {
   12301         5986 :             new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
   12302              :                                       UNSPEC_GOTOFF);
   12303         5986 :             new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
   12304              :           }
   12305              :         else
   12306       187247 :           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
   12307              : 
   12308       193233 :       new_rtx = gen_rtx_CONST (Pmode, new_rtx);
   12309              : 
   12310        96623 :       if (TARGET_64BIT)
   12311           13 :         new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
   12312              : 
   12313        96623 :       if (reg != 0)
   12314              :         {
   12315            3 :           gcc_assert (REG_P (reg));
   12316            3 :           new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
   12317              :                                          new_rtx, reg, 1, OPTAB_DIRECT);
   12318              :         }
   12319              :       else
   12320       193230 :         new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
   12321              :     }
   12322       381400 :   else if ((SYMBOL_REF_P (addr) && SYMBOL_REF_TLS_MODEL (addr) == 0)
   12323              :            /* We can't always use @GOTOFF for text labels
   12324              :               on VxWorks, see gotoff_operand.  */
   12325       204104 :            || (TARGET_VXWORKS_VAROFF && LABEL_REF_P (addr)))
   12326              :     {
   12327              : #if TARGET_PECOFF
   12328              :       rtx tmp = legitimize_pe_coff_symbol (addr, true);
   12329              :       if (tmp)
   12330              :         return tmp;
   12331              : #endif
   12332              : 
   12333              :       /* For x64 PE-COFF there is no GOT table,
   12334              :          so we use address directly.  */
   12335       177293 :       if (TARGET_64BIT && TARGET_PECOFF)
   12336              :         {
   12337              :           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
   12338              :           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
   12339              :         }
   12340       177293 :       else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
   12341              :         {
   12342        94117 :           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
   12343              :                                     UNSPEC_GOTPCREL);
   12344        94117 :           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
   12345        94117 :           new_rtx = gen_const_mem (Pmode, new_rtx);
   12346        94114 :           set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
   12347              :         }
   12348              :       else
   12349              :         {
   12350              :           /* This symbol must be referenced via a load
   12351              :              from the Global Offset Table (@GOT).  */
   12352       166335 :           new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
   12353       166335 :           new_rtx = gen_rtx_CONST (Pmode, new_rtx);
   12354              : 
   12355        83179 :           if (TARGET_64BIT)
   12356           23 :             new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
   12357              : 
   12358        83179 :           if (reg != 0)
   12359              :             {
   12360            0 :               gcc_assert (REG_P (reg));
   12361            0 :               new_rtx = expand_simple_binop (Pmode, PLUS, pic_offset_table_rtx,
   12362              :                                              new_rtx, reg, 1, OPTAB_DIRECT);
   12363              :             }
   12364              :           else
   12365       166335 :             new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
   12366              : 
   12367       166335 :           new_rtx = gen_const_mem (Pmode, new_rtx);
   12368        83179 :           set_mem_alias_set (new_rtx, GOT_ALIAS_SET);
   12369              :         }
   12370              : 
   12371       260452 :       new_rtx = copy_to_suggested_reg (new_rtx, reg, Pmode);
   12372              :     }
   12373              :   else
   12374              :     {
   12375        26811 :       if (CONST_INT_P (addr)
   12376        26811 :           && !x86_64_immediate_operand (addr, VOIDmode))
   12377            8 :         new_rtx = copy_to_suggested_reg (addr, reg, Pmode);
   12378        26803 :       else if (GET_CODE (addr) == CONST)
   12379              :         {
   12380        16589 :           addr = XEXP (addr, 0);
   12381              : 
   12382              :           /* We must match stuff we generate before.  Assume the only
   12383              :              unspecs that can get here are ours.  Not that we could do
   12384              :              anything with them anyway....  */
   12385        16589 :           if (GET_CODE (addr) == UNSPEC
   12386         8839 :               || (GET_CODE (addr) == PLUS
   12387         8839 :                   && GET_CODE (XEXP (addr, 0)) == UNSPEC))
   12388              :             return orig;
   12389         6717 :           gcc_assert (GET_CODE (addr) == PLUS);
   12390              :         }
   12391              : 
   12392        16939 :       if (GET_CODE (addr) == PLUS)
   12393              :         {
   12394         8470 :           rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
   12395              : 
   12396              :           /* Check first to see if this is a constant
   12397              :              offset from a @GOTOFF symbol reference.  */
   12398         8470 :           if (!TARGET_PECOFF
   12399        13390 :               && gotoff_operand (op0, Pmode)
   12400         8470 :               && CONST_INT_P (op1))
   12401              :             {
   12402            4 :               if (!TARGET_64BIT)
   12403              :                 {
   12404            0 :                   new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
   12405              :                                             UNSPEC_GOTOFF);
   12406            0 :                   new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
   12407            0 :                   new_rtx = gen_rtx_CONST (Pmode, new_rtx);
   12408              : 
   12409            0 :                   if (reg != 0)
   12410              :                     {
   12411            0 :                       gcc_assert (REG_P (reg));
   12412            0 :                       new_rtx = expand_simple_binop (Pmode, PLUS,
   12413              :                                                      pic_offset_table_rtx,
   12414              :                                                      new_rtx, reg, 1,
   12415              :                                                      OPTAB_DIRECT);
   12416              :                     }
   12417              :                   else
   12418            0 :                     new_rtx
   12419            0 :                       = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
   12420              :                 }
   12421              :               else
   12422              :                 {
   12423            4 :                   if (INTVAL (op1) < -16*1024*1024
   12424            4 :                       || INTVAL (op1) >= 16*1024*1024)
   12425              :                     {
   12426            4 :                       if (!x86_64_immediate_operand (op1, Pmode))
   12427            4 :                         op1 = force_reg (Pmode, op1);
   12428              : 
   12429            4 :                       new_rtx
   12430            4 :                         = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
   12431              :                     }
   12432              :                 }
   12433              :             }
   12434              :           else
   12435              :             {
   12436         8466 :               rtx base = legitimize_pic_address (op0, reg);
   12437         8466 :               machine_mode mode = GET_MODE (base);
   12438         8466 :               new_rtx
   12439         8466 :                 = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
   12440              : 
   12441         8466 :               if (CONST_INT_P (new_rtx))
   12442              :                 {
   12443         6705 :                   if (INTVAL (new_rtx) < -16*1024*1024
   12444         6705 :                       || INTVAL (new_rtx) >= 16*1024*1024)
   12445              :                     {
   12446            0 :                       if (!x86_64_immediate_operand (new_rtx, mode))
   12447            0 :                         new_rtx = force_reg (mode, new_rtx);
   12448              : 
   12449            0 :                       new_rtx
   12450            0 :                         = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
   12451              :                     }
   12452              :                   else
   12453         6705 :                     new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
   12454              :                 }
   12455              :               else
   12456              :                 {
   12457              :                   /* For %rip addressing, we have to use
   12458              :                      just disp32, not base nor index.  */
   12459         1761 :                   if (TARGET_64BIT
   12460          100 :                       && (SYMBOL_REF_P (base)
   12461          100 :                           || LABEL_REF_P (base)))
   12462            7 :                     base = force_reg (mode, base);
   12463         1761 :                   if (GET_CODE (new_rtx) == PLUS
   12464         1640 :                       && CONSTANT_P (XEXP (new_rtx, 1)))
   12465              :                     {
   12466         1636 :                       base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
   12467         1636 :                       new_rtx = XEXP (new_rtx, 1);
   12468              :                     }
   12469         1761 :                   new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
   12470              :                 }
   12471              :             }
   12472              :         }
   12473              :     }
   12474              :   return new_rtx;
   12475              : }
   12476              : 
   12477              : /* Load the thread pointer.  If TO_REG is true, force it into a register.  */
   12478              : 
   12479              : static rtx
   12480        24438 : get_thread_pointer (machine_mode tp_mode, bool to_reg)
   12481              : {
   12482        24438 :   rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
   12483              : 
   12484        24438 :   if (GET_MODE (tp) != tp_mode)
   12485              :     {
   12486           11 :       gcc_assert (GET_MODE (tp) == SImode);
   12487           11 :       gcc_assert (tp_mode == DImode);
   12488              : 
   12489           11 :       tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
   12490              :     }
   12491              : 
   12492        24438 :   if (to_reg)
   12493         8122 :     tp = copy_to_mode_reg (tp_mode, tp);
   12494              : 
   12495        24438 :   return tp;
   12496              : }
   12497              : 
   12498              : /* Construct the SYMBOL_REF for the _tls_index symbol.  */
   12499              : 
   12500              : static GTY(()) rtx ix86_tls_index_symbol;
   12501              : 
   12502              : static rtx
   12503            0 : ix86_tls_index (void)
   12504              : {
   12505            0 :   if (!ix86_tls_index_symbol)
   12506            0 :     ix86_tls_index_symbol = gen_rtx_SYMBOL_REF (SImode, "_tls_index");
   12507              : 
   12508            0 :   if (flag_pic)
   12509            0 :     return gen_rtx_CONST (Pmode,
   12510              :                           gen_rtx_UNSPEC (Pmode,
   12511              :                                           gen_rtvec (1, ix86_tls_index_symbol),
   12512              :                                           UNSPEC_PCREL));
   12513              :   else
   12514            0 :     return ix86_tls_index_symbol;
   12515              : }
   12516              : 
/* Construct the SYMBOL_REF for the tls_get_addr function.  */

/* Cached SYMBOL_REF for the TLS runtime helper; GTY marks it as a GC
   root so it survives across functions.  */
static GTY(()) rtx ix86_tls_symbol;

/* Return an rtx through which the TLS runtime helper (__tls_get_addr,
   or ___tls_get_addr for 32-bit GNU TLS) can be called.  For the large
   PIC code model the symbol is returned as a @PLTOFF reference added to
   the PIC register.  As a side effect, diagnoses the unsupported use of
   __tls_get_addr inside a function that must preserve all registers.  */

rtx
ix86_tls_get_addr (void)
{
  if (cfun->machine->call_saved_registers
      == TYPE_NO_CALLER_SAVED_REGISTERS)
    {
      /* __tls_get_addr doesn't preserve vector registers.  When a
         function with no_caller_saved_registers attribute calls
         __tls_get_addr, YMM and ZMM registers will be clobbered.
         Issue an error and suggest -mtls-dialect=gnu2 in this case.  */
      if (cfun->machine->func_type == TYPE_NORMAL)
        error (G_("%<-mtls-dialect=gnu2%> must be used with a function"
                  " with the %<no_caller_saved_registers%> attribute"));
      else
        error (cfun->machine->func_type == TYPE_EXCEPTION
               ? G_("%<-mtls-dialect=gnu2%> must be used with an"
                    " exception service routine")
               : G_("%<-mtls-dialect=gnu2%> must be used with an"
                    " interrupt service routine"));
      /* Don't issue the same error twice.  */
      cfun->machine->func_type = TYPE_NORMAL;
      cfun->machine->call_saved_registers
        = TYPE_DEFAULT_CALL_SAVED_REGISTERS;
    }

  if (!ix86_tls_symbol)
    {
      /* 32-bit GNU TLS uses a triple-underscore entry point.  */
      const char *sym
        = ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
           ? "___tls_get_addr" : "__tls_get_addr");

      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
    }

  if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
    {
      /* Large PIC model: reach the helper via @PLTOFF relative to the
         PIC register.  */
      rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
                                   UNSPEC_PLTOFF);
      return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
                           gen_rtx_CONST (Pmode, unspec));
    }

  return ix86_tls_symbol;
}
   12565              : 
   12566              : /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
   12567              : 
   12568              : static GTY(()) rtx ix86_tls_module_base_symbol;
   12569              : 
   12570              : rtx
   12571           87 : ix86_tls_module_base (void)
   12572              : {
   12573           87 :   if (!ix86_tls_module_base_symbol)
   12574              :     {
   12575           10 :       ix86_tls_module_base_symbol
   12576           10 :         = gen_rtx_SYMBOL_REF (ptr_mode, "_TLS_MODULE_BASE_");
   12577              : 
   12578           10 :       SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
   12579           10 :         |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
   12580              :     }
   12581              : 
   12582           87 :   return ix86_tls_module_base_symbol;
   12583              : }
   12584              : 
/* A subroutine of ix86_legitimize_address and ix86_expand_move.  Expand
   X, a reference to a thread-local symbol, into a legitimate address
   according to the TLS access MODEL (global dynamic, local dynamic,
   initial exec or local exec).  FOR_MOV is false if we expect this to
   be used for a memory address and true if we expect to load the
   address into a register.  Returns the legitimized address rtx; may
   emit instructions as a side effect.  */

rtx
legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
{
  rtx dest, base, off;
  rtx pic = NULL_RTX, tp = NULL_RTX;
  machine_mode tp_mode = Pmode;
  int type;

  /* Windows implements a single form of TLS.  */
  if (TARGET_WIN32_TLS)
    {
      /* Load the 32-bit index.  */
      rtx ind = gen_const_mem (SImode, ix86_tls_index ());
      set_mem_alias_set (ind, GOT_ALIAS_SET);
      if (TARGET_64BIT)
        ind = convert_to_mode (Pmode, ind, 1);
      ind = force_reg (Pmode, ind);

      /* Add it to the thread pointer and load the base.  */
      tp = get_thread_pointer (Pmode, true);
      rtx addr = gen_rtx_PLUS (Pmode, tp,
                               gen_rtx_MULT (Pmode, ind,
                                             GEN_INT (UNITS_PER_WORD)));
      base = gen_const_mem (Pmode, addr);
      set_mem_alias_set (base, GOT_ALIAS_SET);

      /* Add the 32-bit section-relative offset to the base.  */
      base = force_reg (Pmode, base);
      off = gen_rtx_CONST (Pmode,
                           gen_rtx_UNSPEC (SImode,
                                           gen_rtvec (1, x),
                                           UNSPEC_SECREL32));
      return gen_rtx_PLUS (Pmode, base, off);
    }

  /* Fall back to global dynamic model if tool chain cannot support local
     dynamic.  */
  if (TARGET_SUN_TLS && !TARGET_64BIT
      && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
      && model == TLS_MODEL_LOCAL_DYNAMIC)
    model = TLS_MODEL_GLOBAL_DYNAMIC;

  switch (model)
    {
    /* Global dynamic: the address is obtained at runtime, either via a
       GNU2 TLS descriptor or by calling __tls_get_addr.  */
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (!TARGET_64BIT)
        {
          /* 32-bit needs the GOT pointer; reuse the PIC register if we
             have one, otherwise materialize it.  */
          if (flag_pic && !TARGET_PECOFF)
            pic = pic_offset_table_rtx;
          else
            {
              pic = gen_reg_rtx (Pmode);
              emit_insn (gen_set_got (pic));
            }
        }

      if (TARGET_GNU2_TLS)
        {
          dest = gen_reg_rtx (ptr_mode);
          if (TARGET_64BIT)
            emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, dest, x));
          else
            emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));

          /* The descriptor yields an offset; add the thread pointer.  */
          tp = get_thread_pointer (ptr_mode, true);
          dest = gen_rtx_PLUS (ptr_mode, tp, dest);
          if (GET_MODE (dest) != Pmode)
             dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
          dest = force_reg (Pmode, dest);

          if (GET_MODE (x) != Pmode)
            x = gen_rtx_ZERO_EXTEND (Pmode, x);

          /* Record that the result equals X for the RTL optimizers.  */
          set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
        }
      else
        {
          rtx caddr = ix86_tls_get_addr ();

          dest = gen_reg_rtx (Pmode);
          if (TARGET_64BIT)
            {
              rtx rax = gen_rtx_REG (Pmode, AX_REG);
              rtx rdi = gen_rtx_REG (Pmode, DI_REG);
              rtx_insn *insns;

              /* Emit the __tls_get_addr call as a self-contained
                 sequence so it can become a libcall block.  */
              start_sequence ();
              emit_call_insn
                (gen_tls_global_dynamic_64 (Pmode, rax, x, caddr, rdi));
              insns = end_sequence ();

              if (GET_MODE (x) != Pmode)
                x = gen_rtx_ZERO_EXTEND (Pmode, x);

              RTL_CONST_CALL_P (insns) = 1;
              emit_libcall_block (insns, dest, rax, x);
            }
          else
            emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
        }
      break;

    /* Local dynamic: compute the module base once, then add the
       module-relative @DTPOFF offset of X.  */
    case TLS_MODEL_LOCAL_DYNAMIC:
      if (!TARGET_64BIT)
        {
          if (flag_pic)
            pic = pic_offset_table_rtx;
          else
            {
              pic = gen_reg_rtx (Pmode);
              emit_insn (gen_set_got (pic));
            }
        }

      if (TARGET_GNU2_TLS)
        {
          rtx tmp = ix86_tls_module_base ();

          base = gen_reg_rtx (ptr_mode);
          if (TARGET_64BIT)
            emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, base, tmp));
          else
            emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));

          tp = get_thread_pointer (ptr_mode, true);
          if (GET_MODE (base) != Pmode)
            base = gen_rtx_ZERO_EXTEND (Pmode, base);
          base = force_reg (Pmode, base);
        }
      else
        {
          rtx caddr = ix86_tls_get_addr ();

          base = gen_reg_rtx (Pmode);
          if (TARGET_64BIT)
            {
              rtx rax = gen_rtx_REG (Pmode, AX_REG);
              rtx rdi = gen_rtx_REG (Pmode, DI_REG);
              rtx_insn *insns;
              rtx eqv;

              start_sequence ();
              emit_call_insn
                (gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi));
              insns = end_sequence ();

              /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
                 share the LD_BASE result with other LD model accesses.  */
              eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                    UNSPEC_TLS_LD_BASE);

              RTL_CONST_CALL_P (insns) = 1;
              emit_libcall_block (insns, base, rax, eqv);
            }
          else
            emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
        }

      /* Add the per-symbol @DTPOFF offset to the module base.  */
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
      off = gen_rtx_CONST (Pmode, off);

      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));

      if (TARGET_GNU2_TLS)
        {
          /* GNU2 descriptors yield TP-relative values; add the thread
             pointer, widening back to Pmode if needed.  */
          if (GET_MODE (tp) != Pmode)
            {
              dest = lowpart_subreg (ptr_mode, dest, Pmode);
              dest = gen_rtx_PLUS (ptr_mode, tp, dest);
              dest = gen_rtx_ZERO_EXTEND (Pmode, dest);
            }
          else
            dest = gen_rtx_PLUS (Pmode, tp, dest);
          dest = force_reg (Pmode, dest);

          if (GET_MODE (x) != Pmode)
            x = gen_rtx_ZERO_EXTEND (Pmode, x);

          set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
        }
      break;

    /* Initial exec: load the TP-relative offset of X from the GOT and
       add the thread pointer.  */
    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_64BIT)
        {
          /* Generate DImode references to avoid %fs:(%reg32)
             problems and linker IE->LE relaxation bug.  */
          tp_mode = DImode;
          pic = NULL;
          type = UNSPEC_GOTNTPOFF;
        }
      else if (flag_pic)
        {
          pic = pic_offset_table_rtx;
          type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
        }
      else if (!TARGET_ANY_GNU_TLS)
        {
          pic = gen_reg_rtx (Pmode);
          emit_insn (gen_set_got (pic));
          type = UNSPEC_GOTTPOFF;
        }
      else
        {
          pic = NULL;
          type = UNSPEC_INDNTPOFF;
        }

      /* Build the GOT load of the offset.  */
      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
      off = gen_rtx_CONST (tp_mode, off);
      if (pic)
        off = gen_rtx_PLUS (tp_mode, pic, off);
      off = gen_const_mem (tp_mode, off);
      set_mem_alias_set (off, GOT_ALIAS_SET);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
        {
          base = get_thread_pointer (tp_mode,
                                     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          off = force_reg (tp_mode, off);
          dest = gen_rtx_PLUS (tp_mode, base, off);
          if (tp_mode != Pmode)
            dest = convert_to_mode (Pmode, dest, 1);
        }
      else
        {
          /* Old-style TLS: the offset is subtracted from TP.  */
          base = get_thread_pointer (Pmode, true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (gen_sub3_insn (dest, base, off));
        }
      break;

    /* Local exec: the offset of X is a link-time constant; combine it
       directly with the thread pointer.  */
    case TLS_MODEL_LOCAL_EXEC:
      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
                            (TARGET_64BIT || TARGET_ANY_GNU_TLS)
                            ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
      off = gen_rtx_CONST (Pmode, off);

      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
        {
          base = get_thread_pointer (Pmode,
                                     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
          return gen_rtx_PLUS (Pmode, base, off);
        }
      else
        {
          base = get_thread_pointer (Pmode, true);
          dest = gen_reg_rtx (Pmode);
          emit_insn (gen_sub3_insn (dest, base, off));
        }
      break;

    default:
      gcc_unreachable ();
    }

  return dest;
}
   12847              : 
   12848              : /* Return true if the TLS address requires insn using integer registers.
   12849              :    It's used to prevent KMOV/VMOV in TLS code sequences which require integer
   12850              :    MOV instructions, refer to PR103275.  */
   12851              : bool
   12852     15211546 : ix86_gpr_tls_address_pattern_p (rtx mem)
   12853              : {
   12854     15211546 :   gcc_assert (MEM_P (mem));
   12855              : 
   12856     15211546 :   rtx addr = XEXP (mem, 0);
   12857     15211546 :   subrtx_var_iterator::array_type array;
   12858     52967298 :   FOR_EACH_SUBRTX_VAR (iter, array, addr, ALL)
   12859              :     {
   12860     37763284 :       rtx op = *iter;
   12861     37763284 :       if (GET_CODE (op) == UNSPEC)
   12862       200818 :         switch (XINT (op, 1))
   12863              :           {
   12864              :           case UNSPEC_GOTNTPOFF:
   12865         7532 :             return true;
   12866            0 :           case UNSPEC_TPOFF:
   12867            0 :             if (!TARGET_64BIT)
   12868              :               return true;
   12869              :             break;
   12870              :           default:
   12871              :             break;
   12872              :           }
   12873              :     }
   12874              : 
   12875     15204014 :   return false;
   12876     15211546 : }
   12877              : 
/* Return true if OP refers to a TLS address, i.e. if some MEM inside
   OP has an address containing the thread-pointer UNSPEC_TP (possibly
   zero-extended) as an operand somewhere in its PLUS chain.  */
bool
ix86_tls_address_pattern_p (rtx op)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, op, ALL)
    {
      /* NOTE(review): this local deliberately shadows the parameter OP;
         it refers to the current sub-rtx.  */
      rtx op = *iter;
      if (MEM_P (op))
        {
          rtx *x = &XEXP (op, 0);
          /* Walk down the left spine of the PLUS chain, checking both
             operands at each level for the thread pointer.  */
          while (GET_CODE (*x) == PLUS)
            {
              int i;
              for (i = 0; i < 2; i++)
                {
                  rtx u = XEXP (*x, i);
                  /* Look through a zero-extension of the thread
                     pointer (32-bit TP used in a 64-bit address).  */
                  if (GET_CODE (u) == ZERO_EXTEND)
                    u = XEXP (u, 0);
                  if (GET_CODE (u) == UNSPEC
                      && XINT (u, 1) == UNSPEC_TP)
                    return true;
                }
              x = &XEXP (*x, 0);
            }

          /* The MEM's address has been examined; don't descend into
             its sub-rtxes again.  */
          iter.skip_subrtxes ();
        }
    }

  return false;
}
   12910              : 
/* Rewrite *LOC so that it refers to a default TLS address space.

   Search *LOC for a MEM whose address contains the thread-pointer
   UNSPEC_TP (possibly zero-extended) as an operand of a PLUS chain.
   When found, that term is removed from the address and the MEM is
   retagged with DEFAULT_TLS_SEG_REG as its address space, so the
   segment override replaces the explicit thread-pointer addend.
   At most one MEM is rewritten per call.  */
static void
ix86_rewrite_tls_address_1 (rtx *loc)
{
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL)
    {
      /* NOTE(review): this local deliberately shadows the parameter
         LOC; it points at the current sub-rtx.  */
      rtx *loc = *iter;
      if (MEM_P (*loc))
        {
          rtx addr = XEXP (*loc, 0);
          rtx *x = &addr;
          /* Walk down the left spine of the PLUS chain, checking both
             operands at each level.  */
          while (GET_CODE (*x) == PLUS)
            {
              int i;
              for (i = 0; i < 2; i++)
                {
                  rtx u = XEXP (*x, i);
                  if (GET_CODE (u) == ZERO_EXTEND)
                    u = XEXP (u, 0);
                  if (GET_CODE (u) == UNSPEC
                      && XINT (u, 1) == UNSPEC_TP)
                    {
                      /* NB: Since address override only applies to the
                         (reg32) part in fs:(reg32), return if address
                         override is used.  */
                      if (Pmode != word_mode
                          && REG_P (XEXP (*x, 1 - i)))
                        return;

                      addr_space_t as = DEFAULT_TLS_SEG_REG;

                      /* Drop the UNSPEC_TP term, keeping the sibling
                         operand.  */
                      *x = XEXP (*x, 1 - i);

                      /* Non-validating replacement: ADDR was derived
                         from this MEM's own address.  */
                      *loc = replace_equiv_address_nv (*loc, addr, true);
                      set_mem_addr_space (*loc, as);
                      return;
                    }
                }
              x = &XEXP (*x, 0);
            }

          /* Done with this MEM's address; don't revisit its
             sub-rtxes.  */
          iter.skip_subrtxes ();
        }
    }
}
   12957              : 
   12958              : /* Rewrite instruction pattern involvning TLS address
   12959              :    so that it refers to a default TLS address space.  */
   12960              : rtx
   12961        18008 : ix86_rewrite_tls_address (rtx pattern)
   12962              : {
   12963        18008 :   pattern = copy_insn (pattern);
   12964        18008 :   ix86_rewrite_tls_address_1 (&pattern);
   12965        18008 :   return pattern;
   12966              : }
   12967              : 
   12968              : /* Try machine-dependent ways of modifying an illegitimate address
   12969              :    to be legitimate.  If we find one, return the new, valid address.
   12970              :    This macro is used in only one place: `memory_address' in explow.cc.
   12971              : 
   12972              :    OLDX is the address as it was before break_out_memory_refs was called.
   12973              :    In some cases it is useful to look at this to decide what needs to be done.
   12974              : 
   12975              :    It is always safe for this macro to do nothing.  It exists to recognize
   12976              :    opportunities to optimize the output.
   12977              : 
   12978              :    For the 80386, we handle X+REG by loading X into a register R and
   12979              :    using R+REG.  R will go in a general reg and indexing will be used.
   12980              :    However, if REG is a broken-out memory address or multiplication,
   12981              :    nothing needs to be done because REG can certainly go in a general reg.
   12982              : 
   12983              :    When -fpic is used, special handling is needed for symbolic references.
   12984              :    See comments by legitimize_pic_address in i386.cc for details.  */
   12985              : 
static rtx
ix86_legitimize_address (rtx x, rtx, machine_mode mode)
{
  /* Set whenever we rewrite X below; used to decide when it is worth
     re-testing X with ix86_legitimate_address_p.  */
  bool changed = false;
  unsigned log;

  /* A bare SYMBOL_REF with a TLS model needs the full TLS sequence.  */
  log = SYMBOL_REF_P (x) ? SYMBOL_REF_TLS_MODEL (x) : 0;
  if (log)
    return legitimize_tls_address (x, (enum tls_model) log, false);
  /* Likewise for (const (plus (symbol_ref tls) (const_int))): legitimize
     the symbol and re-add the offset outside the CONST.  */
  if (GET_CODE (x) == CONST
      && GET_CODE (XEXP (x, 0)) == PLUS
      && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
    {
      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
                                      (enum tls_model) log, false);
      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
    }

  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
    {
#if TARGET_PECOFF
      rtx tmp = legitimize_pe_coff_symbol (x, true);
      if (tmp)
        return tmp;
#endif
    }

  if (flag_pic && SYMBOLIC_CONST (x))
    return legitimize_pic_address (x, 0);

#if TARGET_MACHO
  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
    return machopic_indirect_data_reference (x, 0);
#endif

  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
  if (GET_CODE (x) == ASHIFT
      && CONST_INT_P (XEXP (x, 1))
      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
    {
      changed = true;
      log = INTVAL (XEXP (x, 1));
      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
                        GEN_INT (1 << log));
    }

  if (GET_CODE (x) == PLUS)
    {
      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */

      if (GET_CODE (XEXP (x, 0)) == ASHIFT
          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
          && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
        {
          changed = true;
          log = INTVAL (XEXP (XEXP (x, 0), 1));
          XEXP (x, 0) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
                                      GEN_INT (1 << log));
        }

      if (GET_CODE (XEXP (x, 1)) == ASHIFT
          && CONST_INT_P (XEXP (XEXP (x, 1), 1))
          && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
        {
          changed = true;
          log = INTVAL (XEXP (XEXP (x, 1), 1));
          XEXP (x, 1) = gen_rtx_MULT (Pmode,
                                      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
                                      GEN_INT (1 << log));
        }

      /* Put multiply first if it isn't already.  */
      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          std::swap (XEXP (x, 0), XEXP (x, 1));
          changed = true;
        }

      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
         created by virtual register instantiation, register elimination, and
         similar optimizations.  */
      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
        {
          changed = true;
          x = gen_rtx_PLUS (Pmode,
                            gen_rtx_PLUS (Pmode, XEXP (x, 0),
                                          XEXP (XEXP (x, 1), 0)),
                            XEXP (XEXP (x, 1), 1));
        }

      /* Canonicalize
         (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
         into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
               && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
               && CONSTANT_P (XEXP (x, 1)))
        {
          rtx constant;
          rtx other = NULL_RTX;

          /* Find which of the two constants folds into the displacement;
             the remaining operand becomes part of the register term.  */
          if (CONST_INT_P (XEXP (x, 1)))
            {
              constant = XEXP (x, 1);
              other = XEXP (XEXP (XEXP (x, 0), 1), 1);
            }
          else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
            {
              constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
              other = XEXP (x, 1);
            }
          else
            constant = 0;

          if (constant)
            {
              changed = true;
              x = gen_rtx_PLUS (Pmode,
                                gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
                                              XEXP (XEXP (XEXP (x, 0), 1), 0)),
                                plus_constant (Pmode, other,
                                               INTVAL (constant)));
            }
        }

      /* If canonicalization alone produced a valid address, we are done.  */
      if (changed && ix86_legitimate_address_p (mode, x, false))
        return x;

      /* A MULT that cannot serve as the index part is forced into a
         register so it can act as base or index.  */
      if (GET_CODE (XEXP (x, 0)) == MULT)
        {
          changed = true;
          XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
        }

      if (GET_CODE (XEXP (x, 1)) == MULT)
        {
          changed = true;
          XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
        }

      /* (plus (reg) (reg)) is always a legitimate base+index form.  */
      if (changed
          && REG_P (XEXP (x, 1))
          && REG_P (XEXP (x, 0)))
        return x;

      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
        {
          changed = true;
          x = legitimize_pic_address (x, 0);
        }

      if (changed && ix86_legitimate_address_p (mode, x, false))
        return x;

      /* Last resort: force the non-register operand of the PLUS into a
         fresh pseudo, widening it to Pmode if force_operand returned a
         value in a different mode.  */
      if (REG_P (XEXP (x, 0)))
        {
          rtx temp = gen_reg_rtx (Pmode);
          rtx val  = force_operand (XEXP (x, 1), temp);
          if (val != temp)
            {
              val = convert_to_mode (Pmode, val, 1);
              emit_move_insn (temp, val);
            }

          XEXP (x, 1) = temp;
          return x;
        }

      else if (REG_P (XEXP (x, 1)))
        {
          rtx temp = gen_reg_rtx (Pmode);
          rtx val  = force_operand (XEXP (x, 0), temp);
          if (val != temp)
            {
              val = convert_to_mode (Pmode, val, 1);
              emit_move_insn (temp, val);
            }

          XEXP (x, 0) = temp;
          return x;
        }
    }

  return x;
}
   13174              : 
   13175              : /* Print an integer constant expression in assembler syntax.  Addition
   13176              :    and subtraction are the only arithmetic that may appear in these
   13177              :    expressions.  FILE is the stdio stream to write to, X is the rtx, and
   13178              :    CODE is the operand print code from the output string.  */
   13179              : 
static void
output_pic_addr_const (FILE *file, rtx x, int code)
{
  char buf[256];

  /* Recurses on CONST, PLUS, MINUS and UNSPEC sub-expressions; anything
     else not listed below is an invalid operand.  */
  switch (GET_CODE (x))
    {
    case PC:
      gcc_assert (flag_pic);
      putc ('.', file);
      break;

    case SYMBOL_REF:
      if (TARGET_64BIT || ! TARGET_MACHO_SYMBOL_STUBS)
        output_addr_const (file, x);
      else
        {
          const char *name = XSTR (x, 0);

          /* Mark the decl as referenced so that cgraph will
             output the function.  */
          if (SYMBOL_REF_DECL (x))
            mark_decl_referenced (SYMBOL_REF_DECL (x));

#if TARGET_MACHO
          if (MACHOPIC_INDIRECT
              && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
            name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
          assemble_name (file, name);
        }
      /* Operand code 'P' requests a call through the PLT where one is
         in use.  */
      if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
          && code == 'P' && ix86_call_use_plt_p (x))
        fputs ("@PLT", file);
      break;

    case LABEL_REF:
      x = XEXP (x, 0);
      /* FALLTHRU */
    case CODE_LABEL:
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
      assemble_name (asm_out_file, buf);
      break;

    CASE_CONST_SCALAR_INT:
      output_addr_const (file, x);
      break;

    case CONST:
      /* This used to output parentheses around the expression,
         but that does not work on the 386 (either ATT or BSD assembler).  */
      output_pic_addr_const (file, XEXP (x, 0), code);
      break;

    case CONST_DOUBLE:
      /* We can't handle floating point constants;
         TARGET_PRINT_OPERAND must handle them.  */
      output_operand_lossage ("floating constant misused");
      break;

    case PLUS:
      /* Some assemblers need integer constants to appear first.  */
      if (CONST_INT_P (XEXP (x, 0)))
        {
          output_pic_addr_const (file, XEXP (x, 0), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 1), code);
        }
      else
        {
          /* One of the two operands must be a constant integer.  */
          gcc_assert (CONST_INT_P (XEXP (x, 1)));
          output_pic_addr_const (file, XEXP (x, 1), code);
          putc ('+', file);
          output_pic_addr_const (file, XEXP (x, 0), code);
        }
      break;

    case MINUS:
      if (!TARGET_MACHO)
        putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
      output_pic_addr_const (file, XEXP (x, 0), code);
      putc ('-', file);
      output_pic_addr_const (file, XEXP (x, 1), code);
      if (!TARGET_MACHO)
        putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
      break;

    case UNSPEC:
      /* Print the wrapped expression followed by the relocation
         modifier the UNSPEC stands for.  */
      gcc_assert (XVECLEN (x, 0) == 1);
      output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
      switch (XINT (x, 1))
        {
        case UNSPEC_GOT:
          fputs ("@GOT", file);
          break;
        case UNSPEC_GOTOFF:
          fputs ("@GOTOFF", file);
          break;
        case UNSPEC_PLTOFF:
          fputs ("@PLTOFF", file);
          break;
        case UNSPEC_PCREL:
          fputs (ASSEMBLER_DIALECT == ASM_ATT ?
                 "(%rip)" : "[rip]", file);
          break;
        case UNSPEC_GOTPCREL:
          fputs (ASSEMBLER_DIALECT == ASM_ATT ?
                 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
          break;
        case UNSPEC_GOTTPOFF:
          /* FIXME: This might be @TPOFF in Sun ld too.  */
          fputs ("@gottpoff", file);
          break;
        case UNSPEC_TPOFF:
          fputs ("@tpoff", file);
          break;
        case UNSPEC_NTPOFF:
          if (TARGET_64BIT)
            fputs ("@tpoff", file);
          else
            fputs ("@ntpoff", file);
          break;
        case UNSPEC_DTPOFF:
          fputs ("@dtpoff", file);
          break;
        case UNSPEC_GOTNTPOFF:
          if (TARGET_64BIT)
            fputs (ASSEMBLER_DIALECT == ASM_ATT ?
                   "@gottpoff(%rip)": "@gottpoff[rip]", file);
          else
            fputs ("@gotntpoff", file);
          break;
        case UNSPEC_INDNTPOFF:
          fputs ("@indntpoff", file);
          break;
        case UNSPEC_SECREL32:
          fputs ("@secrel32", file);
          break;
#if TARGET_MACHO
        case UNSPEC_MACHOPIC_OFFSET:
          putc ('-', file);
          machopic_output_function_base_name (file);
          break;
#endif
        default:
          output_operand_lossage ("invalid UNSPEC as operand");
          break;
        }
       break;

    default:
      output_operand_lossage ("invalid expression as operand");
    }
}
   13334              : 
   13335              : /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   13336              :    We need to emit DTP-relative relocations.  */
   13337              : 
   13338              : static void ATTRIBUTE_UNUSED
   13339          667 : i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
   13340              : {
   13341          667 :   fputs (ASM_LONG, file);
   13342          667 :   output_addr_const (file, x);
   13343              : #if TARGET_WIN32_TLS
   13344              :   fputs ("@secrel32", file);
   13345              : #else
   13346          667 :   fputs ("@dtpoff", file);
   13347              : #endif
   13348          667 :   switch (size)
   13349              :     {
   13350              :     case 4:
   13351              :       break;
   13352          548 :     case 8:
   13353          548 :       fputs (", 0", file);
   13354          548 :       break;
   13355            0 :     default:
   13356            0 :       gcc_unreachable ();
   13357              :    }
   13358          667 : }
   13359              : 
   13360              : /* Return true if X is a representation of the PIC register.  This copes
   13361              :    with calls from ix86_find_base_term, where the register might have
   13362              :    been replaced by a cselib value.  */
   13363              : 
   13364              : static bool
   13365     26862218 : ix86_pic_register_p (rtx x)
   13366              : {
   13367     26862218 :   if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
   13368       748607 :     return (pic_offset_table_rtx
   13369       748607 :             && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
   13370     26113611 :   else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SET_GOT)
   13371              :     return true;
   13372     26110666 :   else if (!REG_P (x))
   13373              :     return false;
   13374     25505935 :   else if (pic_offset_table_rtx)
   13375              :     {
   13376     25486051 :       if (REGNO (x) == REGNO (pic_offset_table_rtx))
   13377              :         return true;
   13378       403271 :       if (HARD_REGISTER_P (x)
   13379       382942 :           && !HARD_REGISTER_P (pic_offset_table_rtx)
   13380       786213 :           && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
   13381              :         return true;
   13382              :       return false;
   13383              :     }
   13384              :   else
   13385        19884 :     return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
   13386              : }
   13387              : 
/* Helper function for ix86_delegitimize_address.
   Attempt to delegitimize TLS local-exec accesses.  */

static rtx
ix86_delegitimize_tls_address (rtx orig_x)
{
  rtx x = orig_x, unspec;
  struct ix86_address addr;

  /* Direct %fs/%gs segment references only exist with this option.  */
  if (!TARGET_TLS_DIRECT_SEG_REFS)
    return orig_x;
  if (MEM_P (x))
    x = XEXP (x, 0);
  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
    return orig_x;
  /* The address must decompose into a TLS segment access with a
     CONST displacement; otherwise leave ORIG_X alone.  */
  if (ix86_decompose_address (x, &addr) == 0
      || addr.seg != DEFAULT_TLS_SEG_REG
      || addr.disp == NULL_RTX
      || GET_CODE (addr.disp) != CONST)
    return orig_x;
  unspec = XEXP (addr.disp, 0);
  /* Strip an outer (plus (unspec ...) (const_int ...)) wrapper.  */
  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
    unspec = XEXP (unspec, 0);
  /* Only local-exec (NTPOFF) accesses are delegitimized here.  */
  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
    return orig_x;
  x = XVECEXP (unspec, 0, 0);
  gcc_assert (SYMBOL_REF_P (x));
  /* Rebuild the plain address: symbol + offset + scaled index + base.  */
  if (unspec != XEXP (addr.disp, 0))
    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
  if (addr.index)
    {
      rtx idx = addr.index;
      if (addr.scale != 1)
        idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
      x = gen_rtx_PLUS (Pmode, idx, x);
    }
  if (addr.base)
    x = gen_rtx_PLUS (Pmode, addr.base, x);
  /* If ORIG_X was a MEM, keep its attributes on the new address.  */
  if (MEM_P (orig_x))
    x = replace_equiv_address_nv (orig_x, x);
  return x;
}
   13430              : 
   13431              : /* In the name of slightly smaller debug output, and to cater to
   13432              :    general assembler lossage, recognize PIC+GOTOFF and turn it back
   13433              :    into a direct symbol reference.
   13434              : 
   13435              :    On Darwin, this is necessary to avoid a crash, because Darwin
   13436              :    has a different PIC label for each routine but the DWARF debugging
   13437              :    information is not associated with any particular routine, so it's
   13438              :    necessary to remove references to the PIC label from RTL stored by
   13439              :    the DWARF output code.
   13440              : 
   13441              :    This helper is used in the normal ix86_delegitimize_address
   13442              :    entrypoint (e.g. used in the target delegitimization hook) and
   13443              :    in ix86_find_base_term.  As compile time memory optimization, we
   13444              :    avoid allocating rtxes that will not change anything on the outcome
   13445              :    of the callers (find_base_value and find_base_term).  */
   13446              : 
   13447              : static inline rtx
   13448   3524079321 : ix86_delegitimize_address_1 (rtx x, bool base_term_p)
   13449              : {
   13450   3524079321 :   rtx orig_x = delegitimize_mem_from_attrs (x);
   13451              :   /* addend is NULL or some rtx if x is something+GOTOFF where
   13452              :      something doesn't include the PIC register.  */
   13453   3524079321 :   rtx addend = NULL_RTX;
   13454              :   /* reg_addend is NULL or a multiple of some register.  */
   13455   3524079321 :   rtx reg_addend = NULL_RTX;
   13456              :   /* const_addend is NULL or a const_int.  */
   13457   3524079321 :   rtx const_addend = NULL_RTX;
   13458              :   /* This is the result, or NULL.  */
   13459   3524079321 :   rtx result = NULL_RTX;
   13460              : 
   13461   3524079321 :   x = orig_x;
   13462              : 
   13463   3524079321 :   if (MEM_P (x))
   13464     62060910 :     x = XEXP (x, 0);
   13465              : 
   13466   3524079321 :   if (TARGET_64BIT)
   13467              :     {
   13468    254680314 :       if (GET_CODE (x) == CONST
   13469      8680777 :           && GET_CODE (XEXP (x, 0)) == PLUS
   13470      6708428 :           && GET_MODE (XEXP (x, 0)) == Pmode
   13471      6708379 :           && CONST_INT_P (XEXP (XEXP (x, 0), 1))
   13472      6708379 :           && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
   13473    254684449 :           && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
   13474              :         {
   13475              :           /* find_base_{value,term} only care about MEMs with arg_pointer_rtx
   13476              :              base.  A CONST can't be arg_pointer_rtx based.  */
   13477            0 :           if (base_term_p && MEM_P (orig_x))
   13478              :             return orig_x;
   13479            0 :           rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
   13480            0 :           x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
   13481            0 :           if (MEM_P (orig_x))
   13482            0 :             x = replace_equiv_address_nv (orig_x, x);
   13483            0 :           return x;
   13484              :         }
   13485              : 
   13486    254680314 :       if (GET_CODE (x) == CONST
   13487      8680777 :           && GET_CODE (XEXP (x, 0)) == UNSPEC
   13488      1972398 :           && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
   13489       679361 :               || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
   13490      1293037 :           && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
   13491              :         {
   13492       297414 :           x = XVECEXP (XEXP (x, 0), 0, 0);
   13493       297414 :           if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
   13494              :             {
   13495            9 :               x = lowpart_subreg (GET_MODE (orig_x), x, GET_MODE (x));
   13496            9 :               if (x == NULL_RTX)
   13497              :                 return orig_x;
   13498              :             }
   13499       297414 :           return x;
   13500              :         }
   13501              : 
   13502    254382900 :       if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
   13503    254381662 :         return ix86_delegitimize_tls_address (orig_x);
   13504              : 
   13505              :       /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
   13506              :          and -mcmodel=medium -fpic.  */
   13507              :     }
   13508              : 
   13509   3269400245 :   if (GET_CODE (x) != PLUS
   13510   1551545279 :       || GET_CODE (XEXP (x, 1)) != CONST)
   13511   3243083526 :     return ix86_delegitimize_tls_address (orig_x);
   13512              : 
   13513     26316719 :   if (ix86_pic_register_p (XEXP (x, 0)))
   13514              :     /* %ebx + GOT/GOTOFF */
   13515              :     ;
   13516      1275847 :   else if (GET_CODE (XEXP (x, 0)) == PLUS)
   13517              :     {
   13518              :       /* %ebx + %reg * scale + GOT/GOTOFF */
   13519       470649 :       reg_addend = XEXP (x, 0);
   13520       470649 :       if (ix86_pic_register_p (XEXP (reg_addend, 0)))
   13521       395799 :         reg_addend = XEXP (reg_addend, 1);
   13522        74850 :       else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
   13523        43364 :         reg_addend = XEXP (reg_addend, 0);
   13524              :       else
   13525              :         {
   13526        31486 :           reg_addend = NULL_RTX;
   13527        31486 :           addend = XEXP (x, 0);
   13528              :         }
   13529              :     }
   13530              :   else
   13531              :     addend = XEXP (x, 0);
   13532              : 
   13533     26316719 :   x = XEXP (XEXP (x, 1), 0);
   13534     26316719 :   if (GET_CODE (x) == PLUS
   13535      1444866 :       && CONST_INT_P (XEXP (x, 1)))
   13536              :     {
   13537      1444866 :       const_addend = XEXP (x, 1);
   13538      1444866 :       x = XEXP (x, 0);
   13539              :     }
   13540              : 
   13541     26316719 :   if (GET_CODE (x) == UNSPEC
   13542     25645336 :       && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
   13543      6742656 :           || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
   13544      1112229 :           || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
   13545            4 :               && !MEM_P (orig_x) && !addend)))
   13546     24533111 :     result = XVECEXP (x, 0, 0);
   13547              : 
   13548     24533111 :   if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
   13549              :       && !MEM_P (orig_x))
   13550              :     result = XVECEXP (x, 0, 0);
   13551              : 
   13552     24533111 :   if (! result)
   13553      1783608 :     return ix86_delegitimize_tls_address (orig_x);
   13554              : 
   13555              :   /* For (PLUS something CONST_INT) both find_base_{value,term} just
   13556              :      recurse on the first operand.  */
   13557     24533111 :   if (const_addend && !base_term_p)
   13558       355710 :     result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
   13559     24533111 :   if (reg_addend)
   13560       856032 :     result = gen_rtx_PLUS (Pmode, reg_addend, result);
   13561     24533111 :   if (addend)
   13562              :     {
   13563              :       /* If the rest of original X doesn't involve the PIC register, add
   13564              :          addend and subtract pic_offset_table_rtx.  This can happen e.g.
   13565              :          for code like:
   13566              :          leal (%ebx, %ecx, 4), %ecx
   13567              :          ...
   13568              :          movl foo@GOTOFF(%ecx), %edx
   13569              :          in which case we return (%ecx - %ebx) + foo
   13570              :          or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
   13571              :          and reload has completed.  Don't do the latter for debug,
   13572              :          as _GLOBAL_OFFSET_TABLE_ can't be expressed in the assembly.  */
   13573       137045 :       if (pic_offset_table_rtx
   13574       137045 :           && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
   13575         2364 :         result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
   13576              :                                                      pic_offset_table_rtx),
   13577              :                                result);
   13578       136257 :       else if (base_term_p
   13579       130099 :                && pic_offset_table_rtx
   13580              :                && !TARGET_MACHO
   13581              :                && !TARGET_VXWORKS_VAROFF)
   13582              :         {
   13583       260198 :           rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
   13584       260198 :           tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
   13585       260198 :           result = gen_rtx_PLUS (Pmode, tmp, result);
   13586       130099 :         }
   13587              :       else
   13588              :         return orig_x;
   13589              :     }
   13590     49053863 :   if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
   13591              :     {
   13592            0 :       result = lowpart_subreg (GET_MODE (orig_x), result, Pmode);
   13593            0 :       if (result == NULL_RTX)
   13594              :         return orig_x;
   13595              :     }
   13596              :   return result;
   13597              : }
   13598              : 
   13599              : /* The normal instantiation of the above template.  */
   13600              : 
   13601              : static rtx
   13602    325135905 : ix86_delegitimize_address (rtx x)
   13603              : {
   13604    325135905 :   return ix86_delegitimize_address_1 (x, false);
   13605              : }
   13606              : 
   13607              : /* If X is a machine specific address (i.e. a symbol or label being
   13608              :    referenced as a displacement from the GOT implemented using an
   13609              :    UNSPEC), then return the base term.  Otherwise return X.  */
   13610              : 
   13611              : rtx
   13612   6656103796 : ix86_find_base_term (rtx x)
   13613              : {
   13614   6656103796 :   rtx term;
   13615              : 
   13616   6656103796 :   if (TARGET_64BIT)
   13617              :     {
   13618   3457160380 :       if (GET_CODE (x) != CONST)
   13619              :         return x;
   13620     44302556 :       term = XEXP (x, 0);
   13621     44302556 :       if (GET_CODE (term) == PLUS
   13622     44287938 :           && CONST_INT_P (XEXP (term, 1)))
   13623     44287938 :         term = XEXP (term, 0);
   13624     44302556 :       if (GET_CODE (term) != UNSPEC
   13625        40405 :           || (XINT (term, 1) != UNSPEC_GOTPCREL
   13626        40405 :               && XINT (term, 1) != UNSPEC_PCREL))
   13627              :         return x;
   13628              : 
   13629            0 :       return XVECEXP (term, 0, 0);
   13630              :     }
   13631              : 
   13632   3198943416 :   return ix86_delegitimize_address_1 (x, true);
   13633              : }
   13634              : 
   13635              : /* Return true if X shouldn't be emitted into the debug info.
   13636              :    Disallow UNSPECs other than @gotoff - we can't emit _GLOBAL_OFFSET_TABLE_
   13637              :    symbol easily into the .debug_info section, so we need not to
   13638              :    delegitimize, but instead assemble as @gotoff.
   13639              :    Disallow _GLOBAL_OFFSET_TABLE_ SYMBOL_REF - the assembler magically
   13640              :    assembles that as _GLOBAL_OFFSET_TABLE_-. expression.  */
   13641              : 
   13642              : static bool
   13643      1847445 : ix86_const_not_ok_for_debug_p (rtx x)
   13644              : {
   13645      1847445 :   if (GET_CODE (x) == UNSPEC && XINT (x, 1) != UNSPEC_GOTOFF)
   13646              :     return true;
   13647              : 
   13648      1847425 :   if (SYMBOL_REF_P (x) && strcmp (XSTR (x, 0), GOT_SYMBOL_NAME) == 0)
   13649            0 :     return true;
   13650              : 
   13651              :   return false;
   13652              : }
   13653              : 
/* Write to FILE the assembler condition-code suffix (e.g. "e", "ne",
   "ge") that tests RTX comparison CODE evaluated in flags mode MODE.
   If REVERSE, print the suffix for the opposite condition instead.
   FP selects the alternate spellings some assemblers need for
   fcmov-style consumers (e.g. "nbe" instead of "a" — see the GTU case
   below).  The MODE switches below encode which flag combinations each
   CC mode makes available; unsupported pairings assert.  */

static void
put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
		    bool fp, FILE *file)
{
  const char *suffix;

  if (mode == CCFPmode)
    {
      /* FP comparisons are first mapped onto the equivalent integer
	 condition, then handled below as a plain CCmode test.  */
      code = ix86_fp_compare_code_to_integer (code);
      mode = CCmode;
    }
  if (reverse)
    code = reverse_condition (code);

  switch (code)
    {
    case EQ:
      gcc_assert (mode != CCGZmode);
      switch (mode)
	{
	/* In the single-flag CC modes, EQ tests the flag the mode is
	   named after (carry, overflow, parity, sign) rather than ZF.  */
	case E_CCAmode:
	  suffix = "a";
	  break;
	case E_CCCmode:
	  suffix = "c";
	  break;
	case E_CCOmode:
	  suffix = "o";
	  break;
	case E_CCPmode:
	  suffix = "p";
	  break;
	case E_CCSmode:
	  suffix = "s";
	  break;
	default:
	  suffix = "e";
	  break;
	}
      break;
    case NE:
      gcc_assert (mode != CCGZmode);
      switch (mode)
	{
	/* Negated forms of the single-flag tests above.  */
	case E_CCAmode:
	  suffix = "na";
	  break;
	case E_CCCmode:
	  suffix = "nc";
	  break;
	case E_CCOmode:
	  suffix = "no";
	  break;
	case E_CCPmode:
	  suffix = "np";
	  break;
	case E_CCSmode:
	  suffix = "ns";
	  break;
	default:
	  suffix = "ne";
	  break;
	}
      break;
    case GT:
      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
      suffix = "g";
      break;
    case GTU:
      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
	 Those same assemblers have the same but opposite lossage on cmov.  */
      if (mode == CCmode)
	suffix = fp ? "nbe" : "a";
      else
	gcc_unreachable ();
      break;
    case LT:
      switch (mode)
	{
	/* Modes where only the sign flag is meaningful.  */
	case E_CCNOmode:
	case E_CCGOCmode:
	  suffix = "s";
	  break;

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGZmode:
	  suffix = "l";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case LTU:
      if (mode == CCmode || mode == CCGZmode)
	suffix = "b";
      else if (mode == CCCmode)
	/* Carry-only mode: "b" and "c" test the same flag, but fcmov
	   consumers need the "b" spelling.  */
	suffix = fp ? "b" : "c";
      else
	gcc_unreachable ();
      break;
    case GE:
      switch (mode)
	{
	/* Modes where only the sign flag is meaningful.  */
	case E_CCNOmode:
	case E_CCGOCmode:
	  suffix = "ns";
	  break;

	case E_CCmode:
	case E_CCGCmode:
	case E_CCGZmode:
	  suffix = "ge";
	  break;

	default:
	  gcc_unreachable ();
	}
      break;
    case GEU:
      if (mode == CCmode || mode == CCGZmode)
	suffix = "nb";
      else if (mode == CCCmode)
	/* See the LTU/CCCmode note above on "nb" vs "nc".  */
	suffix = fp ? "nb" : "nc";
      else
	gcc_unreachable ();
      break;
    case LE:
      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
      suffix = "le";
      break;
    case LEU:
      if (mode == CCmode)
	suffix = "be";
      else
	gcc_unreachable ();
      break;
    case UNORDERED:
      /* Unordered FP results set PF; "u" is the fcmov spelling.  */
      suffix = fp ? "u" : "p";
      break;
    case ORDERED:
      suffix = fp ? "nu" : "np";
      break;
    default:
      gcc_unreachable ();
    }
  fputs (suffix, file);
}
   13803              : 
/* Print the name of register X to FILE based on its machine mode and number.
   If CODE is 'w', pretend the mode is HImode.
   If CODE is 'b', pretend the mode is QImode.
   If CODE is 'k', pretend the mode is SImode.
   If CODE is 'q', pretend the mode is DImode.
   If CODE is 'x', pretend the mode is V4SFmode.
   If CODE is 't', pretend the mode is V8SFmode.
   If CODE is 'g', pretend the mode is V16SFmode.
   If CODE is 'h', pretend the reg is the 'high' byte register.
   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
   If CODE is 'd', duplicate the operand for AVX instruction.
   If CODE is 'V', print naked full integer register name without %.
 */

void
print_reg (rtx x, int code, FILE *file)
{
  const char *reg;
  int msize;			/* Effective operand size in bytes; 0 means
				   a "high" byte register (CODE 'h').  */
  unsigned int regno;
  bool duplicated;

  /* AT&T syntax prefixes register names with '%'; 'V' asks for the
     naked name.  */
  if (ASSEMBLER_DIALECT == ASM_ATT && code != 'V')
    putc ('%', file);

  /* The program counter prints as "rip" and only exists in 64-bit mode.  */
  if (x == pc_rtx)
    {
      gcc_assert (TARGET_64BIT);
      fputs ("rip", file);
      return;
    }

  if (code == 'y' && STACK_TOP_P (x))
    {
      fputs ("st(0)", file);
      return;
    }

  /* Map the size-override CODE letters to a byte size, falling back to
     the operand's own mode.  */
  if (code == 'w')
    msize = 2;
  else if (code == 'b')
    msize = 1;
  else if (code == 'k')
    msize = 4;
  else if (code == 'q')
    msize = 8;
  else if (code == 'h')
    msize = 0;
  else if (code == 'x')
    msize = 16;
  else if (code == 't')
    msize = 32;
  else if (code == 'g')
    msize = 64;
  else
    msize = GET_MODE_SIZE (GET_MODE (x));

  regno = REGNO (x);

  /* These registers have no assembler-level name that may be emitted.  */
  if (regno == ARG_POINTER_REGNUM
      || regno == FRAME_POINTER_REGNUM
      || regno == FPSR_REG)
    {
      output_operand_lossage
	("invalid use of register '%s'", reg_names[regno]);
      return;
    }
  else if (regno == FLAGS_REG)
    {
      output_operand_lossage ("invalid use of asm flag output");
      return;
    }

  /* 'V' always prints the full-width integer register name.  */
  if (code == 'V')
    {
      if (GENERAL_REGNO_P (regno))
	msize = GET_MODE_SIZE (word_mode);
      else
	error ("%<V%> modifier on non-integer register");
    }

  duplicated = code == 'd' && TARGET_AVX;

  /* Pick the name table and any size prefix.  Note the deliberate
     fallthroughs: sizes 16/12/8 share the 4-byte prefix logic, and
     "goto normal" from the byte cases reuses the full-name table for
     register numbers outside the QI tables.  */
  switch (msize)
    {
    case 16:
    case 12:
    case 8:
      if (GENERAL_REGNO_P (regno) && msize > GET_MODE_SIZE (word_mode))
	warning (0, "unsupported size for integer register");
      /* FALLTHRU */
    case 4:
      /* Legacy integer registers take an 'e' (32-bit) or 'r' (64-bit)
	 prefix, e.g. eax/rax.  */
      if (LEGACY_INT_REGNO_P (regno))
	putc (msize > 4 && TARGET_64BIT ? 'r' : 'e', file);
      /* FALLTHRU */
    case 2:
    normal:
      reg = hi_reg_name[regno];
      break;
    case 1:
      if (regno >= ARRAY_SIZE (qi_reg_name))
	goto normal;
      if (!ANY_QI_REGNO_P (regno))
	error ("unsupported size for integer register");
      reg = qi_reg_name[regno];
      break;
    case 0:
      /* 'h' high-byte registers (ah/bh/ch/dh).  */
      if (regno >= ARRAY_SIZE (qi_high_reg_name))
	goto normal;
      reg = qi_high_reg_name[regno];
      break;
    case 32:
    case 64:
      /* YMM/ZMM: print 'y' or 'z' then the xmm name minus its leading
	 'x' (hence the "+ 1").  */
      if (SSE_REGNO_P (regno))
	{
	  gcc_assert (!duplicated);
	  putc (msize == 32 ? 'y' : 'z', file);
	  reg = hi_reg_name[regno] + 1;
	  break;
	}
      goto normal;
    default:
      gcc_unreachable ();
    }

  fputs (reg, file);

  /* Irritatingly, AMD extended registers use
     different naming convention: "r%d[bwd]"  */
  if (REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
    {
      gcc_assert (TARGET_64BIT);
      switch (msize)
	{
	  case 0:
	    error ("extended registers have no high halves");
	    break;
	  case 1:
	    putc ('b', file);
	    break;
	  case 2:
	    putc ('w', file);
	    break;
	  case 4:
	    putc ('d', file);
	    break;
	  case 8:
	    /* no suffix */
	    break;
	  default:
	    error ("unsupported operand size for extended register");
	    break;
	}
      return;
    }

  /* 'd' on AVX: emit the register a second time as the extra source
     operand.  */
  if (duplicated)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
	fprintf (file, ", %%%s", reg);
      else
	fprintf (file, ", %s", reg);
    }
}
   13968              : 
   13969              : /* Meaning of CODE:
   13970              :    L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
   13971              :    C -- print opcode suffix for set/cmov insn.
   13972              :    c -- like C, but print reversed condition
   13973              :    F,f -- likewise, but for floating-point.
   13974              :    O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
   13975              :         otherwise nothing
   13976              :    R -- print embedded rounding and sae.
   13977              :    r -- print only sae.
   13978              :    z -- print the opcode suffix for the size of the current operand.
   13979              :    Z -- likewise, with special suffixes for x87 instructions.
   13980              :    * -- print a star (in certain assembler syntax)
   13981              :    A -- print an absolute memory reference.
   13982              :    E -- print address with DImode register names if TARGET_64BIT.
   13983              :    w -- print the operand as if it's a "word" (HImode) even if it isn't.
   13984              :    s -- print a shift double count, followed by the assemblers argument
   13985              :         delimiter.
   13986              :    b -- print the QImode name of the register for the indicated operand.
   13987              :         %b0 would print %al if operands[0] is reg 0.
   13988              :    w --  likewise, print the HImode name of the register.
   13989              :    k --  likewise, print the SImode name of the register.
   13990              :    q --  likewise, print the DImode name of the register.
   13991              :    x --  likewise, print the V4SFmode name of the register.
   13992              :    t --  likewise, print the V8SFmode name of the register.
   13993              :    g --  likewise, print the V16SFmode name of the register.
   13994              :    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
   13995              :    y -- print "st(0)" instead of "st" as a register.
   13996              :    d -- print duplicated register operand for AVX instruction.
   13997              :    D -- print condition for SSE cmp instruction.
   13998              :    P -- if PIC, print an @PLT suffix.  For -fno-plt, load function
   13999              :         address from GOT.
   14000              :    p -- print raw symbol name.
   14001              :    X -- don't print any sort of PIC '@' suffix for a symbol.
   14002              :    & -- print some in-use local-dynamic symbol name.
   14003              :    H -- print a memory address offset by 8; used for sse high-parts
   14004              :    Y -- print condition for XOP pcom* instruction.
   14005              :    V -- print naked full integer register name without %.
   14006              :    v -- print segment override prefix
   14007              :    + -- print a branch hint as 'cs' or 'ds' prefix
   14008              :    ; -- print a semicolon (after prefixes due to bug in older gas).
   14009              :    ~ -- print "i" if TARGET_AVX2, "f" otherwise.
   14010              :    ^ -- print addr32 prefix if Pmode != word_mode
   14011              :    M -- print addr32 prefix for TARGET_X32 with VSIB address.
   14012              :    ! -- print NOTRACK prefix for jxx/call/ret instructions if required.
   14013              :    N -- print maskz if it's constant 0 operand.
   14014              :    G -- print embedded flag for ccmp/ctest.
   14015              :  */
   14016              : 
   14017              : void
   14018    176676608 : ix86_print_operand (FILE *file, rtx x, int code)
   14019              : {
   14020    176872358 :   if (code)
   14021              :     {
   14022     62112632 :       switch (code)
   14023              :         {
   14024       195746 :         case 'A':
   14025       195746 :           switch (ASSEMBLER_DIALECT)
   14026              :             {
   14027       195746 :             case ASM_ATT:
   14028       195746 :               putc ('*', file);
   14029       195746 :               break;
   14030              : 
   14031            0 :             case ASM_INTEL:
   14032              :               /* Intel syntax. For absolute addresses, registers should not
   14033              :                  be surrounded by braces.  */
   14034            0 :               if (!REG_P (x))
   14035              :                 {
   14036            0 :                   putc ('[', file);
   14037            0 :                   ix86_print_operand (file, x, 0);
   14038            0 :                   putc (']', file);
   14039            0 :                   return;
   14040              :                 }
   14041              :               break;
   14042              : 
   14043            0 :             default:
   14044            0 :               gcc_unreachable ();
   14045              :             }
   14046              : 
   14047       195746 :           ix86_print_operand (file, x, 0);
   14048       195746 :           return;
   14049              : 
   14050      3557828 :         case 'E':
   14051              :           /* Wrap address in an UNSPEC to declare special handling.  */
   14052      3557828 :           if (TARGET_64BIT)
   14053      3070353 :             x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
   14054              : 
   14055      3557828 :           output_address (VOIDmode, x);
   14056      3557828 :           return;
   14057              : 
   14058            0 :         case 'L':
   14059            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14060            0 :             putc ('l', file);
   14061            0 :           return;
   14062              : 
   14063            0 :         case 'W':
   14064            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14065            0 :             putc ('w', file);
   14066            0 :           return;
   14067              : 
   14068            0 :         case 'B':
   14069            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14070            0 :             putc ('b', file);
   14071            0 :           return;
   14072              : 
   14073            0 :         case 'Q':
   14074            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14075            0 :             putc ('l', file);
   14076            0 :           return;
   14077              : 
   14078            0 :         case 'S':
   14079            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14080            0 :             putc ('s', file);
   14081            0 :           return;
   14082              : 
   14083            0 :         case 'T':
   14084            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14085            0 :             putc ('t', file);
   14086            0 :           return;
   14087              : 
   14088              :         case 'O':
   14089              : #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
   14090              :           if (ASSEMBLER_DIALECT != ASM_ATT)
   14091              :             return;
   14092              : 
   14093              :           switch (GET_MODE_SIZE (GET_MODE (x)))
   14094              :             {
   14095              :             case 2:
   14096              :               putc ('w', file);
   14097              :               break;
   14098              : 
   14099              :             case 4:
   14100              :               putc ('l', file);
   14101              :               break;
   14102              : 
   14103              :             case 8:
   14104              :               putc ('q', file);
   14105              :               break;
   14106              : 
   14107              :             default:
   14108              :               output_operand_lossage ("invalid operand size for operand "
   14109              :                                       "code 'O'");
   14110              :               return;
   14111              :             }
   14112              : 
   14113              :           putc ('.', file);
   14114              : #endif
   14115              :           return;
   14116              : 
   14117        37846 :         case 'z':
   14118        37846 :           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
   14119              :             {
   14120              :               /* Opcodes don't get size suffixes if using Intel opcodes.  */
   14121        37844 :               if (ASSEMBLER_DIALECT == ASM_INTEL)
   14122              :                 return;
   14123              : 
   14124        75688 :               switch (GET_MODE_SIZE (GET_MODE (x)))
   14125              :                 {
   14126            6 :                 case 1:
   14127            6 :                   putc ('b', file);
   14128            6 :                   return;
   14129              : 
   14130            6 :                 case 2:
   14131            6 :                   putc ('w', file);
   14132            6 :                   return;
   14133              : 
   14134        37325 :                 case 4:
   14135        37325 :                   putc ('l', file);
   14136        37325 :                   return;
   14137              : 
   14138          507 :                 case 8:
   14139          507 :                   putc ('q', file);
   14140          507 :                   return;
   14141              : 
   14142            0 :                 default:
   14143            0 :                   output_operand_lossage ("invalid operand size for operand "
   14144              :                                           "code 'z'");
   14145            0 :                   return;
   14146              :                 }
   14147              :             }
   14148              : 
   14149            2 :           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
   14150              :             {
   14151            1 :               if (this_is_asm_operands)
   14152            1 :                 warning_for_asm (this_is_asm_operands,
   14153              :                                  "non-integer operand used with operand code %<z%>");
   14154              :               else
   14155            0 :                 warning (0, "non-integer operand used with operand code %<z%>");
   14156              :             }
   14157              :           /* FALLTHRU */
   14158              : 
   14159       382315 :         case 'Z':
   14160              :           /* 387 opcodes don't get size suffixes if using Intel opcodes.  */
   14161       382315 :           if (ASSEMBLER_DIALECT == ASM_INTEL)
   14162              :             return;
   14163              : 
   14164       382315 :           if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
   14165              :             {
   14166        29242 :               switch (GET_MODE_SIZE (GET_MODE (x)))
   14167              :                 {
   14168         3501 :                 case 2:
   14169              : #ifdef HAVE_AS_IX86_FILDS
   14170         3501 :                   putc ('s', file);
   14171              : #endif
   14172         3501 :                   return;
   14173              : 
   14174         3941 :                 case 4:
   14175         3941 :                   putc ('l', file);
   14176         3941 :                   return;
   14177              : 
   14178         7179 :                 case 8:
   14179              : #ifdef HAVE_AS_IX86_FILDQ
   14180         7179 :                   putc ('q', file);
   14181              : #else
   14182              :                   fputs ("ll", file);
   14183              : #endif
   14184         7179 :                   return;
   14185              : 
   14186              :                 default:
   14187              :                   break;
   14188              :                 }
   14189              :             }
   14190       367694 :           else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
   14191              :             {
   14192              :               /* 387 opcodes don't get size suffixes
   14193              :                  if the operands are registers.  */
   14194       367692 :               if (STACK_REG_P (x))
   14195              :                 return;
   14196              : 
   14197       690630 :               switch (GET_MODE_SIZE (GET_MODE (x)))
   14198              :                 {
   14199        23079 :                 case 4:
   14200        23079 :                   putc ('s', file);
   14201        23079 :                   return;
   14202              : 
   14203        32724 :                 case 8:
   14204        32724 :                   putc ('l', file);
   14205        32724 :                   return;
   14206              : 
   14207       289510 :                 case 12:
   14208       289510 :                 case 16:
   14209       289510 :                   putc ('t', file);
   14210       289510 :                   return;
   14211              : 
   14212              :                 default:
   14213              :                   break;
   14214              :                 }
   14215              :             }
   14216              :           else
   14217              :             {
   14218            2 :               output_operand_lossage ("invalid operand type used with "
   14219              :                                       "operand code '%c'", code);
   14220            2 :               return;
   14221              :             }
   14222              : 
   14223            2 :           output_operand_lossage ("invalid operand size for operand code '%c'",
   14224              :                                   code);
   14225            2 :           return;
   14226              : 
   14227              :         case 'd':
   14228              :         case 'b':
   14229              :         case 'w':
   14230              :         case 'k':
   14231              :         case 'q':
   14232              :         case 'h':
   14233              :         case 't':
   14234              :         case 'g':
   14235              :         case 'y':
   14236              :         case 'x':
   14237              :         case 'X':
   14238              :         case 'P':
   14239              :         case 'p':
   14240              :         case 'V':
   14241              :           break;
   14242              : 
   14243            0 :         case 's':
   14244            0 :           if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
   14245              :             {
   14246            0 :               ix86_print_operand (file, x, 0);
   14247            0 :               fputs (", ", file);
   14248              :             }
   14249            0 :           return;
   14250              : 
   14251          494 :         case 'Y':
   14252          494 :           switch (GET_CODE (x))
   14253              :             {
   14254          182 :             case NE:
   14255          182 :               fputs ("neq", file);
   14256          182 :               break;
   14257           32 :             case EQ:
   14258           32 :               fputs ("eq", file);
   14259           32 :               break;
   14260           64 :             case GE:
   14261           64 :             case GEU:
   14262           64 :               fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
   14263           64 :               break;
   14264           40 :             case GT:
   14265           40 :             case GTU:
   14266           40 :               fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
   14267           40 :               break;
   14268           64 :             case LE:
   14269           64 :             case LEU:
   14270           64 :               fputs ("le", file);
   14271           64 :               break;
   14272          112 :             case LT:
   14273          112 :             case LTU:
   14274          112 :               fputs ("lt", file);
   14275          112 :               break;
   14276            0 :             case UNORDERED:
   14277            0 :               fputs ("unord", file);
   14278            0 :               break;
   14279            0 :             case ORDERED:
   14280            0 :               fputs ("ord", file);
   14281            0 :               break;
   14282            0 :             case UNEQ:
   14283            0 :               fputs ("ueq", file);
   14284            0 :               break;
   14285            0 :             case UNGE:
   14286            0 :               fputs ("nlt", file);
   14287            0 :               break;
   14288            0 :             case UNGT:
   14289            0 :               fputs ("nle", file);
   14290            0 :               break;
   14291            0 :             case UNLE:
   14292            0 :               fputs ("ule", file);
   14293            0 :               break;
   14294            0 :             case UNLT:
   14295            0 :               fputs ("ult", file);
   14296            0 :               break;
   14297            0 :             case LTGT:
   14298            0 :               fputs ("une", file);
   14299            0 :               break;
   14300            0 :             default:
   14301            0 :               output_operand_lossage ("operand is not a condition code, "
   14302              :                                       "invalid operand code 'Y'");
   14303            0 :               return;
   14304              :             }
   14305          494 :           return;
   14306              : 
   14307         8817 :         case 'D':
   14308              :           /* Little bit of braindamage here.  The SSE compare instructions
   14309              :              does use completely different names for the comparisons that the
   14310              :              fp conditional moves.  */
   14311         8817 :           switch (GET_CODE (x))
   14312              :             {
   14313            3 :             case UNEQ:
   14314            3 :               if (TARGET_AVX)
   14315              :                 {
   14316            3 :                   fputs ("eq_us", file);
   14317            3 :                   break;
   14318              :                 }
   14319              :              /* FALLTHRU */
   14320         4339 :             case EQ:
   14321         4339 :               fputs ("eq", file);
   14322         4339 :               break;
   14323            0 :             case UNLT:
   14324            0 :               if (TARGET_AVX)
   14325              :                 {
   14326            0 :                   fputs ("nge", file);
   14327            0 :                   break;
   14328              :                 }
   14329              :              /* FALLTHRU */
   14330         1545 :             case LT:
   14331         1545 :               fputs ("lt", file);
   14332         1545 :               break;
   14333            0 :             case UNLE:
   14334            0 :               if (TARGET_AVX)
   14335              :                 {
   14336            0 :                   fputs ("ngt", file);
   14337            0 :                   break;
   14338              :                 }
   14339              :              /* FALLTHRU */
   14340          798 :             case LE:
   14341          798 :               fputs ("le", file);
   14342          798 :               break;
   14343           90 :             case UNORDERED:
   14344           90 :               fputs ("unord", file);
   14345           90 :               break;
   14346           24 :             case LTGT:
   14347           24 :               if (TARGET_AVX)
   14348              :                 {
   14349           24 :                   fputs ("neq_oq", file);
   14350           24 :                   break;
   14351              :                 }
   14352              :              /* FALLTHRU */
   14353          758 :             case NE:
   14354          758 :               fputs ("neq", file);
   14355          758 :               break;
   14356            0 :             case GE:
   14357            0 :               if (TARGET_AVX)
   14358              :                 {
   14359            0 :                   fputs ("ge", file);
   14360            0 :                   break;
   14361              :                 }
   14362              :              /* FALLTHRU */
   14363          410 :             case UNGE:
   14364          410 :               fputs ("nlt", file);
   14365          410 :               break;
   14366            0 :             case GT:
   14367            0 :               if (TARGET_AVX)
   14368              :                 {
   14369            0 :                   fputs ("gt", file);
   14370            0 :                   break;
   14371              :                 }
   14372              :              /* FALLTHRU */
   14373          767 :             case UNGT:
   14374          767 :               fputs ("nle", file);
   14375          767 :               break;
   14376           83 :             case ORDERED:
   14377           83 :               fputs ("ord", file);
   14378           83 :               break;
   14379            0 :             default:
   14380            0 :               output_operand_lossage ("operand is not a condition code, "
   14381              :                                       "invalid operand code 'D'");
   14382            0 :               return;
   14383              :             }
   14384         8817 :           return;
   14385              : 
   14386      7161200 :         case 'F':
   14387      7161200 :         case 'f':
   14388              : #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
   14389              :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14390              :             putc ('.', file);
   14391              :           gcc_fallthrough ();
   14392              : #endif
   14393              : 
   14394      7161200 :         case 'C':
   14395      7161200 :         case 'c':
   14396      7161200 :           if (!COMPARISON_P (x))
   14397              :             {
   14398            0 :               output_operand_lossage ("operand is not a condition code, "
   14399              :                                       "invalid operand code '%c'", code);
   14400            0 :               return;
   14401              :             }
   14402      7161200 :           put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
   14403      7161200 :                               code == 'c' || code == 'f',
   14404      7161200 :                               code == 'F' || code == 'f',
   14405              :                               file);
   14406      7161200 :           return;
   14407              : 
   14408           21 :         case 'G':
   14409           21 :           {
   14410           21 :             int dfv = INTVAL (x);
   14411           21 :             const char *dfv_suffix = ix86_ccmp_dfv_mapping[dfv];
   14412           21 :             fputs (dfv_suffix, file);
   14413              :           }
   14414           21 :           return;
   14415              : 
   14416         1301 :         case 'H':
   14417         1301 :           if (!offsettable_memref_p (x))
   14418              :             {
   14419            1 :               output_operand_lossage ("operand is not an offsettable memory "
   14420              :                                       "reference, invalid operand code 'H'");
   14421            1 :               return;
   14422              :             }
   14423              :           /* It doesn't actually matter what mode we use here, as we're
   14424              :              only going to use this for printing.  */
   14425         1300 :           x = adjust_address_nv (x, DImode, 8);
   14426              :           /* Output 'qword ptr' for intel assembler dialect.  */
   14427         1300 :           if (ASSEMBLER_DIALECT == ASM_INTEL)
   14428            0 :             code = 'q';
   14429              :           break;
   14430              : 
   14431        75748 :         case 'K':
   14432        75748 :           if (!CONST_INT_P (x))
   14433              :             {
   14434            1 :               output_operand_lossage ("operand is not an integer, invalid "
   14435              :                                       "operand code 'K'");
   14436            1 :               return;
   14437              :             }
   14438              : 
   14439        75747 :           if (INTVAL (x) & IX86_HLE_ACQUIRE)
   14440              : #ifdef HAVE_AS_IX86_HLE
   14441           22 :             fputs ("xacquire ", file);
   14442              : #else
   14443              :             fputs ("\n" ASM_BYTE "0xf2\n\t", file);
   14444              : #endif
   14445        75725 :           else if (INTVAL (x) & IX86_HLE_RELEASE)
   14446              : #ifdef HAVE_AS_IX86_HLE
   14447           24 :             fputs ("xrelease ", file);
   14448              : #else
   14449              :             fputs ("\n" ASM_BYTE "0xf3\n\t", file);
   14450              : #endif
   14451              :           /* We do not want to print value of the operand.  */
   14452        75747 :           return;
   14453              : 
   14454        42996 :         case 'N':
   14455        42996 :           if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
   14456        15481 :             fputs ("{z}", file);
   14457        42996 :           return;
   14458              : 
   14459         3999 :         case 'r':
   14460         3999 :           if (!CONST_INT_P (x) || INTVAL (x) != ROUND_SAE)
   14461              :             {
   14462            2 :               output_operand_lossage ("operand is not a specific integer, "
   14463              :                                       "invalid operand code 'r'");
   14464            2 :               return;
   14465              :             }
   14466              : 
   14467         3997 :           if (ASSEMBLER_DIALECT == ASM_INTEL)
   14468            1 :             fputs (", ", file);
   14469              : 
   14470         3997 :           fputs ("{sae}", file);
   14471              : 
   14472         3997 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14473         3996 :             fputs (", ", file);
   14474              : 
   14475         3997 :           return;
   14476              : 
   14477         5975 :         case 'R':
   14478         5975 :           if (!CONST_INT_P (x))
   14479              :             {
   14480            1 :               output_operand_lossage ("operand is not an integer, invalid "
   14481              :                                       "operand code 'R'");
   14482            1 :               return;
   14483              :             }
   14484              : 
   14485         5974 :           if (ASSEMBLER_DIALECT == ASM_INTEL)
   14486            2 :             fputs (", ", file);
   14487              : 
   14488         5974 :           switch (INTVAL (x))
   14489              :             {
   14490         5163 :             case ROUND_NEAREST_INT | ROUND_SAE:
   14491         5163 :               fputs ("{rn-sae}", file);
   14492         5163 :               break;
   14493          637 :             case ROUND_NEG_INF | ROUND_SAE:
   14494          637 :               fputs ("{rd-sae}", file);
   14495          637 :               break;
   14496           52 :             case ROUND_POS_INF | ROUND_SAE:
   14497           52 :               fputs ("{ru-sae}", file);
   14498           52 :               break;
   14499          121 :             case ROUND_ZERO | ROUND_SAE:
   14500          121 :               fputs ("{rz-sae}", file);
   14501          121 :               break;
   14502            1 :             default:
   14503            1 :               output_operand_lossage ("operand is not a specific integer, "
   14504              :                                       "invalid operand code 'R'");
   14505              :             }
   14506              : 
   14507         5974 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14508         5972 :             fputs (", ", file);
   14509              : 
   14510         5974 :           return;
   14511              : 
   14512         9168 :         case 'v':
   14513         9168 :           if (MEM_P (x))
   14514              :             {
   14515         9289 :               switch (MEM_ADDR_SPACE (x))
   14516              :                 {
   14517              :                 case ADDR_SPACE_GENERIC:
   14518              :                   break;
   14519            0 :                 case ADDR_SPACE_SEG_FS:
   14520            0 :                   fputs ("fs ", file);
   14521            0 :                   break;
   14522            0 :                 case ADDR_SPACE_SEG_GS:
   14523            0 :                   fputs ("gs ", file);
   14524            0 :                   break;
   14525            0 :                 default:
   14526            0 :                   gcc_unreachable ();
   14527              :                 }
   14528              :             }
   14529              :           else
   14530            0 :             output_operand_lossage ("operand is not a memory reference, "
   14531              :                                     "invalid operand code 'v'");
   14532         9168 :           return;
   14533              : 
   14534            0 :         case '*':
   14535            0 :           if (ASSEMBLER_DIALECT == ASM_ATT)
   14536            0 :             putc ('*', file);
   14537            0 :           return;
   14538              : 
   14539          202 :         case '&':
   14540          202 :           {
   14541          202 :             const char *name = get_some_local_dynamic_name ();
   14542          202 :             if (name == NULL)
   14543            1 :               output_operand_lossage ("'%%&' used without any "
   14544              :                                       "local dynamic TLS references");
   14545              :             else
   14546          201 :               assemble_name (file, name);
   14547          202 :             return;
   14548              :           }
   14549              : 
   14550      6523245 :         case '+':
   14551      6523245 :           {
   14552      6523245 :             rtx x;
   14553              : 
   14554      6523245 :             if (!optimize
   14555      5116158 :                 || optimize_function_for_size_p (cfun)
   14556     11448919 :                 || (!TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN
   14557      4925674 :                     && !TARGET_BRANCH_PREDICTION_HINTS_TAKEN))
   14558      6523245 :               return;
   14559              : 
   14560            0 :             x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
   14561            0 :             if (x)
   14562              :               {
   14563            0 :                 int pred_val = profile_probability::from_reg_br_prob_note
   14564            0 :                                  (XINT (x, 0)).to_reg_br_prob_base ();
   14565              : 
   14566            0 :                 bool taken = pred_val > REG_BR_PROB_BASE / 2;
   14567              :                 /* We use 3e (DS) prefix for taken branches and
   14568              :                    2e (CS) prefix for not taken branches.  */
   14569            0 :                 if (taken && TARGET_BRANCH_PREDICTION_HINTS_TAKEN)
   14570            0 :                   fputs ("ds ; ", file);
   14571            0 :                 else if (!taken && TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN)
   14572            0 :                   fputs ("cs ; ", file);
   14573              :               }
   14574            0 :             return;
   14575              :           }
   14576              : 
   14577              :         case ';':
   14578              : #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
   14579              :           putc (';', file);
   14580              : #endif
   14581              :           return;
   14582              : 
   14583         3722 :         case '~':
   14584         3722 :           putc (TARGET_AVX2 ? 'i' : 'f', file);
   14585         3722 :           return;
   14586              : 
   14587         1675 :         case 'M':
   14588         1675 :           if (TARGET_X32)
   14589              :             {
   14590              :               /* NB: 32-bit indices in VSIB address are sign-extended
   14591              :                  to 64 bits. In x32, if 32-bit address 0xf7fa3010 is
   14592              :                  sign-extended to 0xfffffffff7fa3010 which is invalid
   14593              :                  address.  Add addr32 prefix if there is no base
   14594              :                  register nor symbol.  */
   14595           40 :               bool ok;
   14596           40 :               struct ix86_address parts;
   14597           40 :               ok = ix86_decompose_address (x, &parts);
   14598           40 :               gcc_assert (ok && parts.index == NULL_RTX);
   14599           40 :               if (parts.base == NULL_RTX
   14600           40 :                   && (parts.disp == NULL_RTX
   14601           34 :                       || !symbolic_operand (parts.disp,
   14602           34 :                                             GET_MODE (parts.disp))))
   14603           34 :                 fputs ("addr32 ", file);
   14604              :             }
   14605         1675 :           return;
   14606              : 
   14607        20184 :         case '^':
   14608        23346 :           if (Pmode != word_mode)
   14609            0 :             fputs ("addr32 ", file);
   14610        20184 :           return;
   14611              : 
   14612     14857954 :         case '!':
   14613     14857954 :           if (ix86_notrack_prefixed_insn_p (current_output_insn))
   14614         5508 :             fputs ("notrack ", file);
   14615     14857954 :           return;
   14616              : 
   14617            1 :         default:
   14618            1 :           output_operand_lossage ("invalid operand code '%c'", code);
   14619              :         }
   14620              :     }
   14621              : 
   14622    143593101 :   if (REG_P (x))
   14623     85579586 :     print_reg (x, code, file);
   14624              : 
   14625     58013515 :   else if (MEM_P (x))
   14626              :     {
   14627     33298688 :       rtx addr = XEXP (x, 0);
   14628              : 
   14629              :       /* No `byte ptr' prefix for call instructions ... */
   14630     33298688 :       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
   14631              :         {
   14632          266 :           machine_mode mode = GET_MODE (x);
   14633          266 :           const char *size;
   14634              : 
   14635              :           /* Check for explicit size override codes.  */
   14636          266 :           if (code == 'b')
   14637              :             size = "BYTE";
   14638              :           else if (code == 'w')
   14639              :             size = "WORD";
   14640              :           else if (code == 'k')
   14641              :             size = "DWORD";
   14642              :           else if (code == 'q')
   14643              :             size = "QWORD";
   14644              :           else if (code == 'x')
   14645              :             size = "XMMWORD";
   14646              :           else if (code == 't')
   14647              :             size = "YMMWORD";
   14648              :           else if (code == 'g')
   14649              :             size = "ZMMWORD";
   14650          191 :           else if (mode == BLKmode)
   14651              :             /* ... or BLKmode operands, when not overridden.  */
   14652              :             size = NULL;
   14653              :           else
   14654          378 :             switch (GET_MODE_SIZE (mode))
   14655              :               {
   14656              :               case 1: size = "BYTE"; break;
   14657              :               case 2: size = "WORD"; break;
   14658              :               case 4: size = "DWORD"; break;
   14659              :               case 8: size = "QWORD"; break;
   14660              :               case 12: size = "TBYTE"; break;
   14661            4 :               case 16:
   14662            4 :                 if (mode == XFmode)
   14663              :                   size = "TBYTE";
   14664              :                 else
   14665              :                   size = "XMMWORD";
   14666              :                 break;
   14667              :               case 32: size = "YMMWORD"; break;
   14668              :               case 64: size = "ZMMWORD"; break;
   14669            0 :               default:
   14670            0 :                 gcc_unreachable ();
   14671              :               }
   14672              :           if (size)
   14673              :             {
   14674          264 :               fputs (size, file);
   14675          264 :               fputs (" PTR ", file);
   14676              :             }
   14677              :         }
   14678              : 
   14679     33298688 :       if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
   14680            0 :         output_operand_lossage ("invalid constraints for operand");
   14681              :       else
   14682     33298688 :         ix86_print_operand_address_as
   14683     33993506 :           (file, addr, MEM_ADDR_SPACE (x), code == 'p' || code == 'P');
   14684              :     }
   14685              : 
   14686     24714827 :   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == HFmode)
   14687              :     {
   14688          762 :       long l = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
   14689          762 :                                REAL_MODE_FORMAT (HFmode));
   14690          762 :       if (ASSEMBLER_DIALECT == ASM_ATT)
   14691          762 :         putc ('$', file);
   14692          762 :       fprintf (file, "0x%04x", (unsigned int) l);
   14693          762 :     }
   14694              : 
   14695     24714065 :   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == SFmode)
   14696              :     {
   14697        22052 :       long l;
   14698              : 
   14699        22052 :       REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
   14700              : 
   14701        22052 :       if (ASSEMBLER_DIALECT == ASM_ATT)
   14702        22052 :         putc ('$', file);
   14703              :       /* Sign extend 32bit SFmode immediate to 8 bytes.  */
   14704        22052 :       if (code == 'q')
   14705          327 :         fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
   14706              :                  (unsigned long long) (int) l);
   14707              :       else
   14708        21725 :         fprintf (file, "0x%08x", (unsigned int) l);
   14709              :     }
   14710              : 
   14711     24692013 :   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == DFmode)
   14712              :     {
   14713         3704 :       long l[2];
   14714              : 
   14715         3704 :       REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
   14716              : 
   14717         3704 :       if (ASSEMBLER_DIALECT == ASM_ATT)
   14718         3704 :         putc ('$', file);
   14719         3704 :       fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
   14720         3704 :     }
   14721              : 
   14722              :   /* These float cases don't actually occur as immediate operands.  */
   14723     24688309 :   else if (CONST_DOUBLE_P (x) && GET_MODE (x) == XFmode)
   14724              :     {
   14725            0 :       char dstr[30];
   14726              : 
   14727            0 :       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
   14728            0 :       fputs (dstr, file);
   14729            0 :     }
   14730              : 
   14731              :   /* Print bcst_mem_operand.  */
   14732     24688309 :   else if (GET_CODE (x) == VEC_DUPLICATE)
   14733              :     {
   14734          313 :       machine_mode vmode = GET_MODE (x);
   14735              :       /* Must be bcst_memory_operand.  */
   14736          313 :       gcc_assert (bcst_mem_operand (x, vmode));
   14737              : 
   14738          313 :       rtx mem = XEXP (x,0);
   14739          313 :       ix86_print_operand (file, mem, 0);
   14740              : 
   14741          313 :       switch (vmode)
   14742              :         {
   14743           28 :         case E_V2DImode:
   14744           28 :         case E_V2DFmode:
   14745           28 :           fputs ("{1to2}", file);
   14746           28 :           break;
   14747           74 :         case E_V4SImode:
   14748           74 :         case E_V4SFmode:
   14749           74 :         case E_V4DImode:
   14750           74 :         case E_V4DFmode:
   14751           74 :           fputs ("{1to4}", file);
   14752           74 :           break;
   14753           93 :         case E_V8SImode:
   14754           93 :         case E_V8SFmode:
   14755           93 :         case E_V8DFmode:
   14756           93 :         case E_V8DImode:
   14757           93 :         case E_V8HFmode:
   14758           93 :           fputs ("{1to8}", file);
   14759           93 :           break;
   14760          110 :         case E_V16SFmode:
   14761          110 :         case E_V16SImode:
   14762          110 :         case E_V16HFmode:
   14763          110 :           fputs ("{1to16}", file);
   14764          110 :           break;
   14765            8 :         case E_V32HFmode:
   14766            8 :           fputs ("{1to32}", file);
   14767            8 :           break;
   14768            0 :         default:
   14769            0 :           gcc_unreachable ();
   14770              :         }
   14771              :     }
   14772              : 
   14773              :   else
   14774              :     {
   14775              :       /* We have patterns that allow zero sets of memory, for instance.
   14776              :          In 64-bit mode, we should probably support all 8-byte vectors,
   14777              :          since we can in fact encode that into an immediate.  */
   14778     24687996 :       if (CONST_VECTOR_P (x))
   14779              :         {
   14780          118 :           if (x != CONST0_RTX (GET_MODE (x)))
   14781            2 :             output_operand_lossage ("invalid vector immediate");
   14782          118 :           x = const0_rtx;
   14783              :         }
   14784              : 
   14785     24687996 :       if (code == 'P')
   14786              :         {
   14787      5891245 :           if (ix86_force_load_from_GOT_p (x, true))
   14788              :             {
   14789              :               /* For inline assembly statement, load function address
   14790              :                  from GOT with 'P' operand modifier to avoid PLT.  */
   14791            4 :               x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
   14792              :                                   (TARGET_64BIT
   14793              :                                    ? UNSPEC_GOTPCREL
   14794              :                                    : UNSPEC_GOT));
   14795            4 :               x = gen_rtx_CONST (Pmode, x);
   14796            4 :               x = gen_const_mem (Pmode, x);
   14797            4 :               ix86_print_operand (file, x, 'A');
   14798            4 :               return;
   14799              :             }
   14800              :         }
   14801     18796751 :       else if (code != 'p')
   14802              :         {
   14803     18796642 :           if (CONST_INT_P (x))
   14804              :             {
   14805     15551435 :               if (ASSEMBLER_DIALECT == ASM_ATT)
   14806     15551210 :                 putc ('$', file);
   14807              :             }
   14808      3245207 :           else if (GET_CODE (x) == CONST || SYMBOL_REF_P (x)
   14809         9388 :                    || LABEL_REF_P (x))
   14810              :             {
   14811      3245205 :               if (ASSEMBLER_DIALECT == ASM_ATT)
   14812      3245181 :                 putc ('$', file);
   14813              :               else
   14814           24 :                 fputs ("OFFSET FLAT:", file);
   14815              :             }
   14816              :         }
   14817     24687992 :       if (CONST_INT_P (x))
   14818     15551521 :         fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
   14819      9136471 :       else if (flag_pic || MACHOPIC_INDIRECT)
   14820       524656 :         output_pic_addr_const (file, x, code);
   14821              :       else
   14822      8611815 :         output_addr_const (file, x);
   14823              :     }
   14824              : }
   14825              : 
   14826              : static bool
   14827     21483708 : ix86_print_operand_punct_valid_p (unsigned char code)
   14828              : {
   14829     21483708 :   return (code == '*' || code == '+' || code == '&' || code == ';'
   14830     14878138 :           || code == '~' || code == '^' || code == '!');
   14831              : }
   14832              : 
/* Print a memory operand whose address is ADDR to FILE.  AS is the
   address space requested by the caller; when it is generic, the
   segment recovered from the decomposed address (if any) is used
   instead.  When RAW is true the address is printed verbatim: no
   segment-override prefix and no RIP-relative rewriting.  */

static void
ix86_print_operand_address_as (FILE *file, rtx addr,
                               addr_space_t as, bool raw)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int scale;
  int ok;
  bool vsib = false;
  int code = 0;

  /* A VSIB address (gather/scatter) wraps the scalar base address in
     an UNSPEC together with the vector index register and the scale;
     splice those into the decomposed PARTS by hand.  */
  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
    {
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      gcc_assert (parts.index == NULL_RTX);
      parts.index = XVECEXP (addr, 0, 1);
      parts.scale = INTVAL (XVECEXP (addr, 0, 2));
      addr = XVECEXP (addr, 0, 0);
      vsib = true;
    }
  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
    {
      /* LEA address: registers are printed in their 64-bit ('q') form.  */
      gcc_assert (TARGET_64BIT);
      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
      code = 'q';
    }
  else
    ok = ix86_decompose_address (addr, &parts);

  gcc_assert (ok);

  base = parts.base;
  index = parts.index;
  disp = parts.disp;
  scale = parts.scale;

  /* Reconcile the caller-supplied address space with the segment
     found in the address; at most one of the two may be non-generic.  */
  if (ADDR_SPACE_GENERIC_P (as))
    as = parts.seg;
  else
    gcc_assert (ADDR_SPACE_GENERIC_P (parts.seg));

  /* Emit the %fs:/%gs: segment override unless printing raw.  */
  if (!ADDR_SPACE_GENERIC_P (as) && !raw)
    {
      if (ASSEMBLER_DIALECT == ASM_ATT)
        putc ('%', file);

      switch (as)
        {
        case ADDR_SPACE_SEG_FS:
          fputs ("fs:", file);
          break;
        case ADDR_SPACE_SEG_GS:
          fputs ("gs:", file);
          break;
        default:
          gcc_unreachable ();
        }
    }

  /* Use one byte shorter RIP relative addressing for 64bit mode.  */
  if (TARGET_64BIT && !base && !index && !raw)
    {
      rtx symbol = disp;

      /* Strip a constant offset (CONST (PLUS sym offset)) to look at
         the underlying symbol.  */
      if (GET_CODE (disp) == CONST
          && GET_CODE (XEXP (disp, 0)) == PLUS
          && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
        symbol = XEXP (XEXP (disp, 0), 0);

      /* Non-TLS symbols and labels get %rip as implicit base.  */
      if (LABEL_REF_P (symbol)
          || (SYMBOL_REF_P (symbol)
              && SYMBOL_REF_TLS_MODEL (symbol) == 0))
        base = pc_rtx;
    }

  if (!base && !index)
    {
      /* Displacement only requires special attention.  */
      if (CONST_INT_P (disp))
        {
          if (ASSEMBLER_DIALECT == ASM_INTEL && ADDR_SPACE_GENERIC_P (as))
            fputs ("ds:", file);
          fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
        }
      /* Load the external function address via the GOT slot to avoid PLT.  */
      else if (GET_CODE (disp) == CONST
               && GET_CODE (XEXP (disp, 0)) == UNSPEC
               && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOTPCREL
                   || XINT (XEXP (disp, 0), 1) == UNSPEC_GOT)
               && ix86_force_load_from_GOT_p (XVECEXP (XEXP (disp, 0), 0, 0)))
        output_pic_addr_const (file, disp, 0);
      else if (flag_pic)
        output_pic_addr_const (file, disp, 0);
      else
        output_addr_const (file, disp);
    }
  else
    {
      /* Print SImode register names to force addr32 prefix.  */
      if (SImode_address_operand (addr, VOIDmode))
        {
          if (flag_checking)
            {
              gcc_assert (TARGET_64BIT);
              switch (GET_CODE (addr))
                {
                case SUBREG:
                  gcc_assert (GET_MODE (addr) == SImode);
                  gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
                  break;
                case ZERO_EXTEND:
                case AND:
                  gcc_assert (GET_MODE (addr) == DImode);
                  break;
                default:
                  gcc_unreachable ();
                }
            }
          gcc_assert (!code);
          code = 'k';
        }
      else if (code == 0
               && TARGET_X32
               && disp
               && CONST_INT_P (disp)
               && INTVAL (disp) < -16*1024*1024)
        {
          /* X32 runs in 64-bit mode, where displacement, DISP, in
             address DISP(%r64), is encoded as 32-bit immediate sign-
             extended from 32-bit to 64-bit.  For -0x40000300(%r64),
             address is %r64 + 0xffffffffbffffd00.  When %r64 <
             0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
             which is invalid for x32.  The correct address is %r64
             - 0x40000300 == 0xf7ffdd64.  To properly encode
             -0x40000300(%r64) for x32, we zero-extend negative
             displacement by forcing addr32 prefix which truncates
             0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
             zero-extend all negative displacements, including -1(%rsp).
             However, for small negative displacements, sign-extension
             won't cause overflow.  We only zero-extend negative
             displacements if they < -16*1024*1024, which is also used
             to check legitimate address displacements for PIC.  */
          code = 'k';
        }

      /* Since the upper 32 bits of RSP are always zero for x32,
         we can encode %esp as %rsp to avoid 0x67 prefix if
         there is no index register.  */
      if (TARGET_X32 && Pmode == SImode
          && !index && base && REG_P (base) && REGNO (base) == SP_REG)
        code = 'q';

      if (ASSEMBLER_DIALECT == ASM_ATT)
        {
          /* AT&T syntax: disp(base,index,scale).  */
          if (disp)
            {
              if (flag_pic)
                output_pic_addr_const (file, disp, 0);
              else if (LABEL_REF_P (disp))
                output_asm_label (disp);
              else
                output_addr_const (file, disp);
            }

          putc ('(', file);
          if (base)
            print_reg (base, code, file);
          if (index)
            {
              putc (',', file);
              /* A VSIB index is a vector register; print it unmodified.  */
              print_reg (index, vsib ? 0 : code, file);
              if (scale != 1 || vsib)
                fprintf (file, ",%d", scale);
            }
          putc (')', file);
        }
      else
        {
          /* Intel syntax: sym[base+offset+index*scale].  */
          rtx offset = NULL_RTX;

          if (disp)
            {
              /* Pull out the offset of a symbol; print any symbol itself.  */
              if (GET_CODE (disp) == CONST
                  && GET_CODE (XEXP (disp, 0)) == PLUS
                  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
                {
                  offset = XEXP (XEXP (disp, 0), 1);
                  disp = gen_rtx_CONST (VOIDmode,
                                        XEXP (XEXP (disp, 0), 0));
                }

              if (flag_pic)
                output_pic_addr_const (file, disp, 0);
              else if (LABEL_REF_P (disp))
                output_asm_label (disp);
              else if (CONST_INT_P (disp))
                offset = disp;
              else
                output_addr_const (file, disp);
            }

          putc ('[', file);
          if (base)
            {
              print_reg (base, code, file);
              if (offset)
                {
                  /* Negative offsets carry their own '-' sign.  */
                  if (INTVAL (offset) >= 0)
                    putc ('+', file);
                  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
                }
            }
          else if (offset)
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
          else
            putc ('0', file);

          if (index)
            {
              putc ('+', file);
              print_reg (index, vsib ? 0 : code, file);
              if (scale != 1 || vsib)
                fprintf (file, "*%d", scale);
            }
          putc (']', file);
        }
    }
}
   15064              : 
   15065              : static void
   15066      3560104 : ix86_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
   15067              : {
   15068      3560104 :   if (this_is_asm_operands && ! address_operand (addr, VOIDmode))
   15069            1 :     output_operand_lossage ("invalid constraints for operand");
   15070              :   else
   15071      3560103 :     ix86_print_operand_address_as (file, addr, ADDR_SPACE_GENERIC, false);
   15072      3560104 : }
   15073              : 
/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.

   Print x86-specific relocation UNSPECs (GOT/TLS references) as the
   wrapped symbol followed by the assembler relocation suffix, e.g.
   "sym@gotoff".  Return true if X was handled, false to let the
   generic code (or an error) take over.  */

static bool
i386_asm_output_addr_const_extra (FILE *file, rtx x)
{
  rtx op;

  if (GET_CODE (x) != UNSPEC)
    return false;

  /* All handled unspecs wrap the symbol in operand 0.  */
  op = XVECEXP (x, 0, 0);
  switch (XINT (x, 1))
    {
    case UNSPEC_GOTOFF:
      output_addr_const (file, op);
      fputs ("@gotoff", file);
      break;
    case UNSPEC_GOTTPOFF:
      output_addr_const (file, op);
      /* FIXME: This might be @TPOFF in Sun ld.  */
      fputs ("@gottpoff", file);
      break;
    case UNSPEC_TPOFF:
      output_addr_const (file, op);
      fputs ("@tpoff", file);
      break;
    case UNSPEC_NTPOFF:
      output_addr_const (file, op);
      /* The suffix differs between 64-bit and 32-bit TLS models.  */
      if (TARGET_64BIT)
        fputs ("@tpoff", file);
      else
        fputs ("@ntpoff", file);
      break;
    case UNSPEC_DTPOFF:
      output_addr_const (file, op);
      fputs ("@dtpoff", file);
      break;
    case UNSPEC_GOTNTPOFF:
      output_addr_const (file, op);
      if (TARGET_64BIT)
        /* 64-bit uses a RIP-relative GOT slot; spell %rip per dialect.  */
        fputs (ASSEMBLER_DIALECT == ASM_ATT ?
               "@gottpoff(%rip)" : "@gottpoff[rip]", file);
      else
        fputs ("@gotntpoff", file);
      break;
    case UNSPEC_INDNTPOFF:
      output_addr_const (file, op);
      fputs ("@indntpoff", file);
      break;
    case UNSPEC_SECREL32:
      output_addr_const (file, op);
      fputs ("@secrel32", file);
      break;
#if TARGET_MACHO
    case UNSPEC_MACHOPIC_OFFSET:
      /* Darwin PIC: print "sym - <pic base label>".  */
      output_addr_const (file, op);
      putc ('-', file);
      machopic_output_function_base_name (file);
      break;
#endif

    default:
      return false;
    }

  return true;
}
   15141              : 
   15142              : 
/* Output code to perform a 387 binary operation in INSN, one of PLUS,
   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
   is the expression of the binary operation.  The output may either be
   emitted here, or returned to the caller, like all output_* functions.

   There is no guarantee that the operands are the same mode, as they
   might be within FLOAT or FLOAT_EXTEND expressions.  */

#ifndef SYSV386_COMPAT
/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
   wants to fix the assemblers because that causes incompatibility
   with gcc.  No-one wants to fix gcc because that causes
   incompatibility with assemblers...  You can use the option of
   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
#define SYSV386_COMPAT 1
#endif

const char *
output_387_binary_op (rtx_insn *insn, rtx *operands)
{
  /* Template is assembled into BUF: mnemonic prefix, operation name,
     then suffix/operand template.  Static, so the pointer stays valid
     after return.  */
  static char buf[40];
  const char *p;
  bool is_sse
    = (SSE_REG_P (operands[0])
       || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]));

  /* Mnemonic prefix: "%v" (optional AVX v-prefix) for SSE, "fi" for
     x87 ops with an integer memory operand, plain "f" otherwise.  */
  if (is_sse)
    p = "%v";
  else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
           || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
    p = "fi";
  else
    p = "f";

  strcpy (buf, p);

  switch (GET_CODE (operands[3]))
    {
    case PLUS:
      p = "add"; break;
    case MINUS:
      p = "sub"; break;
    case MULT:
      p = "mul"; break;
    case DIV:
      p = "div"; break;
    default:
      gcc_unreachable ();
    }

  strcat (buf, p);

  /* SSE scalar form: add the ss/sd width suffix and the 3-operand
     (AVX) or 2-operand template, and we are done.  */
  if (is_sse)
   {
     p = GET_MODE (operands[0]) == SFmode ? "ss" : "sd";
     strcat (buf, p);

     if (TARGET_AVX)
       p = "\t{%2, %1, %0|%0, %1, %2}";
     else
       p = "\t{%2, %0|%0, %2}";

     strcat (buf, p);
     return buf;
   }

  /* Even if we do not want to check the inputs, this documents input
     constraints.  Which helps in understanding the following code.  */
  if (flag_checking)
    {
      if (STACK_REG_P (operands[0])
          && ((REG_P (operands[1])
               && REGNO (operands[0]) == REGNO (operands[1])
               && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
              || (REG_P (operands[2])
                  && REGNO (operands[0]) == REGNO (operands[2])
                  && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
          && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
        ; /* ok */
      else
        gcc_unreachable ();
    }

  switch (GET_CODE (operands[3]))
    {
    case MULT:
    case PLUS:
      /* Commutative ops: canonicalize so operands[0] == operands[1].  */
      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
        std::swap (operands[1], operands[2]);

      /* know operands[0] == operands[1].  */

      if (MEM_P (operands[2]))
        {
          p = "%Z2\t%2";
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
          if (STACK_TOP_P (operands[0]))
            /* How is it that we are storing to a dead operand[2]?
               Well, presumably operands[1] is dead too.  We can't
               store the result to st(0) as st(0) gets popped on this
               instruction.  Instead store to operands[2] (which I
               think has to be st(1)).  st(1) will be popped later.
               gcc <= 2.8.1 didn't have this check and generated
               assembly code that the Unixware assembler rejected.  */
            p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
          else
            p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
          break;
        }

      if (STACK_TOP_P (operands[0]))
        p = "\t{%y2, %0|%0, %y2}";    /* st(0) = st(0) op st(r2) */
      else
        p = "\t{%2, %0|%0, %2}";      /* st(r1) = st(r1) op st(0) */
      break;

    case MINUS:
    case DIV:
      /* Non-commutative ops: memory operand position selects between
         the fsub/fdiv and fsubr/fdivr (reversed) forms.  */
      if (MEM_P (operands[1]))
        {
          p = "r%Z1\t%1";
          break;
        }

      if (MEM_P (operands[2]))
        {
          p = "%Z2\t%2";
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
        {
#if SYSV386_COMPAT
          /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
             derived assemblers, confusingly reverse the direction of
             the operation for fsub{r} and fdiv{r} when the
             destination register is not st(0).  The Intel assembler
             doesn't have this brain damage.  Read !SYSV386_COMPAT to
             figure out what the hardware really does.  */
          if (STACK_TOP_P (operands[0]))
            p = "{p\t%0, %2|rp\t%2, %0}";
          else
            p = "{rp\t%2, %0|p\t%0, %2}";
#else
          if (STACK_TOP_P (operands[0]))
            /* As above for fmul/fadd, we can't store to st(0).  */
            p = "rp\t{%0, %2|%2, %0}";        /* st(1) = st(0) op st(1); pop */
          else
            p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
#endif
          break;
        }

      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        {
#if SYSV386_COMPAT
          if (STACK_TOP_P (operands[0]))
            p = "{rp\t%0, %1|p\t%1, %0}";
          else
            p = "{p\t%1, %0|rp\t%0, %1}";
#else
          if (STACK_TOP_P (operands[0]))
            p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
          else
            p = "rp\t{%1, %0|%0, %1}";        /* st(r2) = st(0) op st(r2); pop */
#endif
          break;
        }

      if (STACK_TOP_P (operands[0]))
        {
          if (STACK_TOP_P (operands[1]))
            p = "\t{%y2, %0|%0, %y2}";        /* st(0) = st(0) op st(r2) */
          else
            p = "r\t{%y1, %0|%0, %y1}";       /* st(0) = st(r1) op st(0) */
          break;
        }
      else if (STACK_TOP_P (operands[1]))
        {
#if SYSV386_COMPAT
          p = "{\t%1, %0|r\t%0, %1}";
#else
          p = "r\t{%1, %0|%0, %1}";   /* st(r2) = st(0) op st(r2) */
#endif
        }
      else
        {
#if SYSV386_COMPAT
          p = "{r\t%2, %0|\t%0, %2}";
#else
          p = "\t{%2, %0|%0, %2}";    /* st(r1) = st(r1) op st(0) */
#endif
        }
      break;

    default:
      gcc_unreachable ();
    }

  strcat (buf, p);
  return buf;
}
   15349              : 
   15350              : /* Return needed mode for entity in optimize_mode_switching pass.  */
   15351              : 
   15352              : static int
   15353         1654 : ix86_dirflag_mode_needed (rtx_insn *insn)
   15354              : {
   15355         1654 :   if (CALL_P (insn))
   15356              :     {
   15357          339 :       if (cfun->machine->func_type == TYPE_NORMAL)
   15358              :         return X86_DIRFLAG_ANY;
   15359              :       else
   15360              :         /* No need to emit CLD in interrupt handler for TARGET_CLD.  */
   15361          339 :         return TARGET_CLD ? X86_DIRFLAG_ANY : X86_DIRFLAG_RESET;
   15362              :     }
   15363              : 
   15364         1315 :   if (recog_memoized (insn) < 0)
   15365              :     return X86_DIRFLAG_ANY;
   15366              : 
   15367         1313 :   if (get_attr_type (insn) == TYPE_STR)
   15368              :     {
   15369              :       /* Emit cld instruction if stringops are used in the function.  */
   15370            1 :       if (cfun->machine->func_type == TYPE_NORMAL)
   15371            0 :         return TARGET_CLD ? X86_DIRFLAG_RESET : X86_DIRFLAG_ANY;
   15372              :       else
   15373              :         return X86_DIRFLAG_RESET;
   15374              :     }
   15375              : 
   15376              :   return X86_DIRFLAG_ANY;
   15377              : }
   15378              : 
/* Return true if EXP references a 256-bit or 512-bit AVX register:
   an SSE register outside the EXT_REX set used in a mode wider than
   128 bits.  */

static bool
ix86_check_avx_upper_register (const_rtx exp)
{
  /* Walk every sub-rtx of EXP: construct_container may return a
     PARALLEL with an EXPR_LIST which contains the real reg and mode.  */
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, exp, NONCONST)
    {
      const_rtx x = *iter;
      /* EXT_REX SSE registers are deliberately excluded from the
         AVX upper-state tracking.  */
      if (SSE_REG_P (x)
          && !EXT_REX_SSE_REG_P (x)
          && GET_MODE_BITSIZE (GET_MODE (x)) > 128)
        return true;
    }

  return false;
}
   15398              : 
   15399              : /* Check if a 256bit or 512bit AVX register is referenced in stores.   */
   15400              : 
   15401              : static void
   15402        52024 : ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
   15403              : {
   15404        52024 :   if (SSE_REG_P (dest)
   15405        12854 :       && !EXT_REX_SSE_REG_P (dest)
   15406        77732 :       && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
   15407              :     {
   15408          759 :       bool *used = (bool *) data;
   15409          759 :       *used = true;
   15410              :     }
   15411        52024 : }
   15412              : 
/* Return the AVX_U128 mode INSN requires before executing, for the
   optimize_mode_switching pass.  */

static int
ix86_avx_u128_mode_needed (rtx_insn *insn)
{
  /* Debug insns must never influence mode-switching decisions.  */
  if (DEBUG_INSN_P (insn))
    return AVX_U128_ANY;

  if (CALL_P (insn))
    {
      rtx link;

      /* The call needs DIRTY mode if any 256-bit or 512-bit value is
         passed to it in an AVX register argument.  */
      for (link = CALL_INSN_FUNCTION_USAGE (insn);
           link;
           link = XEXP (link, 1))
        {
          if (GET_CODE (XEXP (link, 0)) == USE)
            {
              rtx arg = XEXP (XEXP (link, 0), 0);

              if (ix86_check_avx_upper_register (arg))
                return AVX_U128_DIRTY;
            }
        }

      /* Likewise DIRTY if the call's return value lands in a 256-bit
         or 512-bit AVX register.  */
      bool avx_upper_reg_found = false;
      note_stores (insn, ix86_check_avx_upper_stores,
                   &avx_upper_reg_found);
      if (avx_upper_reg_found)
        return AVX_U128_DIRTY;

      /* If the function is known to preserve some SSE registers,
         RA and previous passes can legitimately rely on that for
         modes wider than 256 bits.  It's only safe to issue a
         vzeroupper if all SSE registers are clobbered.  */
      const function_abi &abi = insn_callee_abi (insn);
      if (vzeroupper_pattern (PATTERN (insn), VOIDmode)
          /* It should be safe to issue a vzeroupper before a sibling
             call.  Also, there is no mode_exit for a sibling call, so
             a vzeroupper could otherwise be missing on that path.  */
          || !(SIBLING_CALL_P (insn)
               || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
                                         abi.mode_clobbers (V4DImode))))
        return AVX_U128_ANY;

      return AVX_U128_CLEAN;
    }

  rtx set = single_set (insn);
  if (set)
    {
      rtx dest = SET_DEST (set);
      rtx src = SET_SRC (set);
      if (SSE_REG_P (dest)
          && !EXT_REX_SSE_REG_P (dest)
          && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
        {
          /* This is a YMM/ZMM load.  Return AVX_U128_DIRTY unless the
             source is the standard all-zero constant, which leaves the
             upper state clean.  */
          if (standard_sse_constant_p (src, GET_MODE (dest)) != 1)
            return AVX_U128_DIRTY;
          else
            return AVX_U128_ANY;
        }
      else
        {
          /* A read of a 256-bit or 512-bit register also needs
             DIRTY mode (see the rationale at the end of the
             function).  */
          if (ix86_check_avx_upper_register (src))
            return AVX_U128_DIRTY;
        }

      /* This isn't a YMM/ZMM load/store.  */
      return AVX_U128_ANY;
    }

  /* Require DIRTY mode if a 256-bit or 512-bit AVX register is referenced.
     Hardware changes state only when a 256-bit register is written to,
     but we need to prevent the compiler from moving the optimal insertion
     point above an eventual read from a 256-bit or 512-bit register.  */
  if (ix86_check_avx_upper_register (PATTERN (insn)))
    return AVX_U128_DIRTY;

  return AVX_U128_ANY;
}
   15500              : 
   15501              : /* Return mode that i387 must be switched into
   15502              :    prior to the execution of insn.  */
   15503              : 
   15504              : static int
   15505       412467 : ix86_i387_mode_needed (int entity, rtx_insn *insn)
   15506              : {
   15507       412467 :   enum attr_i387_cw mode;
   15508              : 
   15509              :   /* The mode UNINITIALIZED is used to store control word after a
   15510              :      function call or ASM pattern.  The mode ANY specify that function
   15511              :      has no requirements on the control word and make no changes in the
   15512              :      bits we are interested in.  */
   15513              : 
   15514       412467 :   if (CALL_P (insn)
   15515       412467 :       || (NONJUMP_INSN_P (insn)
   15516       337730 :           && (asm_noperands (PATTERN (insn)) >= 0
   15517       337677 :               || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
   15518        14339 :     return I387_CW_UNINITIALIZED;
   15519              : 
   15520       398128 :   if (recog_memoized (insn) < 0)
   15521              :     return I387_CW_ANY;
   15522              : 
   15523       397190 :   mode = get_attr_i387_cw (insn);
   15524              : 
   15525       397190 :   switch (entity)
   15526              :     {
   15527            0 :     case I387_ROUNDEVEN:
   15528            0 :       if (mode == I387_CW_ROUNDEVEN)
   15529              :         return mode;
   15530              :       break;
   15531              : 
   15532       391478 :     case I387_TRUNC:
   15533       391478 :       if (mode == I387_CW_TRUNC)
   15534              :         return mode;
   15535              :       break;
   15536              : 
   15537         4378 :     case I387_FLOOR:
   15538         4378 :       if (mode == I387_CW_FLOOR)
   15539              :         return mode;
   15540              :       break;
   15541              : 
   15542         1334 :     case I387_CEIL:
   15543         1334 :       if (mode == I387_CW_CEIL)
   15544              :         return mode;
   15545              :       break;
   15546              : 
   15547            0 :     default:
   15548            0 :       gcc_unreachable ();
   15549              :     }
   15550              : 
   15551              :   return I387_CW_ANY;
   15552              : }
   15553              : 
   15554              : /* Return mode that entity must be switched into
   15555              :    prior to the execution of insn.  */
   15556              : 
   15557              : static int
   15558      2509477 : ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
   15559              : {
   15560      2509477 :   switch (entity)
   15561              :     {
   15562         1654 :     case X86_DIRFLAG:
   15563         1654 :       return ix86_dirflag_mode_needed (insn);
   15564      2095356 :     case AVX_U128:
   15565      2095356 :       return ix86_avx_u128_mode_needed (insn);
   15566       412467 :     case I387_ROUNDEVEN:
   15567       412467 :     case I387_TRUNC:
   15568       412467 :     case I387_FLOOR:
   15569       412467 :     case I387_CEIL:
   15570       412467 :       return ix86_i387_mode_needed (entity, insn);
   15571            0 :     default:
   15572            0 :       gcc_unreachable ();
   15573              :     }
   15574              :   return 0;
   15575              : }
   15576              : 
/* Calculate the mode of the upper 128 bits of the AVX registers after
   INSN executes, given that MODE was the state on entry to INSN.  */

static int
ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
{
  rtx pat = PATTERN (insn);

  /* Both vzeroupper and vzeroall leave the upper halves zeroed.  */
  if (vzeroupper_pattern (pat, VOIDmode)
      || vzeroall_pattern (pat, VOIDmode))
    return AVX_U128_CLEAN;

  /* We know that state is clean after CALL insn if there are no
     256bit or 512bit registers used in the function return register. */
  if (CALL_P (insn))
    {
      bool avx_upper_reg_found = false;
      note_stores (insn, ix86_check_avx_upper_stores, &avx_upper_reg_found);

      if (avx_upper_reg_found)
        return AVX_U128_DIRTY;

      /* If the function doesn't clobber any SSE registers, or clobbers
         only their 128-bit parts, then no vzeroupper is issued before
         the function exits, so the state after the call is ANY rather
         than CLEAN.  */
      const function_abi &abi = insn_callee_abi (insn);
      if (!(SIBLING_CALL_P (insn)
            || hard_reg_set_subset_p (reg_class_contents[SSE_REGS],
                                      abi.mode_clobbers (V4DImode))))
        return AVX_U128_ANY;

      return  AVX_U128_CLEAN;
    }

  /* Otherwise, return current mode.  Remember that if insn
     references AVX 256bit or 512bit registers, the mode was already
     changed to DIRTY from MODE_NEEDED.  */
  return mode;
}
   15615              : 
   15616              : /* Return the mode that an insn results in.  */
   15617              : 
   15618              : static int
   15619      2508636 : ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
   15620              : {
   15621      2508636 :   switch (entity)
   15622              :     {
   15623              :     case X86_DIRFLAG:
   15624              :       return mode;
   15625      2095356 :     case AVX_U128:
   15626      2095356 :       return ix86_avx_u128_mode_after (mode, insn);
   15627              :     case I387_ROUNDEVEN:
   15628              :     case I387_TRUNC:
   15629              :     case I387_FLOOR:
   15630              :     case I387_CEIL:
   15631              :       return mode;
   15632            0 :     default:
   15633            0 :       gcc_unreachable ();
   15634              :     }
   15635              : }
   15636              : 
   15637              : static int
   15638          120 : ix86_dirflag_mode_entry (void)
   15639              : {
   15640              :   /* For TARGET_CLD or in the interrupt handler we can't assume
   15641              :      direction flag state at function entry.  */
   15642          120 :   if (TARGET_CLD
   15643          118 :       || cfun->machine->func_type != TYPE_NORMAL)
   15644          120 :     return X86_DIRFLAG_ANY;
   15645              : 
   15646              :   return X86_DIRFLAG_RESET;
   15647              : }
   15648              : 
   15649              : static int
   15650       122755 : ix86_avx_u128_mode_entry (void)
   15651              : {
   15652       122755 :   tree arg;
   15653              : 
   15654              :   /* Entry mode is set to AVX_U128_DIRTY if there are
   15655              :      256bit or 512bit modes used in function arguments.  */
   15656       309893 :   for (arg = DECL_ARGUMENTS (current_function_decl); arg;
   15657       187138 :        arg = TREE_CHAIN (arg))
   15658              :     {
   15659       221087 :       rtx incoming = DECL_INCOMING_RTL (arg);
   15660              : 
   15661       221087 :       if (incoming && ix86_check_avx_upper_register (incoming))
   15662              :         return AVX_U128_DIRTY;
   15663              :     }
   15664              : 
   15665              :   return AVX_U128_CLEAN;
   15666              : }
   15667              : 
   15668              : /* Return a mode that ENTITY is assumed to be
   15669              :    switched to at function entry.  */
   15670              : 
   15671              : static int
   15672        75712 : ix86_mode_entry (int entity)
   15673              : {
   15674        75712 :   switch (entity)
   15675              :     {
   15676          120 :     case X86_DIRFLAG:
   15677          120 :       return ix86_dirflag_mode_entry ();
   15678        74443 :     case AVX_U128:
   15679        74443 :       return ix86_avx_u128_mode_entry ();
   15680              :     case I387_ROUNDEVEN:
   15681              :     case I387_TRUNC:
   15682              :     case I387_FLOOR:
   15683              :     case I387_CEIL:
   15684              :       return I387_CW_ANY;
   15685            0 :     default:
   15686            0 :       gcc_unreachable ();
   15687              :     }
   15688              : }
   15689              : 
   15690              : static int
   15691        73200 : ix86_avx_u128_mode_exit (void)
   15692              : {
   15693        73200 :   rtx reg = crtl->return_rtx;
   15694              : 
   15695              :   /* Exit mode is set to AVX_U128_DIRTY if there are 256bit
   15696              :      or 512 bit modes used in the function return register. */
   15697        73200 :   if (reg && ix86_check_avx_upper_register (reg))
   15698              :     return AVX_U128_DIRTY;
   15699              : 
   15700              :   /* Exit mode is set to AVX_U128_DIRTY if there are 256bit or 512bit
   15701              :      modes used in function arguments, otherwise return AVX_U128_CLEAN.
   15702              :    */
   15703        48312 :   return ix86_avx_u128_mode_entry ();
   15704              : }
   15705              : 
   15706              : /* Return a mode that ENTITY is assumed to be
   15707              :    switched to at function exit.  */
   15708              : 
   15709              : static int
   15710        74324 : ix86_mode_exit (int entity)
   15711              : {
   15712        74324 :   switch (entity)
   15713              :     {
   15714              :     case X86_DIRFLAG:
   15715              :       return X86_DIRFLAG_ANY;
   15716        73200 :     case AVX_U128:
   15717        73200 :       return ix86_avx_u128_mode_exit ();
   15718         1090 :     case I387_ROUNDEVEN:
   15719         1090 :     case I387_TRUNC:
   15720         1090 :     case I387_FLOOR:
   15721         1090 :     case I387_CEIL:
   15722         1090 :       return I387_CW_ANY;
   15723            0 :     default:
   15724            0 :       gcc_unreachable ();
   15725              :     }
   15726              : }
   15727              : 
/* TARGET_MODE_PRIORITY hook: for every entity, the priority of mode N
   is simply N itself.  */
static int
ix86_mode_priority (int, int n)
{
  return n;
}
   15733              : 
/* Emit code to initialize the i387 control-word stack slot for MODE,
   used by the trunc?f?i and rounding patterns: store the current
   control word to SLOT_CW_STORED, adjust its rounding-control field
   (bits 11:10, masked by 0x0c00) in a scratch register, and save the
   result in the stack slot dedicated to MODE.  */

static void
emit_i387_cw_initialization (int mode)
{
  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
  rtx new_mode;

  enum ix86_stack_slot slot;

  rtx reg = gen_reg_rtx (HImode);

  /* fnstcw captures the live control word; work on a pseudo copy so
     the stored original stays available for restoring later.  */
  emit_insn (gen_x86_fnstcw_1 (stored_mode));
  emit_move_insn (reg, copy_rtx (stored_mode));

  switch (mode)
    {
    case I387_CW_ROUNDEVEN:
      /* round to nearest (RC = 00) */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      slot = SLOT_CW_ROUNDEVEN;
      break;

    case I387_CW_TRUNC:
      /* round toward zero (truncate) (RC = 11) */
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
      slot = SLOT_CW_TRUNC;
      break;

    case I387_CW_FLOOR:
      /* round down toward -oo (RC = 01) */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
      slot = SLOT_CW_FLOOR;
      break;

    case I387_CW_CEIL:
      /* round up toward +oo (RC = 10) */
      emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
      emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
      slot = SLOT_CW_CEIL;
      break;

    default:
      gcc_unreachable ();
    }

  gcc_assert (slot < MAX_386_STACK_LOCALS);

  new_mode = assign_386_stack_local (HImode, slot);
  emit_move_insn (new_mode, reg);
}
   15788              : 
   15789              : /* Generate one or more insns to set ENTITY to MODE.  */
   15790              : 
   15791              : static void
   15792        52561 : ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
   15793              :                     HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
   15794              : {
   15795        52561 :   switch (entity)
   15796              :     {
   15797          265 :     case X86_DIRFLAG:
   15798          265 :       if (mode == X86_DIRFLAG_RESET)
   15799          265 :         emit_insn (gen_cld ());
   15800              :       break;
   15801        44178 :     case AVX_U128:
   15802        44178 :       if (mode == AVX_U128_CLEAN)
   15803        22550 :         ix86_expand_avx_vzeroupper ();
   15804              :       break;
   15805         8118 :     case I387_ROUNDEVEN:
   15806         8118 :     case I387_TRUNC:
   15807         8118 :     case I387_FLOOR:
   15808         8118 :     case I387_CEIL:
   15809         8118 :       if (mode != I387_CW_ANY
   15810         8118 :           && mode != I387_CW_UNINITIALIZED)
   15811         3296 :         emit_i387_cw_initialization (mode);
   15812              :       break;
   15813            0 :     default:
   15814            0 :       gcc_unreachable ();
   15815              :     }
   15816        52561 : }
   15817              : 
/* Output code for INSN to convert a float to a signed int.  OPERANDS
   are the insn operands.  The output may be [HSD]Imode and the input
   operand may be [SDX]Fmode.  FISTTP selects the SSE3 fisttp
   instruction, which truncates regardless of the control word.  */

const char *
output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
{
  bool stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);
  bool dimode_p = GET_MODE (operands[0]) == DImode;
  int round_mode = get_attr_i387_cw (insn);

  static char buf[40];
  const char *p;

  /* Jump through a hoop or two for DImode, since the hardware has no
     non-popping instruction.  We used to do this a different way, but
     that was somewhat fragile and broke with post-reload splitters.
     Duplicating st(0) first lets the popping form be used while the
     value stays live.  */
  if ((dimode_p || fisttp) && !stack_top_dies)
    output_asm_insn ("fld\t%y1", operands);

  gcc_assert (STACK_TOP_P (operands[1]));
  gcc_assert (MEM_P (operands[0]));
  gcc_assert (GET_MODE (operands[1]) != TFmode);

  if (fisttp)
    return "fisttp%Z0\t%0";

  strcpy (buf, "fist");

  /* Switch into the rounding mode this pattern requires...  */
  if (round_mode != I387_CW_ANY)
    output_asm_insn ("fldcw\t%3", operands);

  /* Use the popping "fistp" when st(0) dies or for DImode (which only
     exists in popping form); skipping the leading 'p' via pointer
     arithmetic selects plain "fist" otherwise.  */
  p = "p%Z0\t%0";
  strcat (buf, p + !(stack_top_dies || dimode_p));

  output_asm_insn (buf, operands);

  /* ... and restore the previously saved control word.  */
  if (round_mode != I387_CW_ANY)
    output_asm_insn ("fldcw\t%2", operands);

  return "";
}
   15860              : 
/* Output code for x87 ffreep insn.  The OPNO argument, which may only
   have the values zero or one, indicates the ffreep insn's operand
   from the OPERANDS array.  */

static const char *
output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
{
  if (TARGET_USE_FFREEP)
#ifdef HAVE_AS_IX86_FFREEP
    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
#else
    {
      /* The assembler doesn't know the ffreep mnemonic, so emit its
         raw encoding as data instead.  */
      static char retval[32];
      int regno = REGNO (operands[opno]);

      gcc_assert (STACK_REGNO_P (regno));

      regno -= FIRST_STACK_REG;

      /* The 16-bit word 0xc<i>df, stored little-endian, yields the
         byte sequence df c<i> -- the encoding of ffreep %st(i).  */
      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
      return retval;
    }
#endif

  /* Without TARGET_USE_FFREEP, pop the register with a plain fstp
     targeting the same stack slot.  */
  return opno ? "fstp\t%y1" : "fstp\t%y0";
}
   15887              : 
   15888              : 
/* Output code for INSN to compare OPERANDS.  EFLAGS_P is true when the
   fcomi family (compare directly into EFLAGS) should be used.
   UNORDERED_P is true when the quiet (fucom*) variants, which do not
   fault on QNaN, should be used.  */

const char *
output_fp_compare (rtx_insn *insn, rtx *operands,
                   bool eflags_p, bool unordered_p)
{
  /* In the fnstsw form, operand 0 is the status-word destination and
     the comparison operands start at operands[1].  */
  rtx *xops = eflags_p ? &operands[0] : &operands[1];
  bool stack_top_dies;

  static char buf[40];
  const char *p;

  gcc_assert (STACK_TOP_P (xops[0]));

  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG);

  if (eflags_p)
    {
      p = unordered_p ? "fucomi" : "fcomi";
      strcpy (buf, p);

      /* Append the popping 'p' only when st(0) dies; the pointer
         arithmetic skips past it otherwise.  */
      p = "p\t{%y1, %0|%0, %y1}";
      strcat (buf, p + !stack_top_dies);

      return buf;
    }

  if (STACK_REG_P (xops[1])
      && stack_top_dies
      && find_regno_note (insn, REG_DEAD, FIRST_STACK_REG + 1))
    {
      gcc_assert (REGNO (xops[1]) == FIRST_STACK_REG + 1);

      /* If both the top of the 387 stack die, and the other operand
         is also a stack register that dies, then this must be a
         `fcompp' float compare.  */
      p = unordered_p ? "fucompp" : "fcompp";
      strcpy (buf, p);
    }
  else if (const0_operand (xops[1], VOIDmode))
    {
      /* Comparison against literal zero uses ftst, which has no quiet
         variant.  */
      gcc_assert (!unordered_p);
      strcpy (buf, "ftst");
    }
  else
    {
      if (GET_MODE_CLASS (GET_MODE (xops[1])) == MODE_INT)
        {
          /* Integer operand: ficom; no quiet variant exists.  */
          gcc_assert (!unordered_p);
          p = "ficom";
        }
      else
        p = unordered_p ? "fucom" : "fcom";

      strcpy (buf, p);

      /* Pop after comparing when st(0) dies, as above.  */
      p = "p%Z2\t%y2";
      strcat (buf, p + !stack_top_dies);
    }

  /* Non-eflags forms: emit the compare here and return the fnstsw
     that stores the FPU status word into operand 0.  */
  output_asm_insn (buf, operands);
  return "fnstsw\t%0";
}
   15953              : 
   15954              : void
   15955       139075 : ix86_output_addr_vec_elt (FILE *file, int value)
   15956              : {
   15957       139075 :   const char *directive = ASM_LONG;
   15958              : 
   15959              : #ifdef ASM_QUAD
   15960       139075 :   if (TARGET_LP64)
   15961       127436 :     directive = ASM_QUAD;
   15962              : #else
   15963              :   gcc_assert (!TARGET_64BIT);
   15964              : #endif
   15965              : 
   15966       139075 :   fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
   15967       139075 : }
   15968              : 
/* Emit one entry of an address-difference (relative) dispatch table
   into FILE: the distance between local label LPREFIX<VALUE> and the
   anchor LPREFIX<REL>, or a GOT-/function-base-relative form on
   targets where a plain label difference is unavailable.  */

void
ix86_output_addr_diff_elt (FILE *file, int value, int rel)
{
  const char *directive = ASM_LONG;

#ifdef ASM_QUAD
  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
    directive = ASM_QUAD;
#else
  gcc_assert (!TARGET_64BIT);
#endif
  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
  if (TARGET_64BIT || TARGET_VXWORKS_VAROFF)
    fprintf (file, "%s%s%d-%s%d\n",
             directive, LPREFIX, value, LPREFIX, rel);
#if TARGET_MACHO
  else if (TARGET_MACHO)
    {
      /* Darwin: emit the offset from the function's picbase label.  */
      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
      machopic_output_function_base_name (file);
      putc ('\n', file);
    }
#endif
  else if (HAVE_AS_GOTOFF_IN_DATA)
    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
  else
    /* Fallback when the assembler can't put @GOTOFF in data: express
       the entry relative to the GOT symbol and the current location.  */
    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
                 GOT_SYMBOL_NAME, LPREFIX, value);
}
   15998              : 
/* LEA_MAX_STALL bounds the AGU stall (in cycles) the heuristics below
   tolerate; the insn searches scan at most twice that many half-cycles
   around an insn (distances are accumulated in half-cycle units).  */
#define LEA_MAX_STALL (3)
#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
   16001              : 
/* Increase given DISTANCE in half-cycles according to
   dependencies between PREV and NEXT instructions.
   Add 1 half-cycle if there is no dependency and
   go to next cycle if there is some dependency.  */

static unsigned int
increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
{
  df_ref def, use;

  /* With an incomplete pair, assume a dependency: round DISTANCE up to
     an even value (a cycle boundary) and advance one full cycle.  */
  if (!prev || !next)
    return distance + (distance & 1) + 2;

  /* Without df use/def records no dependency can be proven; treat the
     insns as independent.  */
  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
    return distance + 1;

  /* NEXT depends on PREV if any non-artificial register definition of
     PREV is among the registers NEXT uses.  */
  FOR_EACH_INSN_USE (use, next)
    FOR_EACH_INSN_DEF (def, prev)
      if (!DF_REF_IS_ARTIFICIAL (def)
          && DF_REF_REGNO (use) == DF_REF_REGNO (def))
        return distance + (distance & 1) + 2;

  /* Independent insns advance by one half-cycle only.  */
  return distance + 1;
}
   16026              : 
/* Function checks if instruction INSN defines register number
   REGNO1 or REGNO2.  */

bool
insn_defines_reg (unsigned int regno1, unsigned int regno2,
                  rtx_insn *insn)
{
  df_ref def;

  /* Scan INSN's real (non-artificial) register definitions for either
     register number.  */
  FOR_EACH_INSN_DEF (def, insn)
    if (DF_REF_REG_DEF_P (def)
        && !DF_REF_IS_ARTIFICIAL (def)
        && (regno1 == DF_REF_REGNO (def)
            || regno2 == DF_REF_REGNO (def)))
      return true;

  return false;
}
   16045              : 
/* Function checks if instruction INSN uses register number
   REGNO as a part of address expression.  */

static bool
insn_uses_reg_mem (unsigned int regno, rtx insn)
{
  df_ref use;

  /* DF_REF_REG_MEM_P marks a use that occurs inside a memory
     address.  */
  FOR_EACH_INSN_USE (use, insn)
    if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
      return true;

  return false;
}
   16060              : 
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in basic block starting from instruction
   START up to head of basic block or instruction INSN.

   Function puts true value into *FOUND var if definition was found
   and false otherwise.

   Distance in half-cycles between START and found instruction or head
   of BB is added to DISTANCE and returned.  */

static int
distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
                               rtx_insn *insn, int distance,
                               rtx_insn *start, bool *found)
{
  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
  rtx_insn *prev = start;
  rtx_insn *next = NULL;

  *found = false;

  /* Walk backwards from START, bounded by INSN, the BB head, and the
     search threshold.  */
  while (prev
         && prev != insn
         && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
        {
          distance = increase_distance (prev, next, distance);
          if (insn_defines_reg (regno1, regno2, prev))
            {
              /* Only a non-LEA definition counts: an LEA producing the
                 register runs on the AGU and causes no AGU stall.  */
              if (recog_memoized (prev) < 0
                  || get_attr_type (prev) != TYPE_LEA)
                {
                  *found = true;
                  return distance;
                }
            }

          next = prev;
        }
      if (prev == BB_HEAD (bb))
        break;

      prev = PREV_INSN (prev);
    }

  return distance;
}
   16109              : 
/* Search backward for non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until
   1. Pass LEA_SEARCH_THRESHOLD instructions, or
   2. Reach neighbor BBs boundary, or
   3. Reach agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If no definition point, returns -1.  */

static int
distance_non_agu_define (unsigned int regno1, unsigned int regno2,
                         rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;

  /* First scan INSN's own basic block, backwards from INSN.  */
  if (insn != BB_HEAD (bb))
    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
                                              distance, PREV_INSN (insn),
                                              &found);

  if (!found && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      /* A self edge means the block is its own predecessor (a simple
         loop), so the search may wrap around its end.  */
      FOR_EACH_EDGE (e, ei, bb->preds)
        if (e->src == bb)
          {
            simple_loop = true;
            break;
          }

      if (simple_loop)
        distance = distance_non_agu_define_in_bb (regno1, regno2,
                                                  insn, distance,
                                                  BB_END (bb), &found);
      else
        {
          int shortest_dist = -1;
          bool found_in_bb = false;

          /* Otherwise scan each predecessor block and keep the shortest
             distance among those where a definition was found.  */
          FOR_EACH_EDGE (e, ei, bb->preds)
            {
              int bb_dist
                = distance_non_agu_define_in_bb (regno1, regno2,
                                                 insn, distance,
                                                 BB_END (e->src),
                                                 &found_in_bb);
              if (found_in_bb)
                {
                  if (shortest_dist < 0)
                    shortest_dist = bb_dist;
                  else if (bb_dist > 0)
                    shortest_dist = MIN (bb_dist, shortest_dist);

                  found = true;
                }
            }

          distance = shortest_dist;
        }
    }

  if (!found)
    return -1;

  /* Distances were accumulated in half-cycles; report cycles.  */
  return distance >> 1;
}
   16180              : 
/* Return the distance in half-cycles between INSN and the next
   insn that uses register number REGNO in memory address added
   to DISTANCE.  Return -1 if REGNO is set.

   Put true value into *FOUND if register usage was found and
   false otherwise.
   Put true value into *REDEFINED if register redefinition was
   found and false otherwise.  */

static int
distance_agu_use_in_bb (unsigned int regno,
                        rtx_insn *insn, int distance, rtx_insn *start,
                        bool *found, bool *redefined)
{
  basic_block bb = NULL;
  rtx_insn *next = start;
  rtx_insn *prev = NULL;

  *found = false;
  *redefined = false;

  if (start != NULL_RTX)
    {
      bb = BLOCK_FOR_INSN (start);
      if (start != BB_HEAD (bb))
        /* If insn and start belong to the same bb, set prev to insn,
           so the call to increase_distance will increase the distance
           between insns by 1.  */
        prev = insn;
    }

  /* Walk forwards from START, bounded by INSN, the BB end, and the
     search threshold.  */
  while (next
         && next != insn
         && distance < LEA_SEARCH_THRESHOLD)
    {
      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
        {
          distance = increase_distance(prev, next, distance);
          if (insn_uses_reg_mem (regno, next))
            {
              /* Return DISTANCE if OP0 is used in memory
                 address in NEXT.  */
              *found = true;
              return distance;
            }

          if (insn_defines_reg (regno, INVALID_REGNUM, next))
            {
              /* Return -1 if OP0 is set in NEXT.  */
              *redefined = true;
              return -1;
            }

          prev = next;
        }

      if (next == BB_END (bb))
        break;

      next = NEXT_INSN (next);
    }

  return distance;
}
   16245              : 
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in memory address.  Return -1 if no such
   a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */

static int
distance_agu_use (unsigned int regno0, rtx_insn *insn)
{
  basic_block bb = BLOCK_FOR_INSN (insn);
  int distance = 0;
  bool found = false;
  bool redefined = false;

  /* First scan forwards within INSN's own basic block.  */
  if (insn != BB_END (bb))
    distance = distance_agu_use_in_bb (regno0, insn, distance,
                                       NEXT_INSN (insn),
                                       &found, &redefined);

  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
    {
      edge e;
      edge_iterator ei;
      bool simple_loop = false;

      /* A self edge means the block is its own successor (a simple
         loop), so the search may wrap around to its head.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest == bb)
          {
            simple_loop = true;
            break;
          }

      if (simple_loop)
        distance = distance_agu_use_in_bb (regno0, insn,
                                           distance, BB_HEAD (bb),
                                           &found, &redefined);
      else
        {
          int shortest_dist = -1;
          bool found_in_bb = false;
          bool redefined_in_bb = false;

          /* Otherwise scan each successor block and keep the shortest
             distance among those where a use was found.  */
          FOR_EACH_EDGE (e, ei, bb->succs)
            {
              int bb_dist
                = distance_agu_use_in_bb (regno0, insn,
                                          distance, BB_HEAD (e->dest),
                                          &found_in_bb, &redefined_in_bb);
              if (found_in_bb)
                {
                  if (shortest_dist < 0)
                    shortest_dist = bb_dist;
                  else if (bb_dist > 0)
                    shortest_dist = MIN (bb_dist, shortest_dist);

                  found = true;
                }
            }

          distance = shortest_dist;
        }
    }

  if (!found || redefined)
    return -1;

  /* Distances were accumulated in half-cycles; report cycles.  */
  return distance >> 1;
}
   16312              : 
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is more preferred than LEA
   Zero: Neutral
   Positive value: LEA is more preferred than ADD.  */
#define IX86_LEA_PRIORITY 0
   16319              : 
/* Return true if usage of lea INSN has performance advantage
   over a sequence of instructions.  Instructions sequence has
   SPLIT_COST cycles higher latency than lea latency.  */

static bool
ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
                      unsigned int regno2, int split_cost, bool has_scale)
{
  int dist_define, dist_use;

  /* For Atom processors newer than Bonnell, if using a 2-source or
     3-source LEA for non-destructive destination purposes, or due to
     wanting ability to use SCALE, the use of LEA is justified.  */
  if (!TARGET_CPU_P (BONNELL))
    {
      if (has_scale)
        return true;
      if (split_cost < 1)
        return false;
      if (regno0 == regno1 || regno0 == regno2)
        return false;
      return true;
    }

  /* Remember recog_data content.  */
  struct recog_data_d recog_data_save = recog_data;

  /* Bonnell: weigh the backward distance to the non-AGU definition of
     the sources against the forward distance to the next AGU use of
     the destination.  */
  dist_define = distance_non_agu_define (regno1, regno2, insn);
  dist_use = distance_agu_use (regno0, insn);

  /* distance_non_agu_define can call get_attr_type which can call
     recog_memoized, restore recog_data back to previous content.  */
  recog_data = recog_data_save;

  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
    {
      /* If there is no non AGU operand definition, no AGU
         operand usage and split cost is 0 then both lea
         and non lea variants have same priority.  Currently
         we prefer lea for 64 bit code and non lea on 32 bit
         code.  */
      if (dist_use < 0 && split_cost == 0)
        return TARGET_64BIT || IX86_LEA_PRIORITY;
      else
        return true;
    }

  /* With longer definitions distance lea is more preferable.
     Here we change it to take into account splitting cost and
     lea priority.  */
  dist_define += split_cost + IX86_LEA_PRIORITY;

  /* If there is no use in memory address then we just check
     that split cost exceeds AGU stall.  */
  if (dist_use < 0)
    return dist_define > LEA_MAX_STALL;

  /* If this insn has both backward non-agu dependence and forward
     agu dependence, the one with short distance takes effect.  */
  return dist_define >= dist_use;
}
   16381              : 
   16382              : /* Return true if we need to split op0 = op1 + op2 into a sequence of
   16383              :    move and add to avoid AGU stalls.  */
   16384              : 
   16385              : bool
   16386      9128980 : ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
   16387              : {
   16388      9128980 :   unsigned int regno0, regno1, regno2;
   16389              : 
   16390              :   /* Check if we need to optimize.  */
   16391      9128980 :   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
   16392      9128165 :     return false;
   16393              : 
   16394          815 :   regno0 = true_regnum (operands[0]);
   16395          815 :   regno1 = true_regnum (operands[1]);
   16396          815 :   regno2 = true_regnum (operands[2]);
   16397              : 
   16398              :   /* We need to split only adds with non destructive
   16399              :      destination operand.  */
   16400          815 :   if (regno0 == regno1 || regno0 == regno2)
   16401              :     return false;
   16402              :   else
   16403          245 :     return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
   16404              : }
   16405              : 
   16406              : /* Return true if we should emit lea instruction instead of mov
   16407              :    instruction.  */
   16408              : 
   16409              : bool
   16410     29480432 : ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
   16411              : {
   16412     29480432 :   unsigned int regno0, regno1;
   16413              : 
   16414              :   /* Check if we need to optimize.  */
   16415     29480432 :   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
   16416     29478146 :     return false;
   16417              : 
   16418              :   /* Use lea for reg to reg moves only.  */
   16419         2286 :   if (!REG_P (operands[0]) || !REG_P (operands[1]))
   16420              :     return false;
   16421              : 
   16422          464 :   regno0 = true_regnum (operands[0]);
   16423          464 :   regno1 = true_regnum (operands[1]);
   16424              : 
   16425          464 :   return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
   16426              : }
   16427              : 
/* Return true if we need to split lea into a sequence of
   instructions to avoid AGU stalls during peephole2. */

bool
ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
{
  unsigned int regno0, regno1, regno2;
  int split_cost;
  struct ix86_address parts;
  int ok;

  /* The "at least two components" test below might not catch simple
     move or zero extension insns if parts.base is non-NULL and parts.disp
     is const0_rtx as the only components in the address, e.g. if the
     register is %rbp or %r13.  As this test is much cheaper and moves or
     zero extensions are the common case, do this check first.  */
  if (REG_P (operands[1])
      || (SImode_address_operand (operands[1], VOIDmode)
          && REG_P (XEXP (operands[1], 0))))
    return false;

  ok = ix86_decompose_address (operands[1], &parts);
  gcc_assert (ok);

  /* There should be at least two components in the address.  */
  if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
      + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
    return false;

  /* We should not split into add if non legitimate pic
     operand is used as displacement. */
  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
    return false;

  regno0 = true_regnum (operands[0]) ;
  regno1 = INVALID_REGNUM;
  regno2 = INVALID_REGNUM;

  if (parts.base)
    regno1 = true_regnum (parts.base);
  if (parts.index)
    regno2 = true_regnum (parts.index);

  /* Use add for a = a + b and a = b + a since it is faster and shorter
     than lea for most processors.  For the processors like BONNELL, if
     the destination register of LEA holds an actual address which will
     be used soon, LEA is better and otherwise ADD is better.  */
  if (!TARGET_CPU_P (BONNELL)
      && parts.scale == 1
      && (!parts.disp || parts.disp == const0_rtx)
      && (regno0 == regno1 || regno0 == regno2))
    return true;

  /* Split with -Oz if the encoding requires fewer bytes.  */
  if (optimize_size > 1
      && parts.scale > 1
      && !parts.base
      && (!parts.disp || parts.disp == const0_rtx))
    return true;

  /* Check we need to optimize.  */
  if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
    return false;

  split_cost = 0;

  /* Compute how many cycles we will add to execution time
     if split lea into a sequence of instructions.  */
  if (parts.base || parts.index)
    {
      /* Have to use mov instruction if non destructive
         destination form is used.  */
      if (regno1 != regno0 && regno2 != regno0)
        split_cost += 1;

      /* Have to add index to base if both exist.  */
      if (parts.base && parts.index)
        split_cost += 1;

      /* Have to use shift and adds if scale is 2 or greater.  */
      if (parts.scale > 1)
        {
          if (regno0 != regno1)
            split_cost += 1;
          else if (regno2 == regno0)
            split_cost += 4;
          else
            split_cost += parts.scale;
        }

      /* Have to use add instruction with immediate if
         disp is non zero.  */
      if (parts.disp && parts.disp != const0_rtx)
        split_cost += 1;

      /* Subtract the price of lea.  */
      split_cost -= 1;
    }

  /* Split only when the lea heuristics say a split wins despite its
     extra SPLIT_COST cycles.  */
  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
                                parts.scale > 1);
}
   16530              : 
   16531              : /* Return true if it is ok to optimize an ADD operation to LEA
   16532              :    operation to avoid flag register consumation.  For most processors,
   16533              :    ADD is faster than LEA.  For the processors like BONNELL, if the
   16534              :    destination register of LEA holds an actual address which will be
   16535              :    used soon, LEA is better and otherwise ADD is better.  */
   16536              : 
   16537              : bool
   16538      9189293 : ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
   16539              : {
   16540      9189293 :   unsigned int regno0 = true_regnum (operands[0]);
   16541      9189293 :   unsigned int regno1 = true_regnum (operands[1]);
   16542      9189293 :   unsigned int regno2 = true_regnum (operands[2]);
   16543              : 
   16544              :   /* If a = b + c, (a!=b && a!=c), must use lea form. */
   16545      9189293 :   if (regno0 != regno1 && regno0 != regno2)
   16546              :     return true;
   16547              : 
   16548      7151644 :   if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
   16549      7151065 :     return false;
   16550              : 
   16551          579 :   return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
   16552              : }
   16553              : 
/* Return true if destination reg of SET_BODY is shift count of
   USE_BODY.  */

static bool
ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
{
  rtx set_dest;
  rtx shift_rtx;
  int i;

  /* Retrieve destination of SET_BODY.  */
  switch (GET_CODE (set_body))
    {
    case SET:
      set_dest = SET_DEST (set_body);
      if (!set_dest || !REG_P (set_dest))
        return false;
      break;
    case PARALLEL:
      /* Recurse over the PARALLEL elements: any one of them may be the
         SET whose destination matches.  */
      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
        if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
                                          use_body))
          return true;
      /* FALLTHROUGH */
    default:
      return false;
    }

  /* Retrieve shift count of USE_BODY.  */
  switch (GET_CODE (use_body))
    {
    case SET:
      shift_rtx = XEXP (use_body, 1);
      break;
    case PARALLEL:
      /* Likewise recurse over the elements of the use pattern.  */
      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
        if (ix86_dep_by_shift_count_body (set_body,
                                          XVECEXP (use_body, 0, i)))
          return true;
      /* FALLTHROUGH */
    default:
      return false;
    }

  /* Only shift/rotate operations have a count operand to match.  */
  if (shift_rtx
      && (GET_CODE (shift_rtx) == ASHIFT
          || GET_CODE (shift_rtx) == LSHIFTRT
          || GET_CODE (shift_rtx) == ASHIFTRT
          || GET_CODE (shift_rtx) == ROTATE
          || GET_CODE (shift_rtx) == ROTATERT))
    {
      rtx shift_count = XEXP (shift_rtx, 1);

      /* Return true if shift count is dest of SET_BODY.  */
      if (REG_P (shift_count))
        {
          /* Add check since it can be invoked before register
             allocation in pre-reload schedule.  */
          if (reload_completed
              && true_regnum (set_dest) == true_regnum (shift_count))
            return true;
          /* NOTE(review): after reload, if the hard regnums differ we
             still fall through to this raw REGNO comparison — looks
             intentional as a pre-reload check; confirm.  */
          else if (REGNO(set_dest) == REGNO(shift_count))
            return true;
        }
    }

  return false;
}
   16622              : 
   16623              : /* Return true if destination reg of SET_INSN is shift count of
   16624              :    USE_INSN.  */
   16625              : 
   16626              : bool
   16627           25 : ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
   16628              : {
   16629           25 :   return ix86_dep_by_shift_count_body (PATTERN (set_insn),
   16630           25 :                                        PATTERN (use_insn));
   16631              : }
   16632              : 
   16633              : /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
   16634              :    are ok, keeping in mind the possible movddup alternative.  */
   16635              : 
   16636              : bool
   16637        89963 : ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
   16638              : {
   16639        89963 :   if (MEM_P (operands[0]))
   16640         2041 :     return rtx_equal_p (operands[0], operands[1 + high]);
   16641        87922 :   if (MEM_P (operands[1]) && MEM_P (operands[2]))
   16642          951 :     return false;
   16643              :   return true;
   16644              : }
   16645              : 
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  Otherwise only element 0 gets VALUE; the remaining
   elements are filled with zero of the scalar mode.  */

rtx
ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
{
  int i, n_elt;
  rtvec v;
  machine_mode scalar_mode;

  switch (mode)
    {
    /* Integer vector modes are only used when broadcasting VALUE to
       every element, hence the assertion before falling through.  */
    case E_V64QImode:
    case E_V32QImode:
    case E_V16QImode:
    case E_V32HImode:
    case E_V16HImode:
    case E_V8HImode:
    case E_V16SImode:
    case E_V8SImode:
    case E_V4SImode:
    case E_V2SImode:
    case E_V8DImode:
    case E_V4DImode:
    case E_V2DImode:
      gcc_assert (vect);
      /* FALLTHRU */
    case E_V2HFmode:
    case E_V4HFmode:
    case E_V8HFmode:
    case E_V16HFmode:
    case E_V32HFmode:
    case E_V16SFmode:
    case E_V8SFmode:
    case E_V4SFmode:
    case E_V2SFmode:
    case E_V8DFmode:
    case E_V4DFmode:
    case E_V2DFmode:
    case E_V32BFmode:
    case E_V16BFmode:
    case E_V8BFmode:
    case E_V4BFmode:
    case E_V2BFmode:
      n_elt = GET_MODE_NUNITS (mode);
      v = rtvec_alloc (n_elt);
      scalar_mode = GET_MODE_INNER (mode);

      /* Element 0 always receives VALUE.  */
      RTVEC_ELT (v, 0) = value;

      for (i = 1; i < n_elt; ++i)
        RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);

      return gen_rtx_CONST_VECTOR (mode, v);

    default:
      gcc_unreachable ();
    }
}
   16706              : 
/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
   for an SSE register.  If VECT is true, then replicate the mask for
   all elements of the vector register.  If INVERT is true, then create
   a mask excluding the sign bit.  */

rtx
ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
{
  machine_mode vec_mode, imode;
  wide_int w;
  rtx mask, v;

  /* Pick the integer mode matching the element width; for TImode and
     TFmode there is no vector form (vec_mode stays VOIDmode).  */
  switch (mode)
    {
    case E_V2HFmode:
    case E_V4HFmode:
    case E_V8HFmode:
    case E_V16HFmode:
    case E_V32HFmode:
    case E_V32BFmode:
    case E_V16BFmode:
    case E_V8BFmode:
    case E_V4BFmode:
    case E_V2BFmode:
      vec_mode = mode;
      imode = HImode;
      break;

    case E_V16SImode:
    case E_V16SFmode:
    case E_V8SImode:
    case E_V4SImode:
    case E_V8SFmode:
    case E_V4SFmode:
    case E_V2SFmode:
    case E_V2SImode:
      vec_mode = mode;
      imode = SImode;
      break;

    case E_V8DImode:
    case E_V4DImode:
    case E_V2DImode:
    case E_V8DFmode:
    case E_V4DFmode:
    case E_V2DFmode:
      vec_mode = mode;
      imode = DImode;
      break;

    case E_TImode:
    case E_TFmode:
      vec_mode = VOIDmode;
      imode = TImode;
      break;

    default:
      gcc_unreachable ();
    }

  /* Build a wide_int with only the element's sign bit set, then
     complement it when a sign-bit-excluding mask is requested.  */
  machine_mode inner_mode = GET_MODE_INNER (mode);
  w = wi::set_bit_in_zero (GET_MODE_BITSIZE (inner_mode) - 1,
                           GET_MODE_BITSIZE (inner_mode));
  if (invert)
    w = wi::bit_not (w);

  /* Force this value into the low part of a fp vector constant.  */
  mask = immed_wide_int_const (w, imode);
  mask = gen_lowpart (inner_mode, mask);

  /* Scalar TImode/TFmode masks are used directly.  */
  if (vec_mode == VOIDmode)
    return force_reg (inner_mode, mask);

  v = ix86_build_const_vector (vec_mode, vect, mask);
  return force_reg (vec_mode, v);
}
   16784              : 
/* Return HOST_WIDE_INT for const vector OP in MODE.  The vector must
   fit in a single word; its elements are packed into the result with
   element 0 in the least significant bits.  */

HOST_WIDE_INT
ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
{
  /* Only vectors no wider than a word can be represented.  */
  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
    gcc_unreachable ();

  int nunits = GET_MODE_NUNITS (mode);
  wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
  machine_mode innermode = GET_MODE_INNER (mode);
  unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);

  switch (mode)
    {
    /* Integer elements: insert each CONST_INT at its bit position.  */
    case E_V2QImode:
    case E_V4QImode:
    case E_V2HImode:
    case E_V8QImode:
    case E_V4HImode:
    case E_V2SImode:
      for (int i = 0; i < nunits; ++i)
        {
          int v = INTVAL (XVECEXP (op, 0, i));
          wide_int wv = wi::shwi (v, innermode_bits);
          val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
        }
      break;
    /* Single-element vectors degenerate to the element itself.  */
    case E_V1SImode:
    case E_V1DImode:
      op = CONST_VECTOR_ELT (op, 0);
      return INTVAL (op);
    /* FP elements: convert each to its target bit representation
       first, then pack as above.  */
    case E_V2HFmode:
    case E_V2BFmode:
    case E_V4HFmode:
    case E_V4BFmode:
    case E_V2SFmode:
      for (int i = 0; i < nunits; ++i)
        {
          rtx x = XVECEXP (op, 0, i);
          int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
                                  REAL_MODE_FORMAT (innermode));
          wide_int wv = wi::shwi (v, innermode_bits);
          val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
        }
      break;
    default:
      gcc_unreachable ();
    }

  return val.to_shwi ();
}
   16837              : 
   16838           32 : int ix86_get_flags_cc (rtx_code code)
   16839              : {
   16840           32 :   switch (code)
   16841              :     {
   16842              :       case NE: return X86_CCNE;
   16843              :       case EQ: return X86_CCE;
   16844              :       case GE: return X86_CCNL;
   16845              :       case GT: return X86_CCNLE;
   16846              :       case LE: return X86_CCLE;
   16847              :       case LT: return X86_CCL;
   16848              :       case GEU: return X86_CCNB;
   16849              :       case GTU: return X86_CCNBE;
   16850              :       case LEU: return X86_CCBE;
   16851              :       case LTU: return X86_CCB;
   16852              :       default: return -1;
   16853              :     }
   16854              : }
   16855              : 
/* Return TRUE or FALSE depending on whether the first SET in INSN
   has source and destination with matching CC modes, and that the
   CC mode is at least as constrained as REQ_MODE.  The case
   fallthroughs below implement the partial order
   CCmode > CCGCmode > CCGOCmode > CCZmode.  */

bool
ix86_match_ccmode (rtx insn, machine_mode req_mode)
{
  rtx set;
  machine_mode set_mode;

  set = PATTERN (insn);
  if (GET_CODE (set) == PARALLEL)
    set = XVECEXP (set, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case E_CCNOmode:
      /* CCNOmode additionally satisfies a CCmode request for a
         comparison against zero.  */
      if (req_mode != CCNOmode
          && (req_mode != CCmode
              || XEXP (SET_SRC (set), 1) != const0_rtx))
        return false;
      break;
    case E_CCmode:
      if (req_mode == CCGCmode)
        return false;
      /* FALLTHRU */
    case E_CCGCmode:
      if (req_mode == CCGOCmode || req_mode == CCNOmode)
        return false;
      /* FALLTHRU */
    case E_CCGOCmode:
      if (req_mode == CCZmode)
        return false;
      /* FALLTHRU */
    case E_CCZmode:
      break;

    /* These special-purpose modes only match themselves.  */
    case E_CCGZmode:

    case E_CCAmode:
    case E_CCCmode:
    case E_CCOmode:
    case E_CCPmode:
    case E_CCSmode:
      if (set_mode != req_mode)
        return false;
      break;

    default:
      gcc_unreachable ();
    }

  /* Finally, source and destination modes must agree.  */
  return GET_MODE (SET_SRC (set)) == set_mode;
}
   16913              : 
   16914              : machine_mode
   16915     13744504 : ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
   16916              : {
   16917     13744504 :   machine_mode mode = GET_MODE (op0);
   16918              : 
   16919     13744504 :   if (SCALAR_FLOAT_MODE_P (mode))
   16920              :     {
   16921       135902 :       gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
   16922              :       return CCFPmode;
   16923              :     }
   16924              : 
   16925     13608602 :   switch (code)
   16926              :     {
   16927              :       /* Only zero flag is needed.  */
   16928              :     case EQ:                    /* ZF=0 */
   16929              :     case NE:                    /* ZF!=0 */
   16930              :       return CCZmode;
   16931              :       /* Codes needing carry flag.  */
   16932       994353 :     case GEU:                   /* CF=0 */
   16933       994353 :     case LTU:                   /* CF=1 */
   16934       994353 :       rtx geu;
   16935              :       /* Detect overflow checks.  They need just the carry flag.  */
   16936       994353 :       if (GET_CODE (op0) == PLUS
   16937       994353 :           && (rtx_equal_p (op1, XEXP (op0, 0))
   16938       130365 :               || rtx_equal_p (op1, XEXP (op0, 1))))
   16939        17559 :         return CCCmode;
   16940              :       /* Similarly for *setcc_qi_addqi3_cconly_overflow_1_* patterns.
   16941              :          Match LTU of op0
   16942              :          (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
   16943              :          and op1
   16944              :          (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0))
   16945              :          where CC_CCC is either CC or CCC.  */
   16946       976794 :       else if (code == LTU
   16947       385047 :                && GET_CODE (op0) == NEG
   16948         5018 :                && GET_CODE (geu = XEXP (op0, 0)) == GEU
   16949         3647 :                && REG_P (XEXP (geu, 0))
   16950         3337 :                && (GET_MODE (XEXP (geu, 0)) == CCCmode
   16951           37 :                    || GET_MODE (XEXP (geu, 0)) == CCmode)
   16952         3326 :                && REGNO (XEXP (geu, 0)) == FLAGS_REG
   16953         3326 :                && XEXP (geu, 1) == const0_rtx
   16954         3326 :                && GET_CODE (op1) == LTU
   16955         3326 :                && REG_P (XEXP (op1, 0))
   16956         3326 :                && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
   16957         3326 :                && REGNO (XEXP (op1, 0)) == FLAGS_REG
   16958       980120 :                && XEXP (op1, 1) == const0_rtx)
   16959              :         return CCCmode;
   16960              :       /* Similarly for *x86_cmc pattern.
   16961              :          Match LTU of op0 (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
   16962              :          and op1 (geu:QI (reg:CCC FLAGS_REG) (const_int 0)).
   16963              :          It is sufficient to test that the operand modes are CCCmode.  */
   16964       973468 :       else if (code == LTU
   16965       381721 :                && GET_CODE (op0) == NEG
   16966         1692 :                && GET_CODE (XEXP (op0, 0)) == LTU
   16967          372 :                && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
   16968            3 :                && GET_CODE (op1) == GEU
   16969            3 :                && GET_MODE (XEXP (op1, 0)) == CCCmode)
   16970              :         return CCCmode;
   16971              :       /* Similarly for the comparison of addcarry/subborrow pattern.  */
   16972       381718 :       else if (code == LTU
   16973       381718 :                && GET_CODE (op0) == ZERO_EXTEND
   16974        16050 :                && GET_CODE (op1) == PLUS
   16975        10122 :                && ix86_carry_flag_operator (XEXP (op1, 0), VOIDmode)
   16976        10122 :                && GET_CODE (XEXP (op1, 1)) == ZERO_EXTEND)
   16977              :         return CCCmode;
   16978              :       else
   16979       963343 :         return CCmode;
   16980              :     case GTU:                   /* CF=0 & ZF=0 */
   16981              :     case LEU:                   /* CF=1 | ZF=1 */
   16982              :       return CCmode;
   16983              :       /* Codes possibly doable only with sign flag when
   16984              :          comparing against zero.  */
   16985       770217 :     case GE:                    /* SF=OF   or   SF=0 */
   16986       770217 :     case LT:                    /* SF<>OF  or   SF=1 */
   16987       770217 :       if (op1 == const0_rtx)
   16988              :         return CCGOCmode;
   16989              :       else
   16990              :         /* For other cases Carry flag is not required.  */
   16991       436102 :         return CCGCmode;
   16992              :       /* Codes doable only with sign flag when comparing
   16993              :          against zero, but we miss jump instruction for it
   16994              :          so we need to use relational tests against overflow
   16995              :          that thus needs to be zero.  */
   16996       890924 :     case GT:                    /* ZF=0 & SF=OF */
   16997       890924 :     case LE:                    /* ZF=1 | SF<>OF */
   16998       890924 :       if (op1 == const0_rtx)
   16999              :         return CCNOmode;
   17000              :       else
   17001       591416 :         return CCGCmode;
   17002              :     default:
   17003              :       /* CCmode should be used in all other cases.  */
   17004              :       return CCmode;
   17005              :     }
   17006              : }
   17007              : 
   17008              : /* Return TRUE or FALSE depending on whether the ptest instruction
   17009              :    INSN has source and destination with suitable matching CC modes.  */
   17010              : 
   17011              : bool
   17012        94642 : ix86_match_ptest_ccmode (rtx insn)
   17013              : {
   17014        94642 :   rtx set, src;
   17015        94642 :   machine_mode set_mode;
   17016              : 
   17017        94642 :   set = PATTERN (insn);
   17018        94642 :   gcc_assert (GET_CODE (set) == SET);
   17019        94642 :   src = SET_SRC (set);
   17020        94642 :   gcc_assert (GET_CODE (src) == UNSPEC
   17021              :               && XINT (src, 1) == UNSPEC_PTEST);
   17022              : 
   17023        94642 :   set_mode = GET_MODE (src);
   17024        94642 :   if (set_mode != CCZmode
   17025              :       && set_mode != CCCmode
   17026              :       && set_mode != CCmode)
   17027              :     return false;
   17028        94642 :   return GET_MODE (SET_DEST (set)) == set_mode;
   17029              : }
   17030              : 
   17031              : /* Return the fixed registers used for condition codes.  */
   17032              : 
   17033              : static bool
   17034     18886541 : ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
   17035              : {
   17036     18886541 :   *p1 = FLAGS_REG;
   17037     18886541 :   *p2 = INVALID_REGNUM;
   17038     18886541 :   return true;
   17039              : }
   17040              : 
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static machine_mode
ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  /* Identical modes are trivially compatible.  */
  if (m1 == m2)
    return m1;

  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
    return VOIDmode;

  /* CCGC is the meet of CCGC and CCGOC.  */
  if ((m1 == CCGCmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCGCmode))
    return CCGCmode;

  if ((m1 == CCNOmode && m2 == CCGOCmode)
      || (m1 == CCGOCmode && m2 == CCNOmode))
    return CCNOmode;

  /* CCZ is weaker than CCGC/CCGOC/CCNO, so the other mode wins.  */
  if (m1 == CCZmode
      && (m2 == CCGCmode || m2 == CCGOCmode || m2 == CCNOmode))
    return m2;
  else if (m2 == CCZmode
           && (m1 == CCGCmode || m1 == CCGOCmode || m1 == CCNOmode))
    return m1;

  switch (m1)
    {
    default:
      gcc_unreachable ();

    /* Any two of the integer flags modes fall back to full CCmode;
       anything else is incompatible.  */
    case E_CCmode:
    case E_CCGCmode:
    case E_CCGOCmode:
    case E_CCNOmode:
    case E_CCAmode:
    case E_CCCmode:
    case E_CCOmode:
    case E_CCPmode:
    case E_CCSmode:
    case E_CCZmode:
      switch (m2)
        {
        default:
          return VOIDmode;

        case E_CCmode:
        case E_CCGCmode:
        case E_CCGOCmode:
        case E_CCNOmode:
        case E_CCAmode:
        case E_CCCmode:
        case E_CCOmode:
        case E_CCPmode:
        case E_CCSmode:
        case E_CCZmode:
          return CCmode;
        }

    case E_CCFPmode:
      /* These are only compatible with themselves, which we already
         checked above.  */
      return VOIDmode;
    }
}
   17108              : 
   17109              : /* Return strategy to use for floating-point.  We assume that fcomi is always
   17110              :    preferrable where available, since that is also true when looking at size
   17111              :    (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test).  */
   17112              : 
   17113              : enum ix86_fpcmp_strategy
   17114      5537122 : ix86_fp_comparison_strategy (enum rtx_code)
   17115              : {
   17116              :   /* Do fcomi/sahf based test when profitable.  */
   17117              : 
   17118      5537122 :   if (TARGET_CMOVE)
   17119              :     return IX86_FPCMP_COMI;
   17120              : 
   17121            0 :   if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
   17122            0 :     return IX86_FPCMP_SAHF;
   17123              : 
   17124              :   return IX86_FPCMP_ARITH;
   17125              : }
   17126              : 
/* Convert comparison codes we use to represent FP comparison to integer
   code that will result in proper branch.  Return UNKNOWN if no such code
   is available.  */

enum rtx_code
ix86_fp_compare_code_to_integer (enum rtx_code code)
{
  switch (code)
    {
    /* Ordered comparisons map to their unsigned counterparts.  */
    case GT:
      return GTU;
    case GE:
      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    /* Unordered-or-* comparisons collapse onto the plain flag test.  */
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
    case EQ:
    case NE:
      /* Only representable directly with AVX10.2 compares.  */
      if (TARGET_AVX10_2)
        return code;
      /* FALLTHRU.  */
    default:
      return UNKNOWN;
    }
}
   17160              : 
   17161              : /* Zero extend possibly SImode EXP to Pmode register.  */
   17162              : rtx
   17163        44169 : ix86_zero_extend_to_Pmode (rtx exp)
   17164              : {
   17165        56053 :   return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
   17166              : }
   17167              : 
   17168              : /* Return true if the function is called via PLT.   */
   17169              : 
   17170              : bool
   17171       986037 : ix86_call_use_plt_p (rtx call_op)
   17172              : {
   17173       986037 :   if (SYMBOL_REF_LOCAL_P (call_op))
   17174              :     {
   17175       196995 :       if (SYMBOL_REF_DECL (call_op)
   17176       196995 :           && TREE_CODE (SYMBOL_REF_DECL (call_op)) == FUNCTION_DECL)
   17177              :         {
   17178              :           /* NB: All ifunc functions must be called via PLT.  */
   17179       113835 :           cgraph_node *node
   17180       113835 :             = cgraph_node::get (SYMBOL_REF_DECL (call_op));
   17181       113835 :           if (node && node->ifunc_resolver)
   17182              :             return true;
   17183              :         }
   17184       196975 :       return false;
   17185              :     }
   17186              :   return true;
   17187              : }
   17188              : 
   17189              : /* Implement TARGET_IFUNC_REF_LOCAL_OK.  If this hook returns true,
   17190              :    the PLT entry will be used as the function address for local IFUNC
   17191              :    functions.  When the PIC register is needed for PLT call, indirect
   17192              :    call via the PLT entry will fail since the PIC register may not be
   17193              :    set up properly for indirect call.  In this case, we should return
   17194              :    false.  */
   17195              : 
   17196              : static bool
   17197    764553956 : ix86_ifunc_ref_local_ok (void)
   17198              : {
   17199    764553956 :   return !flag_pic || (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC);
   17200              : }
   17201              : 
/* Return true if the function being called was marked with attribute
   "noplt" or using -fno-plt and we are compiling for non-PIC.  We need
   to handle the non-PIC case in the backend because there is no easy
   interface for the front-end to force non-PLT calls to use the GOT.
   This is currently used only with 64-bit or 32-bit GOT32X ELF targets
   to call the function marked "noplt" indirectly.  */

bool
ix86_nopic_noplt_attribute_p (rtx call_op)
{
  /* Bail out for PIC, the large code model, targets without GOT32X
     support in 32-bit mode, non-ELF targets, and local symbols
     (which never go through the PLT anyway).  */
  if (flag_pic || ix86_cmodel == CM_LARGE
      || !(TARGET_64BIT || HAVE_AS_IX86_GOT32X)
      || TARGET_MACHO || TARGET_SEH || TARGET_PECOFF
      || SYMBOL_REF_LOCAL_P (call_op))
    return false;

  tree symbol_decl = SYMBOL_REF_DECL (call_op);

  /* Global -fno-plt, or a per-decl "noplt" attribute, forces the
     indirect-through-GOT call.  */
  if (!flag_plt
      || (symbol_decl != NULL_TREE
          && lookup_attribute ("noplt", DECL_ATTRIBUTES (symbol_decl))))
    return true;

  return false;
}
   17227              : 
   17228              : /* Helper to output the jmp/call.  */
   17229              : static void
   17230           33 : ix86_output_jmp_thunk_or_indirect (const char *thunk_name, const int regno)
   17231              : {
   17232           33 :   if (thunk_name != NULL)
   17233              :     {
   17234           22 :       if ((REX_INT_REGNO_P (regno) || REX2_INT_REGNO_P (regno))
   17235            1 :           && ix86_indirect_branch_cs_prefix)
   17236            1 :         fprintf (asm_out_file, "\tcs\n");
   17237           22 :       fprintf (asm_out_file, "\tjmp\t");
   17238           22 :       assemble_name (asm_out_file, thunk_name);
   17239           22 :       putc ('\n', asm_out_file);
   17240           22 :       if ((ix86_harden_sls & harden_sls_indirect_jmp))
   17241            2 :         fputs ("\tint3\n", asm_out_file);
   17242              :     }
   17243              :   else
   17244           11 :     output_indirect_thunk (regno);
   17245           33 : }
   17246              : 
   17247              : /* Output indirect branch via a call and return thunk.  CALL_OP is a
   17248              :    register which contains the branch target.  XASM is the assembly
   17249              :    template for CALL_OP.  Branch is a tail call if SIBCALL_P is true.
   17250              :    A normal call is converted to:
   17251              : 
   17252              :         call __x86_indirect_thunk_reg
   17253              : 
   17254              :    and a tail call is converted to:
   17255              : 
   17256              :         jmp __x86_indirect_thunk_reg
   17257              :  */
   17258              : 
   17259              : static void
   17260           50 : ix86_output_indirect_branch_via_reg (rtx call_op, bool sibcall_p)
   17261              : {
   17262           50 :   char thunk_name_buf[32];
   17263           50 :   char *thunk_name;
   17264           50 :   enum indirect_thunk_prefix need_prefix
   17265           50 :     = indirect_thunk_need_prefix (current_output_insn);
   17266           50 :   int regno = REGNO (call_op);
   17267              : 
   17268           50 :   if (cfun->machine->indirect_branch_type
   17269           50 :       != indirect_branch_thunk_inline)
   17270              :     {
   17271           39 :       if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
   17272           16 :         SET_HARD_REG_BIT (indirect_thunks_used, regno);
   17273              : 
   17274           39 :       indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
   17275           39 :       thunk_name = thunk_name_buf;
   17276              :     }
   17277              :   else
   17278              :     thunk_name = NULL;
   17279              : 
   17280           50 :   if (sibcall_p)
   17281           27 :      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
   17282              :   else
   17283              :     {
   17284           23 :       if (thunk_name != NULL)
   17285              :         {
   17286           17 :           if ((REX_INT_REGNO_P (regno) || REX_INT_REGNO_P (regno))
   17287            1 :               && ix86_indirect_branch_cs_prefix)
   17288            1 :             fprintf (asm_out_file, "\tcs\n");
   17289           17 :           fprintf (asm_out_file, "\tcall\t");
   17290           17 :           assemble_name (asm_out_file, thunk_name);
   17291           17 :           putc ('\n', asm_out_file);
   17292           17 :           return;
   17293              :         }
   17294              : 
   17295            6 :       char indirectlabel1[32];
   17296            6 :       char indirectlabel2[32];
   17297              : 
   17298            6 :       ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
   17299              :                                    INDIRECT_LABEL,
   17300              :                                    indirectlabelno++);
   17301            6 :       ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
   17302              :                                    INDIRECT_LABEL,
   17303              :                                    indirectlabelno++);
   17304              : 
   17305              :       /* Jump.  */
   17306            6 :       fputs ("\tjmp\t", asm_out_file);
   17307            6 :       assemble_name_raw (asm_out_file, indirectlabel2);
   17308            6 :       fputc ('\n', asm_out_file);
   17309              : 
   17310            6 :       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);
   17311              : 
   17312            6 :      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
   17313              : 
   17314            6 :       ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);
   17315              : 
   17316              :       /* Call.  */
   17317            6 :       fputs ("\tcall\t", asm_out_file);
   17318            6 :       assemble_name_raw (asm_out_file, indirectlabel1);
   17319            6 :       fputc ('\n', asm_out_file);
   17320              :     }
   17321              : }
   17322              : 
   17323              : /* Output indirect branch via a call and return thunk.  CALL_OP is
   17324              :    the branch target.  XASM is the assembly template for CALL_OP.
   17325              :    Branch is a tail call if SIBCALL_P is true.  A normal call is
   17326              :    converted to:
   17327              : 
   17328              :         jmp L2
   17329              :    L1:
   17330              :         push CALL_OP
   17331              :         jmp __x86_indirect_thunk
   17332              :    L2:
   17333              :         call L1
   17334              : 
   17335              :    and a tail call is converted to:
   17336              : 
   17337              :         push CALL_OP
   17338              :         jmp __x86_indirect_thunk
   17339              :  */
   17340              : 
static void
ix86_output_indirect_branch_via_push (rtx call_op, const char *xasm,
                                      bool sibcall_p)
{
  char thunk_name_buf[32];
  char *thunk_name;
  char push_buf[64];
  enum indirect_thunk_prefix need_prefix
    = indirect_thunk_need_prefix (current_output_insn);
  /* regno == -1 selects the register-less __x86_indirect_thunk, which
     expects its target on top of the stack.  */
  int regno = -1;

  if (cfun->machine->indirect_branch_type
      != indirect_branch_thunk_inline)
    {
      /* Flag that the generic (stack-based) thunk must be emitted at
	 the end of compilation.  */
      if (cfun->machine->indirect_branch_type == indirect_branch_thunk)
        indirect_thunk_needed = true;
      indirect_thunk_name (thunk_name_buf, regno, need_prefix, false);
      thunk_name = thunk_name_buf;
    }
  else
    thunk_name = NULL;

  /* Build "push{q|l} <target>" — 'q' in 64-bit mode, 'l' in 32-bit.  */
  snprintf (push_buf, sizeof (push_buf), "push{%c}\t%s",
            TARGET_64BIT ? 'q' : 'l', xasm);

  if (sibcall_p)
    {
      /* Tail call: push the target and jump straight into the thunk.  */
      output_asm_insn (push_buf, &call_op);
      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);
    }
  else
    {
      /* Normal call: jmp L2; L1: push+thunk; L2: call L1, so the call's
	 return address points past this whole sequence.  */
      char indirectlabel1[32];
      char indirectlabel2[32];

      ASM_GENERATE_INTERNAL_LABEL (indirectlabel1,
                                   INDIRECT_LABEL,
                                   indirectlabelno++);
      ASM_GENERATE_INTERNAL_LABEL (indirectlabel2,
                                   INDIRECT_LABEL,
                                   indirectlabelno++);

      /* Jump.  */
      fputs ("\tjmp\t", asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel2);
      fputc ('\n', asm_out_file);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel1);

      /* An external function may be called via GOT, instead of PLT.  */
      if (MEM_P (call_op))
        {
          struct ix86_address parts;
          rtx addr = XEXP (call_op, 0);
          if (ix86_decompose_address (addr, &parts)
              && parts.base == stack_pointer_rtx)
            {
              /* Since call will adjust stack by -UNITS_PER_WORD,
                 we must convert "disp(stack, index, scale)" to
                 "disp+UNITS_PER_WORD(stack, index, scale)".  */
              if (parts.index)
                {
                  addr = gen_rtx_MULT (Pmode, parts.index,
                                       GEN_INT (parts.scale));
                  addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                       addr);
                }
              else
                addr = stack_pointer_rtx;

              rtx disp;
              if (parts.disp != NULL_RTX)
                disp = plus_constant (Pmode, parts.disp,
                                      UNITS_PER_WORD);
              else
                disp = GEN_INT (UNITS_PER_WORD);

              addr = gen_rtx_PLUS (Pmode, addr, disp);
              call_op = gen_rtx_MEM (GET_MODE (call_op), addr);
            }
        }

      output_asm_insn (push_buf, &call_op);

      ix86_output_jmp_thunk_or_indirect (thunk_name, regno);

      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, indirectlabel2);

      /* Call.  */
      fputs ("\tcall\t", asm_out_file);
      assemble_name_raw (asm_out_file, indirectlabel1);
      fputc ('\n', asm_out_file);
    }
}
   17435              : 
   17436              : /* Output indirect branch via a call and return thunk.  CALL_OP is
   17437              :    the branch target.  XASM is the assembly template for CALL_OP.
   17438              :    Branch is a tail call if SIBCALL_P is true.   */
   17439              : 
   17440              : static void
   17441           50 : ix86_output_indirect_branch (rtx call_op, const char *xasm,
   17442              :                              bool sibcall_p)
   17443              : {
   17444           50 :   if (REG_P (call_op))
   17445           50 :     ix86_output_indirect_branch_via_reg (call_op, sibcall_p);
   17446              :   else
   17447            0 :     ix86_output_indirect_branch_via_push (call_op, xasm, sibcall_p);
   17448           50 : }
   17449              : 
   17450              : /* Output indirect jump.  CALL_OP is the jump target.  */
   17451              : 
   17452              : const char *
   17453         9341 : ix86_output_indirect_jmp (rtx call_op)
   17454              : {
   17455         9341 :   if (cfun->machine->indirect_branch_type != indirect_branch_keep)
   17456              :     {
   17457              :       /* We can't have red-zone since "call" in the indirect thunk
   17458              :          pushes the return address onto stack, destroying red-zone.  */
   17459            4 :       if (ix86_red_zone_used)
   17460            0 :         gcc_unreachable ();
   17461              : 
   17462            4 :       ix86_output_indirect_branch (call_op, "%0", true);
   17463              :     }
   17464              :   else
   17465         9337 :     output_asm_insn ("%!jmp\t%A0", &call_op);
   17466         9341 :   return (ix86_harden_sls & harden_sls_indirect_jmp) ? "int3" : "";
   17467              : }
   17468              : 
   17469              : /* Output return instrumentation for current function if needed.  */
   17470              : 
   17471              : static void
   17472      1703015 : output_return_instrumentation (void)
   17473              : {
   17474      1703015 :   if (ix86_instrument_return != instrument_return_none
   17475            6 :       && flag_fentry
   17476      1703021 :       && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
   17477              :     {
   17478            5 :       if (ix86_flag_record_return)
   17479            5 :         fprintf (asm_out_file, "1:\n");
   17480            5 :       switch (ix86_instrument_return)
   17481              :         {
   17482            2 :         case instrument_return_call:
   17483            2 :           fprintf (asm_out_file, "\tcall\t__return__\n");
   17484            2 :           break;
   17485            3 :         case instrument_return_nop5:
   17486              :           /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1)  */
   17487            3 :           fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
   17488            3 :           break;
   17489              :         case instrument_return_none:
   17490              :           break;
   17491              :         }
   17492              : 
   17493            5 :       if (ix86_flag_record_return)
   17494              :         {
   17495            5 :           fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
   17496            5 :           fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
   17497            5 :           fprintf (asm_out_file, "\t.previous\n");
   17498              :         }
   17499              :     }
   17500      1703015 : }
   17501              : 
   17502              : /* Output function return.  CALL_OP is the jump target.  Add a REP
   17503              :    prefix to RET if LONG_P is true and function return is kept.  */
   17504              : 
const char *
ix86_output_function_return (bool long_p)
{
  output_return_instrumentation ();

  /* When the function's return must be rewritten (retpoline-style),
     replace "ret" with a jump to the return thunk, or emit the thunk
     body inline.  */
  if (cfun->machine->function_return_type != indirect_branch_keep)
    {
      char thunk_name[32];
      enum indirect_thunk_prefix need_prefix
	= indirect_thunk_need_prefix (current_output_insn);

      if (cfun->machine->function_return_type
	  != indirect_branch_thunk_inline)
	{
	  /* Only indirect_branch_thunk asks GCC to emit the thunk
	     itself; the "extern" variant assumes it exists elsewhere.  */
	  bool need_thunk = (cfun->machine->function_return_type
			     == indirect_branch_thunk);
	  indirect_thunk_name (thunk_name, INVALID_REGNUM, need_prefix,
			       true);
	  indirect_return_needed |= need_thunk;
	  fprintf (asm_out_file, "\tjmp\t");
	  assemble_name (asm_out_file, thunk_name);
	  putc ('\n', asm_out_file);
	}
      else
	output_indirect_thunk (INVALID_REGNUM);

      return "";
    }

  /* Plain return; "rep ret" when a long encoding was requested.  */
  output_asm_insn (long_p ? "rep%; ret" : "ret", nullptr);
  /* Append int3 for straight-line-speculation hardening of returns.  */
  return (ix86_harden_sls & harden_sls_return) ? "int3" : "";
}
   17537              : 
   17538              : /* Output indirect function return.  RET_OP is the function return
   17539              :    target.  */
   17540              : 
const char *
ix86_output_indirect_function_return (rtx ret_op)
{
  if (cfun->machine->function_return_type != indirect_branch_keep)
    {
      char thunk_name[32];
      enum indirect_thunk_prefix need_prefix
	= indirect_thunk_need_prefix (current_output_insn);
      unsigned int regno = REGNO (ret_op);
      /* The return address register for this pattern is fixed to %ecx.  */
      gcc_assert (regno == CX_REG);

      if (cfun->machine->function_return_type
	  != indirect_branch_thunk_inline)
	{
	  /* Emit a jump to the CX-register return thunk; record that
	     the thunk must be generated unless it is external.  */
	  bool need_thunk = (cfun->machine->function_return_type
			     == indirect_branch_thunk);
	  indirect_thunk_name (thunk_name, regno, need_prefix, true);

	  if (need_thunk)
	    {
	      indirect_return_via_cx = true;
	      SET_HARD_REG_BIT (indirect_thunks_used, CX_REG);
	    }
	  fprintf (asm_out_file, "\tjmp\t");
	  assemble_name (asm_out_file, thunk_name);
	  putc ('\n', asm_out_file);
	}
      else
	output_indirect_thunk (regno);
    }
  else
    {
      /* No thunking requested: plain indirect jump through RET_OP,
	 optionally followed by int3 for SLS hardening.  */
      output_asm_insn ("%!jmp\t%A0", &ret_op);
      if (ix86_harden_sls & harden_sls_indirect_jmp)
	fputs ("\tint3\n", asm_out_file);
    }
  return "";
}
   17579              : 
   17580              : /* Output the assembly for a call instruction.  */
   17581              : 
const char *
ix86_output_call_insn (rtx_insn *insn, rtx call_op)
{
  /* True when the target is a compile-time constant address.  */
  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
  /* True when indirect branches must be routed through thunks
     (retpoline); never on SEH targets.  */
  bool output_indirect_p
    = (!TARGET_SEH
       && cfun->machine->indirect_branch_type != indirect_branch_keep);
  bool seh_nop_p = false;
  const char *xasm;

  if (SIBLING_CALL_P (insn))
    {
      /* A sibling call replaces this function's return, so emit return
	 instrumentation before it.  */
      output_return_instrumentation ();
      if (direct_p)
	{
	  if (ix86_nopic_noplt_attribute_p (call_op))
	    {
	      /* "noplt" / -fno-plt: load the target from the GOT and
		 treat the jump as indirect from here on.  */
	      direct_p = false;
	      if (TARGET_64BIT)
		{
		  if (output_indirect_p)
		    xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
		  else
		    xasm = "%!jmp\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
		}
	      else
		{
		  if (output_indirect_p)
		    xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
		  else
		    xasm = "%!jmp\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
		}
	    }
	  else
	    xasm = "%!jmp\t%P0";
	}
      /* SEH epilogue detection requires the indirect branch case
	 to include REX.W.  */
      else if (TARGET_SEH)
	xasm = "%!rex.W jmp\t%A0";
      else
	{
	  if (output_indirect_p)
	    xasm = "%0";
	  else
	    xasm = "%!jmp\t%A0";
	}

      if (output_indirect_p && !direct_p)
	ix86_output_indirect_branch (call_op, xasm, true);
      else
	{
	  output_asm_insn (xasm, &call_op);
	  /* SLS hardening: follow an indirect tail-call jump with int3.  */
	  if (!direct_p
	      && (ix86_harden_sls & harden_sls_indirect_jmp))
	    return "int3";
	}
      return "";
    }

  /* SEH unwinding can require an extra nop to be emitted in several
     circumstances.  Determine if we have one of those.  */
  if (TARGET_SEH)
    {
      rtx_insn *i;

      for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
	{
	  /* Prevent a catch region from being adjacent to a jump that would
	     be interpreted as an epilogue sequence by the unwinder.  */
	  if (JUMP_P(i) && CROSSING_JUMP_P (i))
	    {
	      seh_nop_p = true;
	      break;
	    }

	  /* If we get to another real insn, we don't need the nop.  */
	  if (INSN_P (i))
	    break;

	  /* If we get to the epilogue note, prevent a catch region from
	     being adjacent to the standard epilogue sequence.  Note that,
	     if non-call exceptions are enabled, we already did it during
	     epilogue expansion, or else, if the insn can throw internally,
	     we already did it during the reorg pass.  */
	  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
	      && !flag_non_call_exceptions
	      && !can_throw_internal (insn))
	    {
	      seh_nop_p = true;
	      break;
	    }
	}

      /* If we didn't find a real insn following the call, prevent the
	 unwinder from looking into the next function.  */
      if (i == NULL)
	seh_nop_p = true;
    }

  /* Normal (non-sibling) call: pick the template, mirroring the
     sibcall logic above but with "call" instead of "jmp".  */
  if (direct_p)
    {
      if (ix86_nopic_noplt_attribute_p (call_op))
	{
	  direct_p = false;
	  if (TARGET_64BIT)
	    {
	      if (output_indirect_p)
		xasm = "{%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
	      else
		xasm = "%!call\t{*%p0@GOTPCREL(%%rip)|[QWORD PTR %p0@GOTPCREL[rip]]}";
	    }
	  else
	    {
	      if (output_indirect_p)
		xasm = "{%p0@GOT|[DWORD PTR %p0@GOT]}";
	      else
		xasm = "%!call\t{*%p0@GOT|[DWORD PTR %p0@GOT]}";
	    }
	}
      else
	xasm = "%!call\t%P0";
    }
  else
    {
      if (output_indirect_p)
	xasm = "%0";
      else
	xasm = "%!call\t%A0";
    }

  if (output_indirect_p && !direct_p)
    ix86_output_indirect_branch (call_op, xasm, false);
  else
    output_asm_insn (xasm, &call_op);

  if (seh_nop_p)
    return "nop";

  return "";
}
   17723              : 
   17724              : /* Return a MEM corresponding to a stack slot with mode MODE.
   17725              :    Allocate a new slot if necessary.
   17726              : 
   17727              :    The RTL for a function can have several slots available: N is
   17728              :    which slot to use.  */
   17729              : 
   17730              : rtx
   17731        22366 : assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
   17732              : {
   17733        22366 :   struct stack_local_entry *s;
   17734              : 
   17735        22366 :   gcc_assert (n < MAX_386_STACK_LOCALS);
   17736              : 
   17737        33727 :   for (s = ix86_stack_locals; s; s = s->next)
   17738        31116 :     if (s->mode == mode && s->n == n)
   17739        19755 :       return validize_mem (copy_rtx (s->rtl));
   17740              : 
   17741         2611 :   int align = 0;
   17742              :   /* For DImode with SLOT_FLOATxFDI_387 use 32-bit
   17743              :      alignment with -m32 -mpreferred-stack-boundary=2.  */
   17744         2611 :   if (mode == DImode
   17745          329 :       && !TARGET_64BIT
   17746          329 :       && n == SLOT_FLOATxFDI_387
   17747         2940 :       && ix86_preferred_stack_boundary < GET_MODE_ALIGNMENT (DImode))
   17748              :     align = 32;
   17749         2611 :   s = ggc_alloc<stack_local_entry> ();
   17750         2611 :   s->n = n;
   17751         2611 :   s->mode = mode;
   17752         5222 :   s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), align);
   17753              : 
   17754         2611 :   s->next = ix86_stack_locals;
   17755         2611 :   ix86_stack_locals = s;
   17756         2611 :   return validize_mem (copy_rtx (s->rtl));
   17757              : }
   17758              : 
   17759              : static void
   17760      1471357 : ix86_instantiate_decls (void)
   17761              : {
   17762      1471357 :   struct stack_local_entry *s;
   17763              : 
   17764      1471357 :   for (s = ix86_stack_locals; s; s = s->next)
   17765            0 :     if (s->rtl != NULL_RTX)
   17766            0 :       instantiate_decl_rtl (s->rtl);
   17767      1471357 : }
   17768              : 
   17769              : /* Check whether x86 address PARTS is a pc-relative address.  */
   17770              : 
   17771              : bool
   17772     27212290 : ix86_rip_relative_addr_p (struct ix86_address *parts)
   17773              : {
   17774     27212290 :   rtx base, index, disp;
   17775              : 
   17776     27212290 :   base = parts->base;
   17777     27212290 :   index = parts->index;
   17778     27212290 :   disp = parts->disp;
   17779              : 
   17780     27212290 :   if (disp && !base && !index)
   17781              :     {
   17782     25452983 :       if (TARGET_64BIT)
   17783              :         {
   17784     23788244 :           rtx symbol = disp;
   17785              : 
   17786     23788244 :           if (GET_CODE (disp) == CONST)
   17787      7772798 :             symbol = XEXP (disp, 0);
   17788     23788244 :           if (GET_CODE (symbol) == PLUS
   17789      7259094 :               && CONST_INT_P (XEXP (symbol, 1)))
   17790      7259094 :             symbol = XEXP (symbol, 0);
   17791              : 
   17792     23788244 :           if (LABEL_REF_P (symbol)
   17793     23781311 :               || (SYMBOL_REF_P (symbol)
   17794     22515626 :                   && SYMBOL_REF_TLS_MODEL (symbol) == 0)
   17795     25053929 :               || (GET_CODE (symbol) == UNSPEC
   17796       532764 :                   && (XINT (symbol, 1) == UNSPEC_GOTPCREL
   17797              :                       || XINT (symbol, 1) == UNSPEC_PCREL
   17798              :                       || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
   17799     23027658 :             return true;
   17800              :         }
   17801              :     }
   17802              :   return false;
   17803              : }
   17804              : 
/* Calculate the length of the memory address in the instruction encoding.
   Includes addr32 prefix, does not include the one-byte modrm, opcode,
   or other prefixes.  We never generate addr32 prefix for LEA insn.

   ADDR is the address rtx; LEA is true when the address is the source of
   an LEA instruction rather than a real memory reference.  Returns the
   number of extra encoding bytes (segment prefix, addr32 prefix, SIB
   byte, displacement bytes).  */

int
memory_address_length (rtx addr, bool lea)
{
  struct ix86_address parts;
  rtx base, index, disp;
  int len;
  int ok;

  /* Auto-increment/decrement address forms contribute no extra address
     bytes here.  */
  if (GET_CODE (addr) == PRE_DEC
      || GET_CODE (addr) == POST_INC
      || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;

  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);

  /* A non-default segment adds one prefix byte.  */
  len = (parts.seg == ADDR_SPACE_GENERIC) ? 0 : 1;

  /*  If this is not LEA instruction, add the length of addr32 prefix.  */
  if (TARGET_64BIT && !lea
      && (SImode_address_operand (addr, VOIDmode)
          || (parts.base && GET_MODE (parts.base) == SImode)
          || (parts.index && GET_MODE (parts.index) == SImode)))
    len++;

  base = parts.base;
  index = parts.index;
  disp = parts.disp;

  /* Look through subregs to the underlying registers.  */
  if (base && SUBREG_P (base))
    base = SUBREG_REG (base);
  if (index && SUBREG_P (index))
    index = SUBREG_REG (index);

  gcc_assert (base == NULL_RTX || REG_P (base));
  gcc_assert (index == NULL_RTX || REG_P (index));

  /* Rule of thumb:
       - esp as the base always wants an index,
       - ebp as the base always wants a displacement,
       - r12 as the base always wants an index,
       - r13 as the base always wants a displacement.  */

  /* Register Indirect.  */
  if (base && !index && !disp)
    {
      /* esp (for its index) and ebp (for its displacement) need
         the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
         code.  */
      if (base == arg_pointer_rtx
          || base == frame_pointer_rtx
          || REGNO (base) == SP_REG
          || REGNO (base) == BP_REG
          || REGNO (base) == R12_REG
          || REGNO (base) == R13_REG)
        len++;
    }

  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
  else if (disp && !base && !index)
    {
      len += 4;
      if (!ix86_rip_relative_addr_p (&parts))
        len++;
    }
  else
    {
      /* Find the length of the displacement constant.  */
      if (disp)
        {
          /* With a base register, an 8-bit sign-extended displacement
             (constraint K) takes one byte; otherwise four.  */
          if (base && satisfies_constraint_K (disp))
            len += 1;
          else
            len += 4;
        }
      /* ebp always wants a displacement.  Similarly r13.  */
      else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
        len++;

      /* An index requires the two-byte modrm form....  */
      if (index
          /* ...like esp (or r12), which always wants an index.  */
          || base == arg_pointer_rtx
          || base == frame_pointer_rtx
          || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
        len++;
    }

  return len;
}
   17904              : 
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that insn have 8bit immediate alternative.

   Returns the number of bytes the immediate operand of INSN occupies in
   the encoding (0 when there is no constant operand).  */
int
ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
{
  int len = 0;
  int i;
  extract_insn_cached (insn);
  /* Scan the operands for a constant; at most one immediate is
     expected per insn (asserted below).  */
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (CONSTANT_P (recog_data.operand[i]))
      {
        enum attr_mode mode = get_attr_mode (insn);

        gcc_assert (!len);
        if (shortform && CONST_INT_P (recog_data.operand[i]))
          {
            HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
            /* Truncate the value to the insn's mode before testing
               whether it fits the sign-extended 8-bit short form.  */
            switch (mode)
              {
              case MODE_QI:
                len = 1;
                continue;
              case MODE_HI:
                ival = trunc_int_for_mode (ival, HImode);
                break;
              case MODE_SI:
                ival = trunc_int_for_mode (ival, SImode);
                break;
              default:
                break;
              }
            if (IN_RANGE (ival, -128, 127))
              {
                len = 1;
                continue;
              }
          }
        /* Full-size immediate for the insn's mode.  */
        switch (mode)
          {
          case MODE_QI:
            len = 1;
            break;
          case MODE_HI:
            len = 2;
            break;
          case MODE_SI:
            len = 4;
            break;
          /* Immediates for DImode instructions are encoded
             as 32bit sign extended values.  */
          case MODE_DI:
            len = 4;
            break;
          default:
            fatal_insn ("unknown insn mode", insn);
        }
      }
  return len;
}
   17964              : 
/* Compute default value for "length_address" attribute.

   Returns the encoding length of INSN's memory address (plus a segment
   prefix byte for non-generic address spaces), or 0 when the insn has
   no memory operand.  */
int
ix86_attr_length_address_default (rtx_insn *insn)
{
  int i;

  /* For LEA, the address is the SET source rather than a MEM operand.  */
  if (get_attr_type (insn) == TYPE_LEA)
    {
      rtx set = PATTERN (insn), addr;

      if (GET_CODE (set) == PARALLEL)
        set = XVECEXP (set, 0, 0);

      gcc_assert (GET_CODE (set) == SET);

      addr = SET_SRC (set);

      return memory_address_length (addr, true);
    }

  extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    {
      rtx op = recog_data.operand[i];
      if (MEM_P (op))
        {
          constrain_operands_cached (insn, reload_completed);
          if (which_alternative != -1)
            {
              const char *constraints = recog_data.constraints[i];
              int alt = which_alternative;

              /* Advance past '='/'+' modifiers, then past the comma
                 separated constraints of earlier alternatives, to the
                 constraint of the alternative actually matched.  */
              while (*constraints == '=' || *constraints == '+')
                constraints++;
              while (alt-- > 0)
                while (*constraints++ != ',')
                  ;
              /* Skip ignored operands.  */
              if (*constraints == 'X')
                continue;
            }

          int len = memory_address_length (XEXP (op, 0), false);

          /* Account for segment prefix for non-default addr spaces.  */
          if (!ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (op)))
            len++;

          return len;
        }
    }
  return 0;
}
   18018              : 
   18019              : /* Compute default value for "length_vex" attribute. It includes
   18020              :    2 or 3 byte VEX prefix and 1 opcode byte.  */
   18021              : 
   18022              : int
   18023      5101247 : ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
   18024              :                               bool has_vex_w)
   18025              : {
   18026      5101247 :   int i, reg_only = 2 + 1;
   18027      5101247 :   bool has_mem = false;
   18028              : 
   18029              :   /* Only 0f opcode can use 2 byte VEX prefix and  VEX W bit uses 3
   18030              :      byte VEX prefix.  */
   18031      5101247 :   if (!has_0f_opcode || has_vex_w)
   18032              :     return 3 + 1;
   18033              : 
   18034              :  /* We can always use 2 byte VEX prefix in 32bit.  */
   18035      4647205 :   if (!TARGET_64BIT)
   18036              :     return 2 + 1;
   18037              : 
   18038      3544461 :   extract_insn_cached (insn);
   18039              : 
   18040     11036984 :   for (i = recog_data.n_operands - 1; i >= 0; --i)
   18041      7831189 :     if (REG_P (recog_data.operand[i]))
   18042              :       {
   18043              :         /* REX.W bit uses 3 byte VEX prefix.
   18044              :            REX2 with vex use extended EVEX prefix length is 4-byte.  */
   18045      5138079 :         if (GET_MODE (recog_data.operand[i]) == DImode
   18046      5138079 :             && GENERAL_REG_P (recog_data.operand[i]))
   18047              :           return 3 + 1;
   18048              : 
   18049              :         /* REX.B bit requires 3-byte VEX. Right here we don't know which
   18050              :            operand will be encoded using VEX.B, so be conservative.
   18051              :            REX2 with vex use extended EVEX prefix length is 4-byte.  */
   18052      5126240 :         if (REX_INT_REGNO_P (recog_data.operand[i])
   18053      5126240 :             || REX2_INT_REGNO_P (recog_data.operand[i])
   18054      5126240 :             || REX_SSE_REGNO_P (recog_data.operand[i]))
   18055            0 :           reg_only = 3 + 1;
   18056              :       }
   18057      2693110 :     else if (MEM_P (recog_data.operand[i]))
   18058              :       {
   18059              :         /* REX2.X or REX2.B bits use 3 byte VEX prefix.  */
   18060      2089258 :         if (x86_extended_rex2reg_mentioned_p (recog_data.operand[i]))
   18061              :           return 4;
   18062              : 
   18063              :         /* REX.X or REX.B bits use 3 byte VEX prefix.  */
   18064      2089004 :         if (x86_extended_reg_mentioned_p (recog_data.operand[i]))
   18065              :           return 3 + 1;
   18066              : 
   18067              :         has_mem = true;
   18068              :       }
   18069              : 
   18070      3205795 :   return has_mem ? 2 + 1 : reg_only;
   18071              : }
   18072              : 
   18073              : 
   18074              : static bool
   18075              : ix86_class_likely_spilled_p (reg_class_t);
   18076              : 
   18077              : /* Returns true if lhs of insn is HW function argument register and set up
   18078              :    is_spilled to true if it is likely spilled HW register.  */
   18079              : static bool
   18080         1145 : insn_is_function_arg (rtx insn, bool* is_spilled)
   18081              : {
   18082         1145 :   rtx dst;
   18083              : 
   18084         1145 :   if (!NONDEBUG_INSN_P (insn))
   18085              :     return false;
   18086              :   /* Call instructions are not movable, ignore it.  */
   18087         1145 :   if (CALL_P (insn))
   18088              :     return false;
   18089         1071 :   insn = PATTERN (insn);
   18090         1071 :   if (GET_CODE (insn) == PARALLEL)
   18091           73 :     insn = XVECEXP (insn, 0, 0);
   18092         1071 :   if (GET_CODE (insn) != SET)
   18093              :     return false;
   18094         1071 :   dst = SET_DEST (insn);
   18095          975 :   if (REG_P (dst) && HARD_REGISTER_P (dst)
   18096         1940 :       && ix86_function_arg_regno_p (REGNO (dst)))
   18097              :     {
   18098              :       /* Is it likely spilled HW register?  */
   18099          869 :       if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
   18100          869 :           && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
   18101          825 :         *is_spilled = true;
   18102          869 :       return true;
   18103              :     }
   18104              :   return false;
   18105              : }
   18106              : 
/* Add output dependencies for chain of function adjacent arguments if only
   there is a move to likely spilled HW register.  Return first argument
   if at least one dependence was added or NULL otherwise.

   CALL is the call insn; HEAD is the first insn of the scheduling
   region.  The chain is scanned backwards from CALL.  */
static rtx_insn *
add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
{
  rtx_insn *insn;
  rtx_insn *last = call;
  rtx_insn *first_arg = NULL;
  bool is_spilled = false;

  head = PREV_INSN (head);

  /* Find nearest to call argument passing instruction.  */
  while (true)
    {
      last = PREV_INSN (last);
      if (last == head)
        return NULL;
      if (!NONDEBUG_INSN_P (last))
        continue;
      if (insn_is_function_arg (last, &is_spilled))
        break;
      /* The insn just before the call is not an argument move: there is
         no adjacent argument chain.  */
      return NULL;
    }

  /* Walk further backwards over the contiguous run of argument moves,
     chaining each to the argument found after it.  */
  first_arg = last;
  while (true)
    {
      insn = PREV_INSN (last);
      if (!INSN_P (insn))
        break;
      if (insn == head)
        break;
      if (!NONDEBUG_INSN_P (insn))
        {
          last = insn;
          continue;
        }
      if (insn_is_function_arg (insn, &is_spilled))
        {
          /* Add output dependence between two function arguments if chain
             of output arguments contains likely spilled HW registers.  */
          if (is_spilled)
            add_dependence (first_arg, insn, REG_DEP_OUTPUT);
          first_arg = last = insn;
        }
      else
        break;
    }
  /* Without a likely-spilled register in the chain no dependence was
     added, so report failure.  */
  if (!is_spilled)
    return NULL;
  return first_arg;
}
   18161              : 
   18162              : /* Add output or anti dependency from insn to first_arg to restrict its code
   18163              :    motion.  */
   18164              : static void
   18165         2335 : avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
   18166              : {
   18167         2335 :   rtx set;
   18168         2335 :   rtx tmp;
   18169              : 
   18170         2335 :   set = single_set (insn);
   18171         2335 :   if (!set)
   18172              :     return;
   18173         1453 :   tmp = SET_DEST (set);
   18174         1453 :   if (REG_P (tmp))
   18175              :     {
   18176              :       /* Add output dependency to the first function argument.  */
   18177         1258 :       add_dependence (first_arg, insn, REG_DEP_OUTPUT);
   18178         1258 :       return;
   18179              :     }
   18180              :   /* Add anti dependency.  */
   18181          195 :   add_dependence (first_arg, insn, REG_DEP_ANTI);
   18182              : }
   18183              : 
   18184              : /* Avoid cross block motion of function argument through adding dependency
   18185              :    from the first non-jump instruction in bb.  */
   18186              : static void
   18187           68 : add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
   18188              : {
   18189           68 :   rtx_insn *insn = BB_END (bb);
   18190              : 
   18191          134 :   while (insn)
   18192              :     {
   18193          134 :       if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
   18194              :         {
   18195           67 :           rtx set = single_set (insn);
   18196           67 :           if (set)
   18197              :             {
   18198           67 :               avoid_func_arg_motion (arg, insn);
   18199           67 :               return;
   18200              :             }
   18201              :         }
   18202           67 :       if (insn == BB_HEAD (bb))
   18203              :         return;
   18204           66 :       insn = PREV_INSN (insn);
   18205              :     }
   18206              : }
   18207              : 
/* Hook for pre-reload schedule - avoid motion of function arguments
   passed in likely spilled HW registers.

   HEAD and TAIL delimit the insn range of the scheduling region.  Runs
   only before reload; scans backwards from TAIL looking for calls and
   pins down the argument-passing chains that precede them.  */
static void
ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
{
  rtx_insn *insn;
  rtx_insn *first_arg = NULL;
  if (reload_completed)
    return;
  /* Skip leading debug insns so HEAD is a real region boundary.  */
  while (head != tail && DEBUG_INSN_P (head))
    head = NEXT_INSN (head);
  for (insn = tail; insn != head; insn = PREV_INSN (insn))
    if (INSN_P (insn) && CALL_P (insn))
      {
        first_arg = add_parameter_dependencies (insn, head);
        if (first_arg)
          {
            /* Add dependee for first argument to predecessors if only
               region contains more than one block.  */
            basic_block bb =  BLOCK_FOR_INSN (insn);
            int rgn = CONTAINING_RGN (bb->index);
            int nr_blks = RGN_NR_BLOCKS (rgn);
            /* Skip trivial regions and region head blocks that can have
               predecessors outside of region.  */
            if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
              {
                edge e;
                edge_iterator ei;

                /* Regions are SCCs with the exception of selective
                   scheduling with pipelining of outer blocks enabled.
                   So also check that immediate predecessors of a non-head
                   block are in the same region.  */
                FOR_EACH_EDGE (e, ei, bb->preds)
                  {
                    /* Avoid creating of loop-carried dependencies through
                       using topological ordering in the region.  */
                    if (rgn == CONTAINING_RGN (e->src->index)
                        && BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
                      add_dependee_for_func_arg (first_arg, e->src);
                  }
              }
            /* Resume the backward scan above the argument chain just
               processed.  */
            insn = first_arg;
            if (insn == head)
              break;
          }
      }
    else if (first_arg)
      avoid_func_arg_motion (first_arg, insn);
}
   18258              : 
   18259              : /* Hook for pre-reload schedule - set priority of moves from likely spilled
   18260              :    HW registers to maximum, to schedule them at soon as possible. These are
   18261              :    moves from function argument registers at the top of the function entry
   18262              :    and moves from function return value registers after call.  */
   18263              : static int
   18264    108639027 : ix86_adjust_priority (rtx_insn *insn, int priority)
   18265              : {
   18266    108639027 :   rtx set;
   18267              : 
   18268    108639027 :   if (reload_completed)
   18269              :     return priority;
   18270              : 
   18271        14050 :   if (!NONDEBUG_INSN_P (insn))
   18272              :     return priority;
   18273              : 
   18274        12484 :   set = single_set (insn);
   18275        12484 :   if (set)
   18276              :     {
   18277        11912 :       rtx tmp = SET_SRC (set);
   18278        11912 :       if (REG_P (tmp)
   18279         2498 :           && HARD_REGISTER_P (tmp)
   18280          499 :           && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
   18281        11912 :           && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
   18282          448 :         return current_sched_info->sched_max_insns_priority;
   18283              :     }
   18284              : 
   18285              :   return priority;
   18286              : }
   18287              : 
   18288              : /* Prepare for scheduling pass.  */
   18289              : static void
   18290       964480 : ix86_sched_init_global (FILE *, int, int)
   18291              : {
   18292              :   /* Install scheduling hooks for current CPU.  Some of these hooks are used
   18293              :      in time-critical parts of the scheduler, so we only set them up when
   18294              :      they are actually used.  */
   18295       964480 :   switch (ix86_tune)
   18296              :     {
   18297       917945 :     case PROCESSOR_CORE2:
   18298       917945 :     case PROCESSOR_NEHALEM:
   18299       917945 :     case PROCESSOR_SANDYBRIDGE:
   18300       917945 :     case PROCESSOR_HASWELL:
   18301       917945 :     case PROCESSOR_TREMONT:
   18302       917945 :     case PROCESSOR_ALDERLAKE:
   18303       917945 :     case PROCESSOR_GENERIC:
   18304              :       /* Do not perform multipass scheduling for pre-reload schedule
   18305              :          to save compile time.  */
   18306       917945 :       if (reload_completed)
   18307              :         {
   18308       917458 :           ix86_core2i7_init_hooks ();
   18309       917458 :           break;
   18310              :         }
   18311              :       /* Fall through.  */
   18312        47022 :     default:
   18313        47022 :       targetm.sched.dfa_post_advance_cycle = NULL;
   18314        47022 :       targetm.sched.first_cycle_multipass_init = NULL;
   18315        47022 :       targetm.sched.first_cycle_multipass_begin = NULL;
   18316        47022 :       targetm.sched.first_cycle_multipass_issue = NULL;
   18317        47022 :       targetm.sched.first_cycle_multipass_backtrack = NULL;
   18318        47022 :       targetm.sched.first_cycle_multipass_end = NULL;
   18319        47022 :       targetm.sched.first_cycle_multipass_fini = NULL;
   18320        47022 :       break;
   18321              :     }
   18322       964480 : }
   18323              : 
   18324              : 
   18325              : /* Implement TARGET_STATIC_RTX_ALIGNMENT.  */
   18326              : 
   18327              : static HOST_WIDE_INT
   18328       718485 : ix86_static_rtx_alignment (machine_mode mode)
   18329              : {
   18330       718485 :   if (mode == DFmode)
   18331              :     return 64;
   18332              :   if (ALIGN_MODE_128 (mode))
   18333       156355 :     return MAX (128, GET_MODE_ALIGNMENT (mode));
   18334       477350 :   return GET_MODE_ALIGNMENT (mode);
   18335              : }
   18336              : 
   18337              : /* Implement TARGET_CONSTANT_ALIGNMENT.  */
   18338              : 
   18339              : static HOST_WIDE_INT
   18340      6816681 : ix86_constant_alignment (const_tree exp, HOST_WIDE_INT align)
   18341              : {
   18342      6816681 :   if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
   18343              :       || TREE_CODE (exp) == INTEGER_CST)
   18344              :     {
   18345       364001 :       machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
   18346       364001 :       HOST_WIDE_INT mode_align = ix86_static_rtx_alignment (mode);
   18347       364001 :       return MAX (mode_align, align);
   18348              :     }
   18349      6312536 :   else if (!optimize_size && TREE_CODE (exp) == STRING_CST
   18350      9514864 :            && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
   18351              :     return BITS_PER_WORD;
   18352              : 
   18353              :   return align;
   18354              : }
   18355              : 
   18356              : /* Implement TARGET_EMPTY_RECORD_P.  */
   18357              : 
   18358              : static bool
   18359   1482923405 : ix86_is_empty_record (const_tree type)
   18360              : {
   18361   1482923405 :   if (!TARGET_64BIT)
   18362              :     return false;
   18363   1452388039 :   return default_is_empty_record (type);
   18364              : }
   18365              : 
   18366              : /* Implement TARGET_WARN_PARAMETER_PASSING_ABI.  */
   18367              : 
   18368              : static void
   18369     15129616 : ix86_warn_parameter_passing_abi (cumulative_args_t cum_v, tree type)
   18370              : {
   18371     15129616 :   CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
   18372              : 
   18373     15129616 :   if (!cum->warn_empty)
   18374              :     return;
   18375              : 
   18376     12944700 :   if (!TYPE_EMPTY_P (type))
   18377              :     return;
   18378              : 
   18379              :   /* Don't warn if the function isn't visible outside of the TU.  */
   18380        14578 :   if (cum->decl && !TREE_PUBLIC (cum->decl))
   18381              :     return;
   18382              : 
   18383        13124 :   tree decl = cum->decl;
   18384        13124 :   if (!decl)
   18385              :     /* If we don't know the target, look at the current TU.  */
   18386           39 :     decl = current_function_decl;
   18387              : 
   18388        13124 :   const_tree ctx = get_ultimate_context (decl);
   18389        13124 :   if (ctx == NULL_TREE
   18390        26215 :       || !TRANSLATION_UNIT_WARN_EMPTY_P (ctx))
   18391              :     return;
   18392              : 
   18393              :   /* If the actual size of the type is zero, then there is no change
   18394              :      in how objects of this size are passed.  */
   18395           72 :   if (int_size_in_bytes (type) == 0)
   18396              :     return;
   18397              : 
   18398           66 :   warning (OPT_Wabi, "empty class %qT parameter passing ABI "
   18399              :            "changes in %<-fabi-version=12%> (GCC 8)", type);
   18400              : 
   18401              :   /* Only warn once.  */
   18402           66 :   cum->warn_empty = false;
   18403              : }
   18404              : 
   18405              : /* This hook returns name of multilib ABI.  */
   18406              : 
   18407              : static const char *
   18408      3378459 : ix86_get_multilib_abi_name (void)
   18409              : {
   18410      3378459 :   if (!(TARGET_64BIT_P (ix86_isa_flags)))
   18411              :     return "i386";
   18412      3334503 :   else if (TARGET_X32_P (ix86_isa_flags))
   18413              :     return "x32";
   18414              :   else
   18415      3334503 :     return "x86_64";
   18416              : }
   18417              : 
   18418              : /* Compute the alignment for a variable for Intel MCU psABI.  TYPE is
   18419              :    the data type, and ALIGN is the alignment that the object would
   18420              :    ordinarily have.  */
   18421              : 
   18422              : static int
   18423            0 : iamcu_alignment (tree type, int align)
   18424              : {
   18425            0 :   machine_mode mode;
   18426              : 
   18427            0 :   if (align < 32 || TYPE_USER_ALIGN (type))
   18428              :     return align;
   18429              : 
   18430              :   /* Intel MCU psABI specifies scalar types > 4 bytes aligned to 4
   18431              :      bytes.  */
   18432            0 :   type = strip_array_types (type);
   18433            0 :   if (TYPE_ATOMIC (type))
   18434              :     return align;
   18435              : 
   18436            0 :   mode = TYPE_MODE (type);
   18437            0 :   switch (GET_MODE_CLASS (mode))
   18438              :     {
   18439              :     case MODE_INT:
   18440              :     case MODE_COMPLEX_INT:
   18441              :     case MODE_COMPLEX_FLOAT:
   18442              :     case MODE_FLOAT:
   18443              :     case MODE_DECIMAL_FLOAT:
   18444              :       return 32;
   18445              :     default:
   18446              :       return align;
   18447              :     }
   18448              : }
   18449              : 
/* Compute the alignment for a static variable.
   TYPE is the data type, and ALIGN is the alignment that
   the object would ordinarily have.  The value of this function is used
   instead of that alignment to align the object.
   OPT selects whether the optimization-driven (non-ABI-mandated)
   alignment boosts below may be applied.  Returns the alignment
   in bits.  */

int
ix86_data_alignment (tree type, unsigned int align, bool opt)
{
  /* GCC 4.8 and earlier used to incorrectly assume this alignment even
     for symbols from other compilation units or symbols that don't need
     to bind locally.  In order to preserve some ABI compatibility with
     those compilers, ensure we don't decrease alignment from what we
     used to assume.  */

  /* 256 bits, capped by what the object file format can express.  */
  unsigned int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);

  /* A data structure, equal or greater than the size of a cache line
     (64 bytes in the Pentium 4 and other recent Intel processors, including
     processors based on Intel Core microarchitecture) should be aligned
     so that its base address is a multiple of a cache line size.  */

  /* prefetch_block is in bytes; * 8 converts to bits.  */
  unsigned int max_align
    = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);

  if (max_align < BITS_PER_WORD)
    max_align = BITS_PER_WORD;

  /* -malign-data=: "abi" disables all optimization boosts, "compat"
     limits the cache-line boost to a word, "cacheline" is the full
     behavior.  */
  switch (ix86_align_data_type)
    {
    case ix86_align_data_type_abi: opt = false; break;
    case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
    case ix86_align_data_type_cacheline: break;
    }

  if (TARGET_IAMCU)
    align = iamcu_alignment (type, align);

  /* Optimization boosts for aggregates with a known constant size:
     first the GCC<=4.8 compatibility boost, then the cache-line
     boost.  */
  if (opt
      && AGGREGATE_TYPE_P (type)
      && TYPE_SIZE (type)
      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
    {
      if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align_compat)
          && align < max_align_compat)
        align = max_align_compat;
      if (wi::geu_p (wi::to_wide (TYPE_SIZE (type)), max_align)
          && align < max_align)
        align = max_align;
    }

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  */
  if (TARGET_64BIT)
    {
      /* With OPT we extend the rule to all aggregates; without it only
         arrays get the ABI-mandated treatment.  Sizes are in bits
         (128 bits == 16 bytes).  */
      if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
          && align < 128)
        return 128;
    }

  if (!opt)
    return align;

  /* Per-type-class boosts: raise DFmode data to 64 bits and 128-bit
     modes to 128 bits, keyed off the element type for arrays, the
     first field for records/unions, and the type mode otherwise.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {

      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if ((TYPE_MODE (type) == XCmode
           || TYPE_MODE (type) == TCmode) && align < 128)
        return 128;
    }
  else if (RECORD_OR_UNION_TYPE_P (type)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
           || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }

  return align;
}
   18550              : 
/* Implement TARGET_LOWER_LOCAL_DECL_ALIGNMENT.  Recompute DECL's
   alignment with lowering permitted and apply the result, but only
   ever downwards — alignment is never raised here.  */
static void
ix86_lower_local_decl_alignment (tree decl)
{
  /* Pass may_lower == true so the -mpreferred-stack-boundary=2
     lowering of 64-bit objects in ix86_local_alignment can apply.  */
  unsigned int new_align = ix86_local_alignment (decl, VOIDmode,
                                                 DECL_ALIGN (decl), true);
  if (new_align < DECL_ALIGN (decl))
    SET_DECL_ALIGN (decl, new_align);
}
   18560              : 
/* Compute the alignment for a local variable or a stack slot.  EXP is
   the data type or decl itself, MODE is the widest mode available and
   ALIGN is the alignment that the object would ordinarily have.  The
   value of this macro is used instead of that alignment to align the
   object.  MAY_LOWER permits returning an alignment smaller than
   ALIGN (used by the lower-local-decl-alignment hook).  Returns the
   alignment in bits.  */

unsigned int
ix86_local_alignment (tree exp, machine_mode mode,
                      unsigned int align, bool may_lower)
{
  tree type, decl;

  /* EXP may be either a decl (from which we take its type) or a bare
     type; NULL EXP means a caller-save stack slot in MODE.  */
  if (exp && DECL_P (exp))
    {
      type = TREE_TYPE (exp);
      decl = exp;
    }
  else
    {
      type = exp;
      decl = NULL;
    }

  /* Don't do dynamic stack realignment for long long objects with
     -mpreferred-stack-boundary=2.  */
  if (may_lower
      && !TARGET_64BIT
      && align == 64
      && ix86_preferred_stack_boundary < 64
      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
      && (!type || (!TYPE_USER_ALIGN (type)
                    && !TYPE_ATOMIC (strip_array_types (type))))
      && (!decl || !DECL_USER_ALIGN (decl)))
    align = 32;

  /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
        align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }

  /* Don't increase alignment for Intel MCU psABI.  */
  if (TARGET_IAMCU)
    return align;

  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to 16byte boundary.  Exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

     This was added to allow use of aligned SSE instructions at arrays.  This
     rule is meant for static storage (where compiler cannot do the analysis
     by itself).  We follow it for automatic variables only when convenient.
     We fully control everything in the function compiled and functions from
     other unit cannot rely on the alignment.

     Exclude va_list type.  It is the common case of local array where
     we cannot benefit from the alignment.

     TODO: Probably one should optimize for size only when var is not escaping.  */
  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
      && TARGET_SSE)
    {
      /* 128 bits == 16 bytes; only known-constant-size aggregates that
         are not the va_list type qualify.  */
      if (AGGREGATE_TYPE_P (type)
          && (va_list_type_node == NULL_TREE
              || (TYPE_MAIN_VARIANT (type)
                  != TYPE_MAIN_VARIANT (va_list_type_node)))
          && TYPE_SIZE (type)
          && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
          && wi::geu_p (wi::to_wide (TYPE_SIZE (type)), 128)
          && align < 128)
        return 128;
    }
  /* Per-type-class boosts, mirroring ix86_data_alignment: DFmode data
     to 64 bits, 128-bit modes to 128 bits, keyed off the element type
     for arrays and the first field for records/unions.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    {
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
    }
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if ((TYPE_MODE (type) == XCmode
           || TYPE_MODE (type) == TCmode) && align < 128)
        return 128;
    }
  else if (RECORD_OR_UNION_TYPE_P (type)
           && TYPE_FIELDS (type))
    {
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
    }
  else if (SCALAR_FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type)
           || TREE_CODE (type) == INTEGER_TYPE)
    {

      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;
    }
  return align;
}
   18674              : 
   18675              : /* Compute the minimum required alignment for dynamic stack realignment
   18676              :    purposes for a local variable, parameter or a stack slot.  EXP is
   18677              :    the data type or decl itself, MODE is its mode and ALIGN is the
   18678              :    alignment that the object would ordinarily have.  */
   18679              : 
   18680              : unsigned int
   18681     47783925 : ix86_minimum_alignment (tree exp, machine_mode mode,
   18682              :                         unsigned int align)
   18683              : {
   18684     47783925 :   tree type, decl;
   18685              : 
   18686     47783925 :   if (exp && DECL_P (exp))
   18687              :     {
   18688     14909607 :       type = TREE_TYPE (exp);
   18689     14909607 :       decl = exp;
   18690              :     }
   18691              :   else
   18692              :     {
   18693              :       type = exp;
   18694              :       decl = NULL;
   18695              :     }
   18696              : 
   18697     47783925 :   if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
   18698              :     return align;
   18699              : 
   18700              :   /* Don't do dynamic stack realignment for long long objects with
   18701              :      -mpreferred-stack-boundary=2.  */
   18702            0 :   if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
   18703            0 :       && (!type || (!TYPE_USER_ALIGN (type)
   18704            0 :                     && !TYPE_ATOMIC (strip_array_types (type))))
   18705            0 :       && (!decl || !DECL_USER_ALIGN (decl)))
   18706              :     {
   18707            0 :       gcc_checking_assert (!TARGET_STV);
   18708              :       return 32;
   18709              :     }
   18710              : 
   18711              :   return align;
   18712              : }
   18713              : 
/* Find a location for the static chain incoming to a nested function.
   This is a register, unless all free registers are used by arguments.
   FNDECL_OR_TYPE identifies the target function (decl or type);
   INCOMING_P is true when asked for the location as seen on entry to
   the nested function rather than at the call site.  Returns a REG
   (or, for the 32-bit regparm-3 incoming case, a stack MEM).  May set
   the global ix86_static_chain_on_stack as a side effect.  */

static rtx
ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
{
  unsigned regno;

  if (TARGET_64BIT)
    {
      /* We always use R10 in 64-bit mode.  */
      regno = R10_REG;
    }
  else
    {
      const_tree fntype, fndecl;
      unsigned int ccvt;

      /* By default in 32-bit mode we use ECX to pass the static chain.  */
      regno = CX_REG;

      if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
        {
          fntype = TREE_TYPE (fndecl_or_type);
          fndecl = fndecl_or_type;
        }
      else
        {
          fntype = fndecl_or_type;
          fndecl = NULL;
        }

      /* The calling convention decides which registers arguments
         occupy, and hence which one is free for the chain.  */
      ccvt = ix86_get_callcvt (fntype);
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
        {
          /* Fastcall functions use ecx/edx for arguments, which leaves
             us with EAX for the static chain.
             Thiscall functions use ecx for arguments, which also
             leaves us with EAX for the static chain.  */
          regno = AX_REG;
        }
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
        {
          /* Thiscall functions use ecx for arguments, which leaves
             us with EAX and EDX for the static chain.
             We are using for abi-compatibility EAX.  */
          regno = AX_REG;
        }
      else if (ix86_function_regparm (fntype, fndecl) == 3)
        {
          /* For regparm 3, we have no free call-clobbered registers in
             which to store the static chain.  In order to implement this,
             we have the trampoline push the static chain to the stack.
             However, we can't push a value below the return address when
             we call the nested function directly, so we have to use an
             alternate entry point.  For this we use ESI, and have the
             alternate entry point push ESI, so that things appear the
             same once we're executing the nested function.  */
          if (incoming_p)
            {
              /* Record (once, before reload) that this function's
                 chain arrives on the stack.  */
              if (fndecl == current_function_decl
                  && !ix86_static_chain_on_stack)
                {
                  gcc_assert (!reload_completed);
                  ix86_static_chain_on_stack = true;
                }
              /* The chain was pushed just below the return address:
                 8 bytes below the incoming argument pointer.  */
              return gen_frame_mem (SImode,
                                    plus_constant (Pmode,
                                                   arg_pointer_rtx, -8));
            }
          regno = SI_REG;
        }
    }

  return gen_rtx_REG (Pmode, regno);
}
   18790              : 
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNDECL is the decl of the target address; M_TRAMP is a MEM for
   the trampoline, and CHAIN_VALUE is an RTX for the static chain
   to be passed to the target function.

   The trampoline is assembled byte-by-byte via stores of literal
   opcode words; OFFSET tracks the current write position and is
   checked against TRAMPOLINE_SIZE at the end.  */

static void
ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx mem, fnaddr;
  int opcode;
  int offset = 0;
  /* CET indirect-branch tracking requires an ENDBR landing pad at the
     trampoline entry.  */
  bool need_endbr = (flag_cf_protection & CF_BRANCH);

  fnaddr = XEXP (DECL_RTL (fndecl), 0);

  if (TARGET_64BIT)
    {
      int size;

      if (need_endbr)
        {
          /* Insert ENDBR64.  */
          mem = adjust_address (m_tramp, SImode, offset);
          emit_move_insn (mem, gen_int_mode (0xfa1e0ff3, SImode));
          offset += 4;
        }

      /* Load the function address to r11.  Try to load address using
         the shorter movl instead of movabs.  We may want to support
         movq for kernel mode, but kernel does not use trampolines at
         the moment.  FNADDR is a 32bit address and may not be in
         DImode when ptr_mode == SImode.  Always use movl in this
         case.  */
      if (ptr_mode == SImode
          || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
        {
          fnaddr = copy_addr_to_reg (fnaddr);

          /* Bytes 41 bb: REX.B + movl $imm32, %r11d (6 bytes total).  */
          mem = adjust_address (m_tramp, HImode, offset);
          emit_move_insn (mem, gen_int_mode (0xbb41, HImode));

          mem = adjust_address (m_tramp, SImode, offset + 2);
          emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
          offset += 6;
        }
      else
        {
          /* Bytes 49 bb: REX.WB + movabs $imm64, %r11 (10 bytes).  */
          mem = adjust_address (m_tramp, HImode, offset);
          emit_move_insn (mem, gen_int_mode (0xbb49, HImode));

          mem = adjust_address (m_tramp, DImode, offset + 2);
          emit_move_insn (mem, fnaddr);
          offset += 10;
        }

      /* Load static chain using movabs to r10.  Use the shorter movl
         instead of movabs when ptr_mode == SImode.  */
      if (ptr_mode == SImode)
        {
          opcode = 0xba41;
          size = 6;
        }
      else
        {
          opcode = 0xba49;
          size = 10;
        }

      mem = adjust_address (m_tramp, HImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, HImode));

      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
      emit_move_insn (mem, chain_value);
      offset += size;

      /* Jump to r11; the last (unused) byte is a nop, only there to
         pad the write out to a single 32-bit store.  */
      mem = adjust_address (m_tramp, SImode, offset);
      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
      offset += 4;
    }
  else
    {
      rtx disp, chain;

      /* Depending on the static chain location, either load a register
         with a constant, or push the constant to the stack.  All of the
         instructions are the same size.  */
      chain = ix86_static_chain (fndecl, true);
      if (REG_P (chain))
        {
          /* 0xb8/0xb9: movl $imm32 into %eax / %ecx respectively.  */
          switch (REGNO (chain))
            {
            case AX_REG:
              opcode = 0xb8; break;
            case CX_REG:
              opcode = 0xb9; break;
            default:
              gcc_unreachable ();
            }
        }
      else
        opcode = 0x68;

      if (need_endbr)
        {
          /* Insert ENDBR32.  */
          mem = adjust_address (m_tramp, SImode, offset);
          emit_move_insn (mem, gen_int_mode (0xfb1e0ff3, SImode));
          offset += 4;
        }

      /* One-byte opcode followed by the 32-bit chain value (5 bytes).  */
      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (opcode, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);
      emit_move_insn (mem, chain_value);
      offset += 5;

      /* 0xe9: jmp rel32, displacement filled in below.  */
      mem = adjust_address (m_tramp, QImode, offset);
      emit_move_insn (mem, gen_int_mode (0xe9, QImode));

      mem = adjust_address (m_tramp, SImode, offset + 1);

      /* Compute offset from the end of the jmp to the target function.
         In the case in which the trampoline stores the static chain on
         the stack, we need to skip the first insn which pushes the
         (call-saved) register static chain; this push is 1 byte.  */
      offset += 5;
      int skip = MEM_P (chain) ? 1 : 0;
      /* Skip ENDBR32 at the entry of the target function.  */
      if (need_endbr
          && !cgraph_node::get (fndecl)->only_called_directly_p ())
        skip += 4;
      disp = expand_binop (SImode, sub_optab, fnaddr,
                           plus_constant (Pmode, XEXP (m_tramp, 0),
                                          offset - skip),
                           NULL_RTX, 1, OPTAB_DIRECT);
      emit_move_insn (mem, disp);
    }

  gcc_assert (offset <= TRAMPOLINE_SIZE);

#ifdef HAVE_ENABLE_EXECUTE_STACK
#ifdef CHECK_EXECUTE_STACK_ENABLED
  if (CHECK_EXECUTE_STACK_ENABLED)
#endif
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
                     LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
#endif
}
   18942              : 
   18943              : static bool
   18944     54025244 : ix86_allocate_stack_slots_for_args (void)
   18945              : {
   18946              :   /* Naked functions should not allocate stack slots for arguments.  */
   18947     54025244 :   return !ix86_function_naked (current_function_decl);
   18948              : }
   18949              : 
   18950              : static bool
   18951     42198375 : ix86_warn_func_return (tree decl)
   18952              : {
   18953              :   /* Naked functions are implemented entirely in assembly, including the
   18954              :      return sequence, so suppress warnings about this.  */
   18955     42198375 :   return !ix86_function_naked (decl);
   18956              : }
   18957              : 
/* Return the shift count of a vector by scalar shift builtin second argument
   ARG1.  Returns an INTEGER_CST tree holding the count, or NULL_TREE
   when the count cannot be determined at compile time.  */
static tree
ix86_vector_shift_count (tree arg1)
{
  /* A plain integer constant that fits an unsigned HWI is the count
     directly.  */
  if (tree_fits_uhwi_p (arg1))
    return arg1;
  else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
    {
      /* The count argument is weird, passed in as various 128-bit
         (or 64-bit) vectors, the low 64 bits from it are the count.  */
      unsigned char buf[16];
      int len = native_encode_expr (arg1, buf, 16);
      if (len == 0)
        return NULL_TREE;
      /* Reinterpret the encoded bytes as a uint64 to extract the
         low-64-bit count.  */
      tree t = native_interpret_expr (uint64_type_node, buf, len);
      if (t && tree_fits_uhwi_p (t))
        return t;
    }
  return NULL_TREE;
}
   18979              : 
   18980              : /* Return true if arg_mask is all ones, ELEMS is elements number of
   18981              :    corresponding vector.  */
   18982              : static bool
   18983        25042 : ix86_masked_all_ones (unsigned HOST_WIDE_INT elems, tree arg_mask)
   18984              : {
   18985        25042 :   if (TREE_CODE (arg_mask) != INTEGER_CST)
   18986              :     return false;
   18987              : 
   18988         7462 :   unsigned HOST_WIDE_INT mask = TREE_INT_CST_LOW (arg_mask);
   18989         7462 :   if (elems == HOST_BITS_PER_WIDE_INT)
   18990           33 :     return  mask == HOST_WIDE_INT_M1U;
   18991         7429 :   if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
   18992         2681 :     return false;
   18993              : 
   18994              :   return true;
   18995              : }
   18996              : 
   18997              : static tree
   18998     67970077 : ix86_fold_builtin (tree fndecl, int n_args,
   18999              :                    tree *args, bool ignore ATTRIBUTE_UNUSED)
   19000              : {
   19001     67970077 :   if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
   19002              :     {
   19003     67970077 :       enum ix86_builtins fn_code
   19004     67970077 :         = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
   19005     67970077 :       enum rtx_code rcode;
   19006     67970077 :       bool is_vshift;
   19007     67970077 :       enum tree_code tcode;
   19008     67970077 :       bool is_scalar;
   19009     67970077 :       unsigned HOST_WIDE_INT mask;
   19010              : 
   19011     67970077 :       switch (fn_code)
   19012              :         {
   19013         8746 :         case IX86_BUILTIN_CPU_IS:
   19014         8746 :         case IX86_BUILTIN_CPU_SUPPORTS:
   19015         8746 :           gcc_assert (n_args == 1);
   19016         8746 :           return fold_builtin_cpu (fndecl, args);
   19017              : 
   19018        24315 :         case IX86_BUILTIN_NANQ:
   19019        24315 :         case IX86_BUILTIN_NANSQ:
   19020        24315 :           {
   19021        24315 :             tree type = TREE_TYPE (TREE_TYPE (fndecl));
   19022        24315 :             const char *str = c_getstr (*args);
   19023        24315 :             int quiet = fn_code == IX86_BUILTIN_NANQ;
   19024        24315 :             REAL_VALUE_TYPE real;
   19025              : 
   19026        24315 :             if (str && real_nan (&real, str, quiet, TYPE_MODE (type)))
   19027        24315 :               return build_real (type, real);
   19028            0 :             return NULL_TREE;
   19029              :           }
   19030              : 
   19031          108 :         case IX86_BUILTIN_INFQ:
   19032          108 :         case IX86_BUILTIN_HUGE_VALQ:
   19033          108 :           {
   19034          108 :             tree type = TREE_TYPE (TREE_TYPE (fndecl));
   19035          108 :             REAL_VALUE_TYPE inf;
   19036          108 :             real_inf (&inf);
   19037          108 :             return build_real (type, inf);
   19038              :           }
   19039              : 
   19040        62297 :         case IX86_BUILTIN_TZCNT16:
   19041        62297 :         case IX86_BUILTIN_CTZS:
   19042        62297 :         case IX86_BUILTIN_TZCNT32:
   19043        62297 :         case IX86_BUILTIN_TZCNT64:
   19044        62297 :           gcc_assert (n_args == 1);
   19045        62297 :           if (TREE_CODE (args[0]) == INTEGER_CST)
   19046              :             {
   19047           45 :               tree type = TREE_TYPE (TREE_TYPE (fndecl));
   19048           45 :               tree arg = args[0];
   19049           45 :               if (fn_code == IX86_BUILTIN_TZCNT16
   19050           45 :                   || fn_code == IX86_BUILTIN_CTZS)
   19051            3 :                 arg = fold_convert (short_unsigned_type_node, arg);
   19052           45 :               if (integer_zerop (arg))
   19053            6 :                 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
   19054              :               else
   19055           39 :                 return fold_const_call (CFN_CTZ, type, arg);
   19056              :             }
   19057              :           break;
   19058              : 
   19059        51869 :         case IX86_BUILTIN_LZCNT16:
   19060        51869 :         case IX86_BUILTIN_CLZS:
   19061        51869 :         case IX86_BUILTIN_LZCNT32:
   19062        51869 :         case IX86_BUILTIN_LZCNT64:
   19063        51869 :           gcc_assert (n_args == 1);
   19064        51869 :           if (TREE_CODE (args[0]) == INTEGER_CST)
   19065              :             {
   19066           54 :               tree type = TREE_TYPE (TREE_TYPE (fndecl));
   19067           54 :               tree arg = args[0];
   19068           54 :               if (fn_code == IX86_BUILTIN_LZCNT16
   19069           54 :                   || fn_code == IX86_BUILTIN_CLZS)
   19070           18 :                 arg = fold_convert (short_unsigned_type_node, arg);
   19071           54 :               if (integer_zerop (arg))
   19072            3 :                 return build_int_cst (type, TYPE_PRECISION (TREE_TYPE (arg)));
   19073              :               else
   19074           51 :                 return fold_const_call (CFN_CLZ, type, arg);
   19075              :             }
   19076              :           break;
   19077              : 
   19078        61081 :         case IX86_BUILTIN_BEXTR32:
   19079        61081 :         case IX86_BUILTIN_BEXTR64:
   19080        61081 :         case IX86_BUILTIN_BEXTRI32:
   19081        61081 :         case IX86_BUILTIN_BEXTRI64:
   19082        61081 :           gcc_assert (n_args == 2);
   19083        61081 :           if (tree_fits_uhwi_p (args[1]))
   19084              :             {
   19085          152 :               unsigned HOST_WIDE_INT res = 0;
   19086          152 :               unsigned int prec = TYPE_PRECISION (TREE_TYPE (args[0]));
   19087          152 :               unsigned int start = tree_to_uhwi (args[1]);
   19088          152 :               unsigned int len = (start & 0xff00) >> 8;
   19089          152 :               tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
   19090          152 :               start &= 0xff;
   19091          152 :               if (start >= prec || len == 0)
   19092          111 :                 return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
   19093              :                                          args[0]);
   19094           41 :               else if (!tree_fits_uhwi_p (args[0]))
   19095              :                 break;
   19096              :               else
   19097           24 :                 res = tree_to_uhwi (args[0]) >> start;
   19098           24 :               if (len > prec)
   19099              :                 len = prec;
   19100           24 :               if (len < HOST_BITS_PER_WIDE_INT)
   19101           15 :                 res &= (HOST_WIDE_INT_1U << len) - 1;
   19102           24 :               return build_int_cstu (lhs_type, res);
   19103              :             }
   19104              :           break;
   19105              : 
   19106        20984 :         case IX86_BUILTIN_BZHI32:
   19107        20984 :         case IX86_BUILTIN_BZHI64:
   19108        20984 :           gcc_assert (n_args == 2);
   19109        20984 :           if (tree_fits_uhwi_p (args[1]))
   19110              :             {
   19111          190 :               unsigned int idx = tree_to_uhwi (args[1]) & 0xff;
   19112          190 :               tree lhs_type = TREE_TYPE (TREE_TYPE (fndecl));
   19113          190 :               if (idx >= TYPE_PRECISION (TREE_TYPE (args[0])))
   19114              :                 return args[0];
   19115          190 :               if (idx == 0)
   19116           52 :                 return omit_one_operand (lhs_type, build_zero_cst (lhs_type),
   19117              :                                          args[0]);
   19118          138 :               if (!tree_fits_uhwi_p (args[0]))
   19119              :                 break;
   19120           12 :               unsigned HOST_WIDE_INT res = tree_to_uhwi (args[0]);
   19121           12 :               res &= ~(HOST_WIDE_INT_M1U << idx);
   19122           12 :               return build_int_cstu (lhs_type, res);
   19123              :             }
   19124              :           break;
   19125              : 
   19126        20742 :         case IX86_BUILTIN_PDEP32:
   19127        20742 :         case IX86_BUILTIN_PDEP64:
   19128        20742 :           gcc_assert (n_args == 2);
   19129        20742 :           if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
   19130              :             {
   19131           46 :               unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
   19132           46 :               unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
   19133           46 :               unsigned HOST_WIDE_INT res = 0;
   19134           46 :               unsigned HOST_WIDE_INT m, k = 1;
   19135         2990 :               for (m = 1; m; m <<= 1)
   19136         2944 :                 if ((mask & m) != 0)
   19137              :                   {
   19138         1440 :                     if ((src & k) != 0)
   19139          789 :                       res |= m;
   19140         1440 :                     k <<= 1;
   19141              :                   }
   19142           46 :               return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
   19143              :             }
   19144              :           break;
   19145              : 
   19146        20744 :         case IX86_BUILTIN_PEXT32:
   19147        20744 :         case IX86_BUILTIN_PEXT64:
   19148        20744 :           gcc_assert (n_args == 2);
   19149        20744 :           if (tree_fits_uhwi_p (args[0]) && tree_fits_uhwi_p (args[1]))
   19150              :             {
   19151           46 :               unsigned HOST_WIDE_INT src = tree_to_uhwi (args[0]);
   19152           46 :               unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[1]);
   19153           46 :               unsigned HOST_WIDE_INT res = 0;
   19154           46 :               unsigned HOST_WIDE_INT m, k = 1;
   19155         2990 :               for (m = 1; m; m <<= 1)
   19156         2944 :                 if ((mask & m) != 0)
   19157              :                   {
   19158         2016 :                     if ((src & m) != 0)
   19159         1063 :                       res |= k;
   19160         2016 :                     k <<= 1;
   19161              :                   }
   19162           46 :               return build_int_cstu (TREE_TYPE (TREE_TYPE (fndecl)), res);
   19163              :             }
   19164              :           break;
   19165              : 
   19166        81068 :         case IX86_BUILTIN_MOVMSKPS:
   19167        81068 :         case IX86_BUILTIN_PMOVMSKB:
   19168        81068 :         case IX86_BUILTIN_MOVMSKPD:
   19169        81068 :         case IX86_BUILTIN_PMOVMSKB128:
   19170        81068 :         case IX86_BUILTIN_MOVMSKPD256:
   19171        81068 :         case IX86_BUILTIN_MOVMSKPS256:
   19172        81068 :         case IX86_BUILTIN_PMOVMSKB256:
   19173        81068 :           gcc_assert (n_args == 1);
   19174        81068 :           if (TREE_CODE (args[0]) == VECTOR_CST)
   19175              :             {
   19176              :               HOST_WIDE_INT res = 0;
   19177          139 :               for (unsigned i = 0; i < VECTOR_CST_NELTS (args[0]); ++i)
   19178              :                 {
   19179          124 :                   tree e = VECTOR_CST_ELT (args[0], i);
   19180          124 :                   if (TREE_CODE (e) == INTEGER_CST && !TREE_OVERFLOW (e))
   19181              :                     {
   19182           80 :                       if (wi::neg_p (wi::to_wide (e)))
   19183           31 :                         res |= HOST_WIDE_INT_1 << i;
   19184              :                     }
   19185           44 :                   else if (TREE_CODE (e) == REAL_CST && !TREE_OVERFLOW (e))
   19186              :                     {
   19187           44 :                       if (TREE_REAL_CST (e).sign)
   19188           19 :                         res |= HOST_WIDE_INT_1 << i;
   19189              :                     }
   19190              :                   else
   19191              :                     return NULL_TREE;
   19192              :                 }
   19193           15 :               return build_int_cst (TREE_TYPE (TREE_TYPE (fndecl)), res);
   19194              :             }
   19195              :           break;
   19196              : 
   19197       658268 :         case IX86_BUILTIN_PSLLD:
   19198       658268 :         case IX86_BUILTIN_PSLLD128:
   19199       658268 :         case IX86_BUILTIN_PSLLD128_MASK:
   19200       658268 :         case IX86_BUILTIN_PSLLD256:
   19201       658268 :         case IX86_BUILTIN_PSLLD256_MASK:
   19202       658268 :         case IX86_BUILTIN_PSLLD512:
   19203       658268 :         case IX86_BUILTIN_PSLLDI:
   19204       658268 :         case IX86_BUILTIN_PSLLDI128:
   19205       658268 :         case IX86_BUILTIN_PSLLDI128_MASK:
   19206       658268 :         case IX86_BUILTIN_PSLLDI256:
   19207       658268 :         case IX86_BUILTIN_PSLLDI256_MASK:
   19208       658268 :         case IX86_BUILTIN_PSLLDI512:
   19209       658268 :         case IX86_BUILTIN_PSLLQ:
   19210       658268 :         case IX86_BUILTIN_PSLLQ128:
   19211       658268 :         case IX86_BUILTIN_PSLLQ128_MASK:
   19212       658268 :         case IX86_BUILTIN_PSLLQ256:
   19213       658268 :         case IX86_BUILTIN_PSLLQ256_MASK:
   19214       658268 :         case IX86_BUILTIN_PSLLQ512:
   19215       658268 :         case IX86_BUILTIN_PSLLQI:
   19216       658268 :         case IX86_BUILTIN_PSLLQI128:
   19217       658268 :         case IX86_BUILTIN_PSLLQI128_MASK:
   19218       658268 :         case IX86_BUILTIN_PSLLQI256:
   19219       658268 :         case IX86_BUILTIN_PSLLQI256_MASK:
   19220       658268 :         case IX86_BUILTIN_PSLLQI512:
   19221       658268 :         case IX86_BUILTIN_PSLLW:
   19222       658268 :         case IX86_BUILTIN_PSLLW128:
   19223       658268 :         case IX86_BUILTIN_PSLLW128_MASK:
   19224       658268 :         case IX86_BUILTIN_PSLLW256:
   19225       658268 :         case IX86_BUILTIN_PSLLW256_MASK:
   19226       658268 :         case IX86_BUILTIN_PSLLW512_MASK:
   19227       658268 :         case IX86_BUILTIN_PSLLWI:
   19228       658268 :         case IX86_BUILTIN_PSLLWI128:
   19229       658268 :         case IX86_BUILTIN_PSLLWI128_MASK:
   19230       658268 :         case IX86_BUILTIN_PSLLWI256:
   19231       658268 :         case IX86_BUILTIN_PSLLWI256_MASK:
   19232       658268 :         case IX86_BUILTIN_PSLLWI512_MASK:
   19233       658268 :           rcode = ASHIFT;
   19234       658268 :           is_vshift = false;
   19235       658268 :           goto do_shift;
   19236       599991 :         case IX86_BUILTIN_PSRAD:
   19237       599991 :         case IX86_BUILTIN_PSRAD128:
   19238       599991 :         case IX86_BUILTIN_PSRAD128_MASK:
   19239       599991 :         case IX86_BUILTIN_PSRAD256:
   19240       599991 :         case IX86_BUILTIN_PSRAD256_MASK:
   19241       599991 :         case IX86_BUILTIN_PSRAD512:
   19242       599991 :         case IX86_BUILTIN_PSRADI:
   19243       599991 :         case IX86_BUILTIN_PSRADI128:
   19244       599991 :         case IX86_BUILTIN_PSRADI128_MASK:
   19245       599991 :         case IX86_BUILTIN_PSRADI256:
   19246       599991 :         case IX86_BUILTIN_PSRADI256_MASK:
   19247       599991 :         case IX86_BUILTIN_PSRADI512:
   19248       599991 :         case IX86_BUILTIN_PSRAQ128_MASK:
   19249       599991 :         case IX86_BUILTIN_PSRAQ256_MASK:
   19250       599991 :         case IX86_BUILTIN_PSRAQ512:
   19251       599991 :         case IX86_BUILTIN_PSRAQI128_MASK:
   19252       599991 :         case IX86_BUILTIN_PSRAQI256_MASK:
   19253       599991 :         case IX86_BUILTIN_PSRAQI512:
   19254       599991 :         case IX86_BUILTIN_PSRAW:
   19255       599991 :         case IX86_BUILTIN_PSRAW128:
   19256       599991 :         case IX86_BUILTIN_PSRAW128_MASK:
   19257       599991 :         case IX86_BUILTIN_PSRAW256:
   19258       599991 :         case IX86_BUILTIN_PSRAW256_MASK:
   19259       599991 :         case IX86_BUILTIN_PSRAW512:
   19260       599991 :         case IX86_BUILTIN_PSRAWI:
   19261       599991 :         case IX86_BUILTIN_PSRAWI128:
   19262       599991 :         case IX86_BUILTIN_PSRAWI128_MASK:
   19263       599991 :         case IX86_BUILTIN_PSRAWI256:
   19264       599991 :         case IX86_BUILTIN_PSRAWI256_MASK:
   19265       599991 :         case IX86_BUILTIN_PSRAWI512:
   19266       599991 :           rcode = ASHIFTRT;
   19267       599991 :           is_vshift = false;
   19268       599991 :           goto do_shift;
   19269       632213 :         case IX86_BUILTIN_PSRLD:
   19270       632213 :         case IX86_BUILTIN_PSRLD128:
   19271       632213 :         case IX86_BUILTIN_PSRLD128_MASK:
   19272       632213 :         case IX86_BUILTIN_PSRLD256:
   19273       632213 :         case IX86_BUILTIN_PSRLD256_MASK:
   19274       632213 :         case IX86_BUILTIN_PSRLD512:
   19275       632213 :         case IX86_BUILTIN_PSRLDI:
   19276       632213 :         case IX86_BUILTIN_PSRLDI128:
   19277       632213 :         case IX86_BUILTIN_PSRLDI128_MASK:
   19278       632213 :         case IX86_BUILTIN_PSRLDI256:
   19279       632213 :         case IX86_BUILTIN_PSRLDI256_MASK:
   19280       632213 :         case IX86_BUILTIN_PSRLDI512:
   19281       632213 :         case IX86_BUILTIN_PSRLQ:
   19282       632213 :         case IX86_BUILTIN_PSRLQ128:
   19283       632213 :         case IX86_BUILTIN_PSRLQ128_MASK:
   19284       632213 :         case IX86_BUILTIN_PSRLQ256:
   19285       632213 :         case IX86_BUILTIN_PSRLQ256_MASK:
   19286       632213 :         case IX86_BUILTIN_PSRLQ512:
   19287       632213 :         case IX86_BUILTIN_PSRLQI:
   19288       632213 :         case IX86_BUILTIN_PSRLQI128:
   19289       632213 :         case IX86_BUILTIN_PSRLQI128_MASK:
   19290       632213 :         case IX86_BUILTIN_PSRLQI256:
   19291       632213 :         case IX86_BUILTIN_PSRLQI256_MASK:
   19292       632213 :         case IX86_BUILTIN_PSRLQI512:
   19293       632213 :         case IX86_BUILTIN_PSRLW:
   19294       632213 :         case IX86_BUILTIN_PSRLW128:
   19295       632213 :         case IX86_BUILTIN_PSRLW128_MASK:
   19296       632213 :         case IX86_BUILTIN_PSRLW256:
   19297       632213 :         case IX86_BUILTIN_PSRLW256_MASK:
   19298       632213 :         case IX86_BUILTIN_PSRLW512:
   19299       632213 :         case IX86_BUILTIN_PSRLWI:
   19300       632213 :         case IX86_BUILTIN_PSRLWI128:
   19301       632213 :         case IX86_BUILTIN_PSRLWI128_MASK:
   19302       632213 :         case IX86_BUILTIN_PSRLWI256:
   19303       632213 :         case IX86_BUILTIN_PSRLWI256_MASK:
   19304       632213 :         case IX86_BUILTIN_PSRLWI512:
   19305       632213 :           rcode = LSHIFTRT;
   19306       632213 :           is_vshift = false;
   19307       632213 :           goto do_shift;
   19308       275402 :         case IX86_BUILTIN_PSLLVV16HI:
   19309       275402 :         case IX86_BUILTIN_PSLLVV16SI:
   19310       275402 :         case IX86_BUILTIN_PSLLVV2DI:
   19311       275402 :         case IX86_BUILTIN_PSLLVV2DI_MASK:
   19312       275402 :         case IX86_BUILTIN_PSLLVV32HI:
   19313       275402 :         case IX86_BUILTIN_PSLLVV4DI:
   19314       275402 :         case IX86_BUILTIN_PSLLVV4DI_MASK:
   19315       275402 :         case IX86_BUILTIN_PSLLVV4SI:
   19316       275402 :         case IX86_BUILTIN_PSLLVV4SI_MASK:
   19317       275402 :         case IX86_BUILTIN_PSLLVV8DI:
   19318       275402 :         case IX86_BUILTIN_PSLLVV8HI:
   19319       275402 :         case IX86_BUILTIN_PSLLVV8SI:
   19320       275402 :         case IX86_BUILTIN_PSLLVV8SI_MASK:
   19321       275402 :           rcode = ASHIFT;
   19322       275402 :           is_vshift = true;
   19323       275402 :           goto do_shift;
   19324       274983 :         case IX86_BUILTIN_PSRAVQ128:
   19325       274983 :         case IX86_BUILTIN_PSRAVQ256:
   19326       274983 :         case IX86_BUILTIN_PSRAVV16HI:
   19327       274983 :         case IX86_BUILTIN_PSRAVV16SI:
   19328       274983 :         case IX86_BUILTIN_PSRAVV32HI:
   19329       274983 :         case IX86_BUILTIN_PSRAVV4SI:
   19330       274983 :         case IX86_BUILTIN_PSRAVV4SI_MASK:
   19331       274983 :         case IX86_BUILTIN_PSRAVV8DI:
   19332       274983 :         case IX86_BUILTIN_PSRAVV8HI:
   19333       274983 :         case IX86_BUILTIN_PSRAVV8SI:
   19334       274983 :         case IX86_BUILTIN_PSRAVV8SI_MASK:
   19335       274983 :           rcode = ASHIFTRT;
   19336       274983 :           is_vshift = true;
   19337       274983 :           goto do_shift;
   19338       275393 :         case IX86_BUILTIN_PSRLVV16HI:
   19339       275393 :         case IX86_BUILTIN_PSRLVV16SI:
   19340       275393 :         case IX86_BUILTIN_PSRLVV2DI:
   19341       275393 :         case IX86_BUILTIN_PSRLVV2DI_MASK:
   19342       275393 :         case IX86_BUILTIN_PSRLVV32HI:
   19343       275393 :         case IX86_BUILTIN_PSRLVV4DI:
   19344       275393 :         case IX86_BUILTIN_PSRLVV4DI_MASK:
   19345       275393 :         case IX86_BUILTIN_PSRLVV4SI:
   19346       275393 :         case IX86_BUILTIN_PSRLVV4SI_MASK:
   19347       275393 :         case IX86_BUILTIN_PSRLVV8DI:
   19348       275393 :         case IX86_BUILTIN_PSRLVV8HI:
   19349       275393 :         case IX86_BUILTIN_PSRLVV8SI:
   19350       275393 :         case IX86_BUILTIN_PSRLVV8SI_MASK:
   19351       275393 :           rcode = LSHIFTRT;
   19352       275393 :           is_vshift = true;
   19353       275393 :           goto do_shift;
   19354              : 
   19355      2716250 :         do_shift:
   19356      2716250 :           gcc_assert (n_args >= 2);
   19357      2716250 :           if (TREE_CODE (args[0]) != VECTOR_CST)
   19358              :             break;
   19359          927 :           mask = HOST_WIDE_INT_M1U;
   19360          927 :           if (n_args > 2)
   19361              :             {
   19362              :               /* This is masked shift.  */
   19363          678 :               if (!tree_fits_uhwi_p (args[n_args - 1])
   19364          678 :                   || TREE_SIDE_EFFECTS (args[n_args - 2]))
   19365              :                 break;
   19366          678 :               mask = tree_to_uhwi (args[n_args - 1]);
   19367          678 :               unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
   19368          678 :               mask |= HOST_WIDE_INT_M1U << elems;
   19369          678 :               if (mask != HOST_WIDE_INT_M1U
   19370          567 :                   && TREE_CODE (args[n_args - 2]) != VECTOR_CST)
   19371              :                 break;
   19372          633 :               if (mask == (HOST_WIDE_INT_M1U << elems))
   19373              :                 return args[n_args - 2];
   19374              :             }
   19375          879 :           if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
   19376              :             break;
   19377          879 :           if (tree tem = (is_vshift ? integer_one_node
   19378          879 :                           : ix86_vector_shift_count (args[1])))
   19379              :             {
   19380          558 :               unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
   19381          558 :               unsigned HOST_WIDE_INT prec
   19382          558 :                 = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
   19383          558 :               if (count == 0 && mask == HOST_WIDE_INT_M1U)
   19384              :                 return args[0];
   19385          558 :               if (count >= prec)
   19386              :                 {
   19387           72 :                   if (rcode == ASHIFTRT)
   19388           27 :                     count = prec - 1;
   19389           45 :                   else if (mask == HOST_WIDE_INT_M1U)
   19390            3 :                     return build_zero_cst (TREE_TYPE (args[0]));
   19391              :                 }
   19392          555 :               tree countt = NULL_TREE;
   19393          555 :               if (!is_vshift)
   19394              :                 {
   19395          377 :                   if (count >= prec)
   19396           42 :                     countt = integer_zero_node;
   19397              :                   else
   19398          335 :                     countt = build_int_cst (integer_type_node, count);
   19399              :                 }
   19400          555 :               tree_vector_builder builder;
   19401          555 :               if (mask != HOST_WIDE_INT_M1U || is_vshift)
   19402          392 :                 builder.new_vector (TREE_TYPE (args[0]),
   19403          784 :                                     TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0])),
   19404              :                                     1);
   19405              :               else
   19406          163 :                 builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
   19407              :                                              false);
   19408          555 :               unsigned int cnt = builder.encoded_nelts ();
   19409         5967 :               for (unsigned int i = 0; i < cnt; ++i)
   19410              :                 {
   19411         5412 :                   tree elt = VECTOR_CST_ELT (args[0], i);
   19412         5412 :                   if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
   19413            0 :                     return NULL_TREE;
   19414         5412 :                   tree type = TREE_TYPE (elt);
   19415         5412 :                   if (rcode == LSHIFTRT)
   19416         2040 :                     elt = fold_convert (unsigned_type_for (type), elt);
   19417         5412 :                   if (is_vshift)
   19418              :                     {
   19419         1846 :                       countt = VECTOR_CST_ELT (args[1], i);
   19420         1846 :                       if (TREE_CODE (countt) != INTEGER_CST
   19421         1846 :                           || TREE_OVERFLOW (countt))
   19422              :                         return NULL_TREE;
   19423         1846 :                       if (wi::neg_p (wi::to_wide (countt))
   19424         3610 :                           || wi::to_widest (countt) >= prec)
   19425              :                         {
   19426          325 :                           if (rcode == ASHIFTRT)
   19427          108 :                             countt = build_int_cst (TREE_TYPE (countt),
   19428          108 :                                                     prec - 1);
   19429              :                           else
   19430              :                             {
   19431          217 :                               elt = build_zero_cst (TREE_TYPE (elt));
   19432          217 :                               countt = build_zero_cst (TREE_TYPE (countt));
   19433              :                             }
   19434              :                         }
   19435              :                     }
   19436         3566 :                   else if (count >= prec)
   19437          504 :                     elt = build_zero_cst (TREE_TYPE (elt));
   19438         8950 :                   elt = const_binop (rcode == ASHIFT
   19439              :                                      ? LSHIFT_EXPR : RSHIFT_EXPR,
   19440         5412 :                                      TREE_TYPE (elt), elt, countt);
   19441         5412 :                   if (!elt || TREE_CODE (elt) != INTEGER_CST)
   19442              :                     return NULL_TREE;
   19443         5412 :                   if (rcode == LSHIFTRT)
   19444         2040 :                     elt = fold_convert (type, elt);
   19445         5412 :                   if ((mask & (HOST_WIDE_INT_1U << i)) == 0)
   19446              :                     {
   19447         1566 :                       elt = VECTOR_CST_ELT (args[n_args - 2], i);
   19448         1566 :                       if (TREE_CODE (elt) != INTEGER_CST
   19449         1566 :                           || TREE_OVERFLOW (elt))
   19450              :                         return NULL_TREE;
   19451              :                     }
   19452         5412 :                   builder.quick_push (elt);
   19453              :                 }
   19454          555 :               return builder.build ();
   19455          555 :             }
   19456              :           break;
   19457              : 
   19458        32650 :         case IX86_BUILTIN_MINSS:
   19459        32650 :         case IX86_BUILTIN_MINSH_MASK:
   19460        32650 :           tcode = LT_EXPR;
   19461        32650 :           is_scalar = true;
   19462        32650 :           goto do_minmax;
   19463              : 
   19464        32650 :         case IX86_BUILTIN_MAXSS:
   19465        32650 :         case IX86_BUILTIN_MAXSH_MASK:
   19466        32650 :           tcode = GT_EXPR;
   19467        32650 :           is_scalar = true;
   19468        32650 :           goto do_minmax;
   19469              : 
   19470       349807 :         case IX86_BUILTIN_MINPS:
   19471       349807 :         case IX86_BUILTIN_MINPD:
   19472       349807 :         case IX86_BUILTIN_MINPS256:
   19473       349807 :         case IX86_BUILTIN_MINPD256:
   19474       349807 :         case IX86_BUILTIN_MINPS512:
   19475       349807 :         case IX86_BUILTIN_MINPD512:
   19476       349807 :         case IX86_BUILTIN_MINPS128_MASK:
   19477       349807 :         case IX86_BUILTIN_MINPD128_MASK:
   19478       349807 :         case IX86_BUILTIN_MINPS256_MASK:
   19479       349807 :         case IX86_BUILTIN_MINPD256_MASK:
   19480       349807 :         case IX86_BUILTIN_MINPH128_MASK:
   19481       349807 :         case IX86_BUILTIN_MINPH256_MASK:
   19482       349807 :         case IX86_BUILTIN_MINPH512_MASK:
   19483       349807 :           tcode = LT_EXPR;
   19484       349807 :           is_scalar = false;
   19485       349807 :           goto do_minmax;
   19486              : 
   19487              :         case IX86_BUILTIN_MAXPS:
   19488              :         case IX86_BUILTIN_MAXPD:
   19489              :         case IX86_BUILTIN_MAXPS256:
   19490              :         case IX86_BUILTIN_MAXPD256:
   19491              :         case IX86_BUILTIN_MAXPS512:
   19492              :         case IX86_BUILTIN_MAXPD512:
   19493              :         case IX86_BUILTIN_MAXPS128_MASK:
   19494              :         case IX86_BUILTIN_MAXPD128_MASK:
   19495              :         case IX86_BUILTIN_MAXPS256_MASK:
   19496              :         case IX86_BUILTIN_MAXPD256_MASK:
   19497              :         case IX86_BUILTIN_MAXPH128_MASK:
   19498              :         case IX86_BUILTIN_MAXPH256_MASK:
   19499              :         case IX86_BUILTIN_MAXPH512_MASK:
   19500              :           tcode = GT_EXPR;
   19501              :           is_scalar = false;
   19502       764934 :         do_minmax:
   19503       764934 :           gcc_assert (n_args >= 2);
   19504       764934 :           if (TREE_CODE (args[0]) != VECTOR_CST
   19505           76 :               || TREE_CODE (args[1]) != VECTOR_CST)
   19506              :             break;
   19507           76 :           mask = HOST_WIDE_INT_M1U;
   19508           76 :           if (n_args > 2)
   19509              :             {
   19510           36 :               gcc_assert (n_args >= 4);
   19511              :               /* This is masked minmax.  */
   19512           36 :               if (TREE_CODE (args[3]) != INTEGER_CST
   19513           36 :                   || TREE_SIDE_EFFECTS (args[2]))
   19514              :                 break;
   19515           36 :               mask = TREE_INT_CST_LOW (args[3]);
   19516           36 :               unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
   19517           36 :               mask |= HOST_WIDE_INT_M1U << elems;
   19518           36 :               if (mask != HOST_WIDE_INT_M1U
   19519           32 :                   && TREE_CODE (args[2]) != VECTOR_CST)
   19520              :                 break;
   19521           36 :               if (n_args >= 5)
   19522              :                 {
   19523           20 :                   if (!tree_fits_uhwi_p (args[4]))
   19524              :                     break;
   19525           20 :                   if (tree_to_uhwi (args[4]) != 4
   19526            0 :                       && tree_to_uhwi (args[4]) != 8)
   19527              :                     break;
   19528              :                 }
   19529           36 :               if (mask == (HOST_WIDE_INT_M1U << elems))
   19530              :                 return args[2];
   19531              :             }
   19532              :           /* Punt on NaNs, unless exceptions are disabled.  */
   19533           76 :           if (HONOR_NANS (args[0])
   19534           76 :               && (n_args < 5 || tree_to_uhwi (args[4]) != 8))
   19535          184 :             for (int i = 0; i < 2; ++i)
   19536              :               {
   19537          134 :                 unsigned count = vector_cst_encoded_nelts (args[i]);
   19538          957 :                 for (unsigned j = 0; j < count; ++j)
   19539          849 :                   if (tree_expr_nan_p (VECTOR_CST_ENCODED_ELT (args[i], j)))
   19540              :                     return NULL_TREE;
   19541              :               }
   19542           50 :           {
   19543           50 :             tree res = const_binop (tcode,
   19544           50 :                                     truth_type_for (TREE_TYPE (args[0])),
   19545              :                                     args[0], args[1]);
   19546           50 :             if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
   19547              :               break;
   19548           50 :             res = fold_ternary (VEC_COND_EXPR, TREE_TYPE (args[0]), res,
   19549              :                                 args[0], args[1]);
   19550           50 :             if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
   19551              :               break;
   19552           50 :             if (mask != HOST_WIDE_INT_M1U)
   19553              :               {
   19554           32 :                 unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
   19555           32 :                 vec_perm_builder sel (nelts, nelts, 1);
   19556          328 :                 for (unsigned int i = 0; i < nelts; i++)
   19557          296 :                   if (mask & (HOST_WIDE_INT_1U << i))
   19558          160 :                     sel.quick_push (i);
   19559              :                   else
   19560          136 :                     sel.quick_push (nelts + i);
   19561           32 :                 vec_perm_indices indices (sel, 2, nelts);
   19562           32 :                 res = fold_vec_perm (TREE_TYPE (args[0]), res, args[2],
   19563              :                                      indices);
   19564           32 :                 if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
   19565              :                   break;
   19566           32 :               }
   19567           50 :             if (is_scalar)
   19568              :               {
   19569           10 :                 unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
   19570           10 :                 vec_perm_builder sel (nelts, nelts, 1);
   19571           10 :                 sel.quick_push (0);
   19572           40 :                 for (unsigned int i = 1; i < nelts; i++)
   19573           30 :                   sel.quick_push (nelts + i);
   19574           10 :                 vec_perm_indices indices (sel, 2, nelts);
   19575           10 :                 res = fold_vec_perm (TREE_TYPE (args[0]), res, args[0],
   19576              :                                      indices);
   19577           10 :                 if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
   19578              :                   break;
   19579           10 :               }
   19580           50 :             return res;
   19581              :           }
   19582              : 
   19583              :         default:
   19584              :           break;
   19585              :         }
   19586              :     }
   19587              : 
   19588              : #ifdef SUBTARGET_FOLD_BUILTIN
   19589              :   return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
   19590              : #endif
   19591              : 
   19592              :   return NULL_TREE;
   19593              : }
   19594              : 
   19595              : /* Fold a MD builtin (use ix86_fold_builtin for folding into
   19596              :    constant) in GIMPLE.  */
   19597              : 
   19598              : bool
   19599      1095065 : ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   19600              : {
   19601      1095065 :   gimple *stmt = gsi_stmt (*gsi), *g;
   19602      1095065 :   gimple_seq stmts = NULL;
   19603      1095065 :   tree fndecl = gimple_call_fndecl (stmt);
   19604      1095065 :   gcc_checking_assert (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD));
   19605      1095065 :   int n_args = gimple_call_num_args (stmt);
   19606      1095065 :   enum ix86_builtins fn_code
   19607      1095065 :     = (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
   19608      1095065 :   tree decl = NULL_TREE;
   19609      1095065 :   tree arg0, arg1, arg2;
   19610      1095065 :   enum rtx_code rcode;
   19611      1095065 :   enum tree_code tcode;
   19612      1095065 :   unsigned HOST_WIDE_INT count;
   19613      1095065 :   bool is_vshift;
   19614      1095065 :   unsigned HOST_WIDE_INT elems;
   19615      1095065 :   location_t loc;
   19616              : 
   19617              :   /* Don't fold when there's isa mismatch.  */
   19618      1095065 :   if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL))
   19619              :     return false;
   19620              : 
   19621      1094938 :   switch (fn_code)
   19622              :     {
   19623          288 :     case IX86_BUILTIN_TZCNT32:
   19624          288 :       decl = builtin_decl_implicit (BUILT_IN_CTZ);
   19625          288 :       goto fold_tzcnt_lzcnt;
   19626              : 
   19627          237 :     case IX86_BUILTIN_TZCNT64:
   19628          237 :       decl = builtin_decl_implicit (BUILT_IN_CTZLL);
   19629          237 :       goto fold_tzcnt_lzcnt;
   19630              : 
   19631          215 :     case IX86_BUILTIN_LZCNT32:
   19632          215 :       decl = builtin_decl_implicit (BUILT_IN_CLZ);
   19633          215 :       goto fold_tzcnt_lzcnt;
   19634              : 
   19635          224 :     case IX86_BUILTIN_LZCNT64:
   19636          224 :       decl = builtin_decl_implicit (BUILT_IN_CLZLL);
   19637          224 :       goto fold_tzcnt_lzcnt;
   19638              : 
   19639          964 :     fold_tzcnt_lzcnt:
   19640          964 :       gcc_assert (n_args == 1);
   19641          964 :       arg0 = gimple_call_arg (stmt, 0);
   19642          964 :       if (TREE_CODE (arg0) == SSA_NAME && decl && gimple_call_lhs (stmt))
   19643              :         {
   19644          799 :           int prec = TYPE_PRECISION (TREE_TYPE (arg0));
   19645              :           /* If arg0 is provably non-zero, optimize into generic
   19646              :              __builtin_c[tl]z{,ll} function the middle-end handles
   19647              :              better.  */
   19648          799 :           if (!expr_not_equal_to (arg0, wi::zero (prec)))
   19649              :             return false;
   19650              : 
   19651            9 :           loc = gimple_location (stmt);
   19652            9 :           g = gimple_build_call (decl, 1, arg0);
   19653            9 :           gimple_set_location (g, loc);
   19654            9 :           tree lhs = make_ssa_name (integer_type_node);
   19655            9 :           gimple_call_set_lhs (g, lhs);
   19656            9 :           gsi_insert_before (gsi, g, GSI_SAME_STMT);
   19657            9 :           g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR, lhs);
   19658            9 :           gimple_set_location (g, loc);
   19659            9 :           gsi_replace (gsi, g, false);
   19660            9 :           return true;
   19661              :         }
   19662              :       break;
   19663              : 
   19664          491 :     case IX86_BUILTIN_BZHI32:
   19665          491 :     case IX86_BUILTIN_BZHI64:
   19666          491 :       gcc_assert (n_args == 2);
   19667          491 :       arg1 = gimple_call_arg (stmt, 1);
   19668          491 :       if (tree_fits_uhwi_p (arg1) && gimple_call_lhs (stmt))
   19669              :         {
   19670          195 :           unsigned int idx = tree_to_uhwi (arg1) & 0xff;
   19671          195 :           arg0 = gimple_call_arg (stmt, 0);
   19672          195 :           if (idx < TYPE_PRECISION (TREE_TYPE (arg0)))
   19673              :             break;
   19674           31 :           loc = gimple_location (stmt);
   19675           31 :           g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
   19676           31 :           gimple_set_location (g, loc);
   19677           31 :           gsi_replace (gsi, g, false);
   19678           31 :           return true;
   19679              :         }
   19680              :       break;
   19681              : 
   19682          502 :     case IX86_BUILTIN_PDEP32:
   19683          502 :     case IX86_BUILTIN_PDEP64:
   19684          502 :     case IX86_BUILTIN_PEXT32:
   19685          502 :     case IX86_BUILTIN_PEXT64:
   19686          502 :       gcc_assert (n_args == 2);
   19687          502 :       arg1 = gimple_call_arg (stmt, 1);
   19688          502 :       if (integer_all_onesp (arg1) && gimple_call_lhs (stmt))
   19689              :         {
   19690            4 :           loc = gimple_location (stmt);
   19691            4 :           arg0 = gimple_call_arg (stmt, 0);
   19692            4 :           g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
   19693            4 :           gimple_set_location (g, loc);
   19694            4 :           gsi_replace (gsi, g, false);
   19695            4 :           return true;
   19696              :         }
   19697              :       break;
   19698              : 
   19699          145 :     case IX86_BUILTIN_PBLENDVB256:
   19700          145 :     case IX86_BUILTIN_BLENDVPS256:
   19701          145 :     case IX86_BUILTIN_BLENDVPD256:
   19702              :       /* pcmpeqb/d/q is under avx2, w/o avx2, it's veclowered
   19703              :          to scalar operations and not combined back.  */
   19704          145 :       if (!TARGET_AVX2)
   19705              :         break;
   19706              : 
   19707              :       /* FALLTHRU.  */
   19708          112 :     case IX86_BUILTIN_BLENDVPD:
   19709              :       /* blendvpd is under sse4.1 but pcmpgtq is under sse4.2,
   19710              :          w/o sse4.2, it's veclowered to scalar operations and
   19711              :          not combined back.  */
   19712          112 :       if (!TARGET_SSE4_2)
   19713              :         break;
   19714              :       /* FALLTHRU.  */
   19715          166 :     case IX86_BUILTIN_PBLENDVB128:
   19716          166 :     case IX86_BUILTIN_BLENDVPS:
   19717          166 :       gcc_assert (n_args == 3);
   19718          166 :       arg0 = gimple_call_arg (stmt, 0);
   19719          166 :       arg1 = gimple_call_arg (stmt, 1);
   19720          166 :       arg2 = gimple_call_arg (stmt, 2);
   19721          166 :       if (gimple_call_lhs (stmt))
   19722              :         {
   19723          166 :           loc = gimple_location (stmt);
   19724          166 :           tree type = TREE_TYPE (arg2);
   19725          166 :           if (VECTOR_FLOAT_TYPE_P (type))
   19726              :             {
   19727           73 :               tree itype = GET_MODE_INNER (TYPE_MODE (type)) == E_SFmode
   19728           73 :                 ? intSI_type_node : intDI_type_node;
   19729           73 :               type = get_same_sized_vectype (itype, type);
   19730              :             }
   19731              :           else
   19732           93 :             type = signed_type_for (type);
   19733          166 :           arg2 = gimple_build (&stmts, VIEW_CONVERT_EXPR, type, arg2);
   19734          166 :           tree zero_vec = build_zero_cst (type);
   19735          166 :           tree cmp_type = truth_type_for (type);
   19736          166 :           tree cmp = gimple_build (&stmts, LT_EXPR, cmp_type, arg2, zero_vec);
   19737          166 :           gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
   19738          166 :           g = gimple_build_assign (gimple_call_lhs (stmt),
   19739              :                                    VEC_COND_EXPR, cmp,
   19740              :                                    arg1, arg0);
   19741          166 :           gimple_set_location (g, loc);
   19742          166 :           gsi_replace (gsi, g, false);
   19743              :         }
   19744              :       else
   19745            0 :         gsi_replace (gsi, gimple_build_nop (), false);
   19746              :       return true;
   19747              : 
   19748              : 
   19749           16 :     case IX86_BUILTIN_PCMPEQB128:
   19750           16 :     case IX86_BUILTIN_PCMPEQW128:
   19751           16 :     case IX86_BUILTIN_PCMPEQD128:
   19752           16 :     case IX86_BUILTIN_PCMPEQQ:
   19753           16 :     case IX86_BUILTIN_PCMPEQB256:
   19754           16 :     case IX86_BUILTIN_PCMPEQW256:
   19755           16 :     case IX86_BUILTIN_PCMPEQD256:
   19756           16 :     case IX86_BUILTIN_PCMPEQQ256:
   19757           16 :       tcode = EQ_EXPR;
   19758           16 :       goto do_cmp;
   19759              : 
   19760              :     case IX86_BUILTIN_PCMPGTB128:
   19761              :     case IX86_BUILTIN_PCMPGTW128:
   19762              :     case IX86_BUILTIN_PCMPGTD128:
   19763              :     case IX86_BUILTIN_PCMPGTQ:
   19764              :     case IX86_BUILTIN_PCMPGTB256:
   19765              :     case IX86_BUILTIN_PCMPGTW256:
   19766              :     case IX86_BUILTIN_PCMPGTD256:
   19767              :     case IX86_BUILTIN_PCMPGTQ256:
   19768              :       tcode = GT_EXPR;
   19769              : 
   19770           33 :     do_cmp:
   19771           33 :       gcc_assert (n_args == 2);
   19772           33 :       arg0 = gimple_call_arg (stmt, 0);
   19773           33 :       arg1 = gimple_call_arg (stmt, 1);
   19774           33 :       if (gimple_call_lhs (stmt))
   19775              :         {
   19776           32 :           loc = gimple_location (stmt);
   19777           32 :           tree type = TREE_TYPE (arg0);
   19778           32 :           tree zero_vec = build_zero_cst (type);
   19779           32 :           tree minus_one_vec = build_minus_one_cst (type);
   19780           32 :           tree cmp_type = truth_type_for (type);
   19781           32 :           tree cmp = gimple_build (&stmts, tcode, cmp_type, arg0, arg1);
   19782           32 :           gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
   19783           32 :           g = gimple_build_assign (gimple_call_lhs (stmt),
   19784              :                                    VEC_COND_EXPR, cmp,
   19785              :                                    minus_one_vec, zero_vec);
   19786           32 :           gimple_set_location (g, loc);
   19787           32 :           gsi_replace (gsi, g, false);
   19788              :         }
   19789              :       else
   19790            1 :         gsi_replace (gsi, gimple_build_nop (), false);
   19791              :       return true;
   19792              : 
   19793         9297 :     case IX86_BUILTIN_PSLLD:
   19794         9297 :     case IX86_BUILTIN_PSLLD128:
   19795         9297 :     case IX86_BUILTIN_PSLLD128_MASK:
   19796         9297 :     case IX86_BUILTIN_PSLLD256:
   19797         9297 :     case IX86_BUILTIN_PSLLD256_MASK:
   19798         9297 :     case IX86_BUILTIN_PSLLD512:
   19799         9297 :     case IX86_BUILTIN_PSLLDI:
   19800         9297 :     case IX86_BUILTIN_PSLLDI128:
   19801         9297 :     case IX86_BUILTIN_PSLLDI128_MASK:
   19802         9297 :     case IX86_BUILTIN_PSLLDI256:
   19803         9297 :     case IX86_BUILTIN_PSLLDI256_MASK:
   19804         9297 :     case IX86_BUILTIN_PSLLDI512:
   19805         9297 :     case IX86_BUILTIN_PSLLQ:
   19806         9297 :     case IX86_BUILTIN_PSLLQ128:
   19807         9297 :     case IX86_BUILTIN_PSLLQ128_MASK:
   19808         9297 :     case IX86_BUILTIN_PSLLQ256:
   19809         9297 :     case IX86_BUILTIN_PSLLQ256_MASK:
   19810         9297 :     case IX86_BUILTIN_PSLLQ512:
   19811         9297 :     case IX86_BUILTIN_PSLLQI:
   19812         9297 :     case IX86_BUILTIN_PSLLQI128:
   19813         9297 :     case IX86_BUILTIN_PSLLQI128_MASK:
   19814         9297 :     case IX86_BUILTIN_PSLLQI256:
   19815         9297 :     case IX86_BUILTIN_PSLLQI256_MASK:
   19816         9297 :     case IX86_BUILTIN_PSLLQI512:
   19817         9297 :     case IX86_BUILTIN_PSLLW:
   19818         9297 :     case IX86_BUILTIN_PSLLW128:
   19819         9297 :     case IX86_BUILTIN_PSLLW128_MASK:
   19820         9297 :     case IX86_BUILTIN_PSLLW256:
   19821         9297 :     case IX86_BUILTIN_PSLLW256_MASK:
   19822         9297 :     case IX86_BUILTIN_PSLLW512_MASK:
   19823         9297 :     case IX86_BUILTIN_PSLLWI:
   19824         9297 :     case IX86_BUILTIN_PSLLWI128:
   19825         9297 :     case IX86_BUILTIN_PSLLWI128_MASK:
   19826         9297 :     case IX86_BUILTIN_PSLLWI256:
   19827         9297 :     case IX86_BUILTIN_PSLLWI256_MASK:
   19828         9297 :     case IX86_BUILTIN_PSLLWI512_MASK:
   19829         9297 :       rcode = ASHIFT;
   19830         9297 :       is_vshift = false;
   19831         9297 :       goto do_shift;
   19832         6495 :     case IX86_BUILTIN_PSRAD:
   19833         6495 :     case IX86_BUILTIN_PSRAD128:
   19834         6495 :     case IX86_BUILTIN_PSRAD128_MASK:
   19835         6495 :     case IX86_BUILTIN_PSRAD256:
   19836         6495 :     case IX86_BUILTIN_PSRAD256_MASK:
   19837         6495 :     case IX86_BUILTIN_PSRAD512:
   19838         6495 :     case IX86_BUILTIN_PSRADI:
   19839         6495 :     case IX86_BUILTIN_PSRADI128:
   19840         6495 :     case IX86_BUILTIN_PSRADI128_MASK:
   19841         6495 :     case IX86_BUILTIN_PSRADI256:
   19842         6495 :     case IX86_BUILTIN_PSRADI256_MASK:
   19843         6495 :     case IX86_BUILTIN_PSRADI512:
   19844         6495 :     case IX86_BUILTIN_PSRAQ128_MASK:
   19845         6495 :     case IX86_BUILTIN_PSRAQ256_MASK:
   19846         6495 :     case IX86_BUILTIN_PSRAQ512:
   19847         6495 :     case IX86_BUILTIN_PSRAQI128_MASK:
   19848         6495 :     case IX86_BUILTIN_PSRAQI256_MASK:
   19849         6495 :     case IX86_BUILTIN_PSRAQI512:
   19850         6495 :     case IX86_BUILTIN_PSRAW:
   19851         6495 :     case IX86_BUILTIN_PSRAW128:
   19852         6495 :     case IX86_BUILTIN_PSRAW128_MASK:
   19853         6495 :     case IX86_BUILTIN_PSRAW256:
   19854         6495 :     case IX86_BUILTIN_PSRAW256_MASK:
   19855         6495 :     case IX86_BUILTIN_PSRAW512:
   19856         6495 :     case IX86_BUILTIN_PSRAWI:
   19857         6495 :     case IX86_BUILTIN_PSRAWI128:
   19858         6495 :     case IX86_BUILTIN_PSRAWI128_MASK:
   19859         6495 :     case IX86_BUILTIN_PSRAWI256:
   19860         6495 :     case IX86_BUILTIN_PSRAWI256_MASK:
   19861         6495 :     case IX86_BUILTIN_PSRAWI512:
   19862         6495 :       rcode = ASHIFTRT;
   19863         6495 :       is_vshift = false;
   19864         6495 :       goto do_shift;
   19865         7960 :     case IX86_BUILTIN_PSRLD:
   19866         7960 :     case IX86_BUILTIN_PSRLD128:
   19867         7960 :     case IX86_BUILTIN_PSRLD128_MASK:
   19868         7960 :     case IX86_BUILTIN_PSRLD256:
   19869         7960 :     case IX86_BUILTIN_PSRLD256_MASK:
   19870         7960 :     case IX86_BUILTIN_PSRLD512:
   19871         7960 :     case IX86_BUILTIN_PSRLDI:
   19872         7960 :     case IX86_BUILTIN_PSRLDI128:
   19873         7960 :     case IX86_BUILTIN_PSRLDI128_MASK:
   19874         7960 :     case IX86_BUILTIN_PSRLDI256:
   19875         7960 :     case IX86_BUILTIN_PSRLDI256_MASK:
   19876         7960 :     case IX86_BUILTIN_PSRLDI512:
   19877         7960 :     case IX86_BUILTIN_PSRLQ:
   19878         7960 :     case IX86_BUILTIN_PSRLQ128:
   19879         7960 :     case IX86_BUILTIN_PSRLQ128_MASK:
   19880         7960 :     case IX86_BUILTIN_PSRLQ256:
   19881         7960 :     case IX86_BUILTIN_PSRLQ256_MASK:
   19882         7960 :     case IX86_BUILTIN_PSRLQ512:
   19883         7960 :     case IX86_BUILTIN_PSRLQI:
   19884         7960 :     case IX86_BUILTIN_PSRLQI128:
   19885         7960 :     case IX86_BUILTIN_PSRLQI128_MASK:
   19886         7960 :     case IX86_BUILTIN_PSRLQI256:
   19887         7960 :     case IX86_BUILTIN_PSRLQI256_MASK:
   19888         7960 :     case IX86_BUILTIN_PSRLQI512:
   19889         7960 :     case IX86_BUILTIN_PSRLW:
   19890         7960 :     case IX86_BUILTIN_PSRLW128:
   19891         7960 :     case IX86_BUILTIN_PSRLW128_MASK:
   19892         7960 :     case IX86_BUILTIN_PSRLW256:
   19893         7960 :     case IX86_BUILTIN_PSRLW256_MASK:
   19894         7960 :     case IX86_BUILTIN_PSRLW512:
   19895         7960 :     case IX86_BUILTIN_PSRLWI:
   19896         7960 :     case IX86_BUILTIN_PSRLWI128:
   19897         7960 :     case IX86_BUILTIN_PSRLWI128_MASK:
   19898         7960 :     case IX86_BUILTIN_PSRLWI256:
   19899         7960 :     case IX86_BUILTIN_PSRLWI256_MASK:
   19900         7960 :     case IX86_BUILTIN_PSRLWI512:
   19901         7960 :       rcode = LSHIFTRT;
   19902         7960 :       is_vshift = false;
   19903         7960 :       goto do_shift;
   19904         2384 :     case IX86_BUILTIN_PSLLVV16HI:
   19905         2384 :     case IX86_BUILTIN_PSLLVV16SI:
   19906         2384 :     case IX86_BUILTIN_PSLLVV2DI:
   19907         2384 :     case IX86_BUILTIN_PSLLVV2DI_MASK:
   19908         2384 :     case IX86_BUILTIN_PSLLVV32HI:
   19909         2384 :     case IX86_BUILTIN_PSLLVV4DI:
   19910         2384 :     case IX86_BUILTIN_PSLLVV4DI_MASK:
   19911         2384 :     case IX86_BUILTIN_PSLLVV4SI:
   19912         2384 :     case IX86_BUILTIN_PSLLVV4SI_MASK:
   19913         2384 :     case IX86_BUILTIN_PSLLVV8DI:
   19914         2384 :     case IX86_BUILTIN_PSLLVV8HI:
   19915         2384 :     case IX86_BUILTIN_PSLLVV8SI:
   19916         2384 :     case IX86_BUILTIN_PSLLVV8SI_MASK:
   19917         2384 :       rcode = ASHIFT;
   19918         2384 :       is_vshift = true;
   19919         2384 :       goto do_shift;
   19920         2341 :     case IX86_BUILTIN_PSRAVQ128:
   19921         2341 :     case IX86_BUILTIN_PSRAVQ256:
   19922         2341 :     case IX86_BUILTIN_PSRAVV16HI:
   19923         2341 :     case IX86_BUILTIN_PSRAVV16SI:
   19924         2341 :     case IX86_BUILTIN_PSRAVV32HI:
   19925         2341 :     case IX86_BUILTIN_PSRAVV4SI:
   19926         2341 :     case IX86_BUILTIN_PSRAVV4SI_MASK:
   19927         2341 :     case IX86_BUILTIN_PSRAVV8DI:
   19928         2341 :     case IX86_BUILTIN_PSRAVV8HI:
   19929         2341 :     case IX86_BUILTIN_PSRAVV8SI:
   19930         2341 :     case IX86_BUILTIN_PSRAVV8SI_MASK:
   19931         2341 :       rcode = ASHIFTRT;
   19932         2341 :       is_vshift = true;
   19933         2341 :       goto do_shift;
   19934         2380 :     case IX86_BUILTIN_PSRLVV16HI:
   19935         2380 :     case IX86_BUILTIN_PSRLVV16SI:
   19936         2380 :     case IX86_BUILTIN_PSRLVV2DI:
   19937         2380 :     case IX86_BUILTIN_PSRLVV2DI_MASK:
   19938         2380 :     case IX86_BUILTIN_PSRLVV32HI:
   19939         2380 :     case IX86_BUILTIN_PSRLVV4DI:
   19940         2380 :     case IX86_BUILTIN_PSRLVV4DI_MASK:
   19941         2380 :     case IX86_BUILTIN_PSRLVV4SI:
   19942         2380 :     case IX86_BUILTIN_PSRLVV4SI_MASK:
   19943         2380 :     case IX86_BUILTIN_PSRLVV8DI:
   19944         2380 :     case IX86_BUILTIN_PSRLVV8HI:
   19945         2380 :     case IX86_BUILTIN_PSRLVV8SI:
   19946         2380 :     case IX86_BUILTIN_PSRLVV8SI_MASK:
   19947         2380 :       rcode = LSHIFTRT;
   19948         2380 :       is_vshift = true;
   19949         2380 :       goto do_shift;
   19950              : 
   19951        30857 :     do_shift:
   19952        30857 :       gcc_assert (n_args >= 2);
   19953        30857 :       if (!gimple_call_lhs (stmt))
   19954              :         {
   19955            1 :           gsi_replace (gsi, gimple_build_nop (), false);
   19956            1 :           return true;
   19957              :         }
   19958        30856 :       arg0 = gimple_call_arg (stmt, 0);
   19959        30856 :       arg1 = gimple_call_arg (stmt, 1);
   19960        30856 :       elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
   19961              :       /* For masked shift, only optimize if the mask is all ones.  */
   19962        30856 :       if (n_args > 2
   19963        30856 :           && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
   19964              :         break;
   19965        16081 :       if (is_vshift)
   19966              :         {
   19967         2640 :           if (TREE_CODE (arg1) != VECTOR_CST)
   19968              :             break;
   19969           69 :           count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
   19970           69 :           if (integer_zerop (arg1))
   19971           27 :             count = 0;
   19972           42 :           else if (rcode == ASHIFTRT)
   19973              :             break;
   19974              :           else
   19975          230 :             for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
   19976              :               {
   19977          212 :                 tree elt = VECTOR_CST_ELT (arg1, i);
   19978          212 :                 if (!wi::neg_p (wi::to_wide (elt))
   19979          375 :                     && wi::to_widest (elt) < count)
   19980           16 :                   return false;
   19981              :               }
   19982              :         }
   19983              :       else
   19984              :         {
   19985        13441 :           arg1 = ix86_vector_shift_count (arg1);
   19986        13441 :           if (!arg1)
   19987              :             break;
   19988         5608 :           count = tree_to_uhwi (arg1);
   19989              :         }
   19990         5653 :       if (count == 0)
   19991              :         {
   19992              :           /* Just return the first argument for shift by 0.  */
   19993           93 :           loc = gimple_location (stmt);
   19994           93 :           g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
   19995           93 :           gimple_set_location (g, loc);
   19996           93 :           gsi_replace (gsi, g, false);
   19997           93 :           return true;
   19998              :         }
   19999         5560 :       if (rcode != ASHIFTRT
   20000         5560 :           && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
   20001              :         {
   20002              :           /* For shift counts equal or greater than precision, except for
   20003              :              arithmetic right shift the result is zero.  */
   20004           78 :           loc = gimple_location (stmt);
   20005           78 :           g = gimple_build_assign (gimple_call_lhs (stmt),
   20006           78 :                                    build_zero_cst (TREE_TYPE (arg0)));
   20007           78 :           gimple_set_location (g, loc);
   20008           78 :           gsi_replace (gsi, g, false);
   20009           78 :           return true;
   20010              :         }
   20011              :       break;
   20012              : 
   20013          531 :     case IX86_BUILTIN_SHUFPD512:
   20014          531 :     case IX86_BUILTIN_SHUFPS512:
   20015          531 :     case IX86_BUILTIN_SHUFPD:
   20016          531 :     case IX86_BUILTIN_SHUFPD256:
   20017          531 :     case IX86_BUILTIN_SHUFPS:
   20018          531 :     case IX86_BUILTIN_SHUFPS256:
   20019          531 :       arg0 = gimple_call_arg (stmt, 0);
   20020          531 :       elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
   20021              :       /* This is masked shuffle.  Only optimize if the mask is all ones.  */
   20022          531 :       if (n_args > 3
   20023          895 :           && !ix86_masked_all_ones (elems,
   20024          364 :                                     gimple_call_arg (stmt, n_args - 1)))
   20025              :         break;
   20026          203 :       arg2 = gimple_call_arg (stmt, 2);
   20027          203 :       if (TREE_CODE (arg2) == INTEGER_CST && gimple_call_lhs (stmt))
   20028              :         {
   20029          146 :           unsigned HOST_WIDE_INT shuffle_mask = TREE_INT_CST_LOW (arg2);
   20030              :           /* Check valid imm, refer to gcc.target/i386/testimm-10.c.  */
   20031          146 :           if (shuffle_mask > 255)
   20032              :             return false;
   20033              : 
   20034          144 :           machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)));
   20035          144 :           loc = gimple_location (stmt);
   20036          144 :           tree itype = (imode == E_DFmode
   20037          144 :                         ? long_long_integer_type_node : integer_type_node);
   20038          144 :           tree vtype = build_vector_type (itype, elems);
   20039          144 :           tree_vector_builder elts (vtype, elems, 1);
   20040              : 
   20041              : 
   20042              :           /* Transform integer shuffle_mask to vector perm_mask which
   20043              :              is used by vec_perm_expr, refer to shufp[sd]256/512 in sse.md.  */
   20044          840 :           for (unsigned i = 0; i != elems; i++)
   20045              :             {
   20046          696 :               unsigned sel_idx;
   20047              :               /* Imm[1:0](if VL > 128, then use Imm[3:2],Imm[5:4],Imm[7:6])
   20048              :                  provide 2 select controls for each element of the
   20049              :                  destination.  */
   20050          696 :               if (imode == E_DFmode)
   20051          240 :                 sel_idx = (i & 1) * elems + (i & ~1)
   20052          240 :                           + ((shuffle_mask >> i) & 1);
   20053              :               else
   20054              :                 {
   20055              :                   /* Imm[7:0](if VL > 128, also use Imm[7:0]) provide 4 select
   20056              :                      controls for each element of the destination.  */
   20057          456 :                   unsigned j = i % 4;
   20058          456 :                   sel_idx = ((i >> 1) & 1) * elems + (i & ~3)
   20059          456 :                             + ((shuffle_mask >> 2 * j) & 3);
   20060              :                 }
   20061          696 :               elts.quick_push (build_int_cst (itype, sel_idx));
   20062              :             }
   20063              : 
   20064          144 :           tree perm_mask = elts.build ();
   20065          144 :           arg1 = gimple_call_arg (stmt, 1);
   20066          144 :           g = gimple_build_assign (gimple_call_lhs (stmt),
   20067              :                                    VEC_PERM_EXPR,
   20068              :                                    arg0, arg1, perm_mask);
   20069          144 :           gimple_set_location (g, loc);
   20070          144 :           gsi_replace (gsi, g, false);
   20071          144 :           return true;
   20072          144 :         }
   20073              :       // Do not error yet, the constant could be propagated later?
   20074              :       break;
   20075              : 
   20076           48 :     case IX86_BUILTIN_PABSB:
   20077           48 :     case IX86_BUILTIN_PABSW:
   20078           48 :     case IX86_BUILTIN_PABSD:
   20079              :       /* 64-bit vector abs<mode>2 is only supported under TARGET_MMX_WITH_SSE.  */
   20080           48 :       if (!TARGET_MMX_WITH_SSE)
   20081              :         break;
   20082              :       /* FALLTHRU.  */
   20083         2189 :     case IX86_BUILTIN_PABSB128:
   20084         2189 :     case IX86_BUILTIN_PABSB256:
   20085         2189 :     case IX86_BUILTIN_PABSB512:
   20086         2189 :     case IX86_BUILTIN_PABSW128:
   20087         2189 :     case IX86_BUILTIN_PABSW256:
   20088         2189 :     case IX86_BUILTIN_PABSW512:
   20089         2189 :     case IX86_BUILTIN_PABSD128:
   20090         2189 :     case IX86_BUILTIN_PABSD256:
   20091         2189 :     case IX86_BUILTIN_PABSD512:
   20092         2189 :     case IX86_BUILTIN_PABSQ128:
   20093         2189 :     case IX86_BUILTIN_PABSQ256:
   20094         2189 :     case IX86_BUILTIN_PABSQ512:
   20095         2189 :     case IX86_BUILTIN_PABSB128_MASK:
   20096         2189 :     case IX86_BUILTIN_PABSB256_MASK:
   20097         2189 :     case IX86_BUILTIN_PABSW128_MASK:
   20098         2189 :     case IX86_BUILTIN_PABSW256_MASK:
   20099         2189 :     case IX86_BUILTIN_PABSD128_MASK:
   20100         2189 :     case IX86_BUILTIN_PABSD256_MASK:
   20101         2189 :       gcc_assert (n_args >= 1);
   20102         2189 :       if (!gimple_call_lhs (stmt))
   20103              :         {
   20104            1 :           gsi_replace (gsi, gimple_build_nop (), false);
   20105            1 :           return true;
   20106              :         }
   20107         2188 :       arg0 = gimple_call_arg (stmt, 0);
   20108         2188 :       elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
   20109              :       /* For masked ABS, only optimize if the mask is all ones.  */
   20110         2188 :       if (n_args > 1
   20111         2188 :           && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, n_args - 1)))
   20112              :         break;
   20113          228 :       {
   20114          228 :         tree utype, ures, vce;
   20115          228 :         utype = unsigned_type_for (TREE_TYPE (arg0));
   20116              :         /* PABSB/W/D/Q store the unsigned result in dst, use ABSU_EXPR
   20117              :            instead of ABS_EXPR to handle overflow case(TYPE_MIN).  */
   20118          228 :         ures = gimple_build (&stmts, ABSU_EXPR, utype, arg0);
   20119          228 :         gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
   20120          228 :         loc = gimple_location (stmt);
   20121          228 :         vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (arg0), ures);
   20122          228 :         g = gimple_build_assign (gimple_call_lhs (stmt),
   20123              :                                  VIEW_CONVERT_EXPR, vce);
   20124          228 :         gsi_replace (gsi, g, false);
   20125              :       }
   20126          228 :       return true;
   20127              : 
   20128         2225 :     case IX86_BUILTIN_MINPS:
   20129         2225 :     case IX86_BUILTIN_MINPD:
   20130         2225 :     case IX86_BUILTIN_MINPS256:
   20131         2225 :     case IX86_BUILTIN_MINPD256:
   20132         2225 :     case IX86_BUILTIN_MINPS512:
   20133         2225 :     case IX86_BUILTIN_MINPD512:
   20134         2225 :     case IX86_BUILTIN_MINPS128_MASK:
   20135         2225 :     case IX86_BUILTIN_MINPD128_MASK:
   20136         2225 :     case IX86_BUILTIN_MINPS256_MASK:
   20137         2225 :     case IX86_BUILTIN_MINPD256_MASK:
   20138         2225 :     case IX86_BUILTIN_MINPH128_MASK:
   20139         2225 :     case IX86_BUILTIN_MINPH256_MASK:
   20140         2225 :     case IX86_BUILTIN_MINPH512_MASK:
   20141         2225 :       tcode = LT_EXPR;
   20142         2225 :       goto do_minmax;
   20143              : 
   20144              :     case IX86_BUILTIN_MAXPS:
   20145              :     case IX86_BUILTIN_MAXPD:
   20146              :     case IX86_BUILTIN_MAXPS256:
   20147              :     case IX86_BUILTIN_MAXPD256:
   20148              :     case IX86_BUILTIN_MAXPS512:
   20149              :     case IX86_BUILTIN_MAXPD512:
   20150              :     case IX86_BUILTIN_MAXPS128_MASK:
   20151              :     case IX86_BUILTIN_MAXPD128_MASK:
   20152              :     case IX86_BUILTIN_MAXPS256_MASK:
   20153              :     case IX86_BUILTIN_MAXPD256_MASK:
   20154              :     case IX86_BUILTIN_MAXPH128_MASK:
   20155              :     case IX86_BUILTIN_MAXPH256_MASK:
   20156              :     case IX86_BUILTIN_MAXPH512_MASK:
   20157              :       tcode = GT_EXPR;
   20158         4435 :     do_minmax:
   20159         4435 :       gcc_assert (n_args >= 2);
   20160              :       /* Without SSE4.1 we often aren't able to pattern match it back to the
   20161              :          desired instruction.  */
   20162         4435 :       if (!gimple_call_lhs (stmt) || !optimize || !TARGET_SSE4_1)
   20163              :         break;
   20164         3865 :       arg0 = gimple_call_arg (stmt, 0);
   20165         3865 :       arg1 = gimple_call_arg (stmt, 1);
   20166         3865 :       elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
   20167              :       /* For masked minmax, only optimize if the mask is all ones.  */
   20168         3865 :       if (n_args > 2
   20169         3865 :           && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, 3)))
   20170              :         break;
   20171          647 :       if (n_args >= 5)
   20172              :         {
   20173          436 :           tree arg4 = gimple_call_arg (stmt, 4);
   20174          436 :           if (!tree_fits_uhwi_p (arg4))
   20175              :             break;
   20176          424 :           if (tree_to_uhwi (arg4) == 4)
   20177              :             /* Ok.  */;
   20178          416 :           else if (tree_to_uhwi (arg4) != 8)
   20179              :             /* Invalid round argument.  */
   20180              :             break;
   20181          416 :           else if (HONOR_NANS (arg0))
   20182              :             /* Lowering to comparison would raise exceptions which
   20183              :                shouldn't be raised.  */
   20184              :             break;
   20185              :         }
   20186          219 :       {
   20187          219 :         tree type = truth_type_for (TREE_TYPE (arg0));
   20188          219 :         tree cmpres = gimple_build (&stmts, tcode, type, arg0, arg1);
   20189          219 :         gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
   20190          219 :         g = gimple_build_assign (gimple_call_lhs (stmt),
   20191              :                                  VEC_COND_EXPR, cmpres, arg0, arg1);
   20192          219 :         gsi_replace (gsi, g, false);
   20193              :       }
   20194          219 :       return true;
   20195              : 
   20196              :     default:
   20197              :       break;
   20198              :     }
   20199              : 
   20200              :   return false;
   20201              : }
   20202              : 
/* Handler for an SVML-style interface to
   a library with vectorized intrinsics.

   Given scalar math function FN with vector result type TYPE_OUT and
   vector argument type TYPE_IN, return an extern FUNCTION_DECL for the
   SVML routine implementing the vectorized form, or NULL_TREE when no
   suitable routine exists.  */

tree
ix86_veclibabi_svml (combined_fn fn, tree type_out, tree type_in)
{
  char name[20];
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* The SVML is suitable for unsafe math only.  */
  if (!flag_unsafe_math_optimizations)
    return NULL_TREE;

  /* Argument and result vectors must agree in element mode and in the
     number of elements.  */
  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  /* Only these math functions are available, and only in the V2DF and
     V4SF flavors (128-bit vectors).  */
  switch (fn)
    {
    CASE_CFN_EXP:
    CASE_CFN_LOG:
    CASE_CFN_LOG10:
    CASE_CFN_POW:
    CASE_CFN_TANH:
    CASE_CFN_TAN:
    CASE_CFN_ATAN:
    CASE_CFN_ATAN2:
    CASE_CFN_ATANH:
    CASE_CFN_CBRT:
    CASE_CFN_SINH:
    CASE_CFN_SIN:
    CASE_CFN_ASINH:
    CASE_CFN_ASIN:
    CASE_CFN_COSH:
    CASE_CFN_COS:
    CASE_CFN_ACOSH:
    CASE_CFN_ACOS:
      if ((el_mode != DFmode || n != 2)
          && (el_mode != SFmode || n != 4))
        return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  /* Fetch the scalar builtin decl so its identifier can be reused for
     building the SVML routine name.  */
  tree fndecl = mathfn_built_in (el_mode == DFmode
                                 ? double_type_node : float_type_node, fn);
  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));

  /* SVML spells log as "Ln", so handle it specially.  Otherwise the
     name is "vml" + precision letter ('s' single / 'd' double) + the
     function name + the vector width.  bname + 10 skips over the
     leading "__builtin_" prefix of the builtin's identifier.  */
  if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOGF)
    strcpy (name, "vmlsLn4");
  else if (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_LOG)
    strcpy (name, "vmldLn2");
  else if (n == 4)
    {
      sprintf (name, "vmls%s", bname+10);
      name[strlen (name)-1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname+10);

  /* Convert to uppercase.  name[4] is the first letter of the function
     name; clearing the 0x20 bit maps an ASCII lowercase letter to its
     uppercase counterpart (and leaves an uppercase letter alone).  */
  name[4] &= ~0x20;

  /* Count the scalar builtin's arguments to decide whether the vector
     variant is unary or binary.  */
  arity = 0;
  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
                           FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
   20295              : 
/* Handler for an ACML-style interface to
   a library with vectorized intrinsics.

   Given scalar math function FN with vector result type TYPE_OUT and
   vector argument type TYPE_IN, return an extern FUNCTION_DECL for the
   ACML routine implementing the vectorized form, or NULL_TREE when no
   suitable routine exists.  */

tree
ix86_veclibabi_acml (combined_fn fn, tree type_out, tree type_in)
{
  /* Name template: the ".." placeholder is overwritten below with the
     precision letter and vector width, giving "__vrd2_" or "__vrs4_";
     the function name is then appended at offset 7.  */
  char name[20] = "__vr.._";
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* The ACML is 64bits only and suitable for unsafe math only as
     it does not correctly support parts of IEEE with the required
     precision such as denormals.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  /* Argument and result vectors must agree in element mode and in the
     number of elements.  */
  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  /* Only these functions are available, in V2DF and V4SF flavors.  */
  switch (fn)
    {
    CASE_CFN_SIN:
    CASE_CFN_COS:
    CASE_CFN_EXP:
    CASE_CFN_LOG:
    CASE_CFN_LOG2:
    CASE_CFN_LOG10:
      if (el_mode == DFmode && n == 2)
        {
          name[4] = 'd';
          name[5] = '2';
        }
      else if (el_mode == SFmode && n == 4)
        {
          name[4] = 's';
          name[5] = '4';
        }
      else
        return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  /* Append the math function's name, taken from the scalar builtin's
     identifier with its leading "__builtin_" prefix (10 chars)
     stripped.  */
  tree fndecl = mathfn_built_in (el_mode == DFmode
                                 ? double_type_node : float_type_node, fn);
  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  sprintf (name + 7, "%s", bname+10);

  /* Count the scalar builtin's arguments to decide whether the vector
     variant is unary or binary.  */
  arity = 0;
  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
                           FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
   20374              : 
/* Handler for an AOCL-LibM-style interface to
   a library with vectorized intrinsics.

   Given scalar math function FN with vector result type TYPE_OUT and
   vector argument type TYPE_IN, return an extern FUNCTION_DECL for the
   AOCL-LibM routine ("amd_vr{s,d}<N>_<func>") implementing the
   vectorized form, or NULL_TREE when no suitable routine exists.  */

tree
ix86_veclibabi_aocl (combined_fn fn, tree type_out, tree type_in)
{
  /* The routine name is built incrementally in NAME; NAME_LEN always
     points just past the characters written so far.  */
  char name[20] = "amd_vr";
  int name_len = 6;
  tree fntype, new_fndecl, args;
  unsigned arity;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* AOCL-LibM is 64bits only.  It is also only suitable for unsafe math only
     as it trades off some accuracy for increased performance.  */
  if (!TARGET_64BIT
      || !flag_unsafe_math_optimizations)
    return NULL_TREE;

  /* Argument and result vectors must agree in element mode and in the
     number of elements.  */
  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  gcc_checking_assert (n > 0);

  /* Decide whether there exists a function for the combination of FN, the mode
     and the vector width.  Return early if it doesn't.  */

  if (el_mode != DFmode && el_mode != SFmode)
    return NULL_TREE;

  /* Supported vector widths for given FN and single/double precision.  Zeros
     are used to fill out unused positions in the arrays.  */
  static const int supported_n[][2][3] = {
  /*   Single prec. ,  Double prec.  */
    { { 16,  0,  0 }, {  2,  4,  8 } }, /* TAN.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* EXP.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* EXP2.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* LOG.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* LOG2.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* COS.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* SIN.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* POW.  */
    { {  4,  8, 16 }, {  2,  4,  8 } }, /* ERF.  */
    { {  4,  8, 16 }, {  2,  8,  0 } }, /* ATAN.  */
    { {  4,  8, 16 }, {  2,  0,  0 } }, /* LOG10.  */
    { {  4,  0,  0 }, {  2,  0,  0 } }, /* EXP10.  */
    { {  4,  0,  0 }, {  2,  0,  0 } }, /* LOG1P.  */
    { {  4,  8, 16 }, {  8,  0,  0 } }, /* ASIN.  */
    { {  4, 16,  0 }, {  0,  0,  0 } }, /* ACOS.  */
    { {  4,  8, 16 }, {  0,  0,  0 } }, /* TANH.  */
    { {  4,  0,  0 }, {  0,  0,  0 } }, /* EXPM1.  */
    { {  4,  8,  0 }, {  0,  0,  0 } }, /* COSH.  */
  };

  /* We cannot simply index the supported_n array with FN since multiple FNs
     may correspond to a single operation (see the definitions of these
     CASE_CFN_* macros).  */
  int i;
  switch (fn)
    {
    CASE_CFN_TAN   :  i = 0; break;
    CASE_CFN_EXP   :  i = 1; break;
    CASE_CFN_EXP2  :  i = 2; break;
    CASE_CFN_LOG   :  i = 3; break;
    CASE_CFN_LOG2  :  i = 4; break;
    CASE_CFN_COS   :  i = 5; break;
    CASE_CFN_SIN   :  i = 6; break;
    CASE_CFN_POW   :  i = 7; break;
    CASE_CFN_ERF   :  i = 8; break;
    CASE_CFN_ATAN  :  i = 9; break;
    CASE_CFN_LOG10 : i = 10; break;
    CASE_CFN_EXP10 : i = 11; break;
    CASE_CFN_LOG1P : i = 12; break;
    CASE_CFN_ASIN  : i = 13; break;
    CASE_CFN_ACOS  : i = 14; break;
    CASE_CFN_TANH  : i = 15; break;
    CASE_CFN_EXPM1 : i = 16; break;
    CASE_CFN_COSH  : i = 17; break;
    default: return NULL_TREE;
    }

  /* J selects the precision column: 0 for single, 1 for double.  */
  int j = el_mode == DFmode;
  bool n_is_supported = false;
  for (unsigned k = 0; k < 3; k++)
    if (supported_n[i][j][k] == n)
      {
        n_is_supported = true;
        break;
      }
  if (!n_is_supported)
    return NULL_TREE;

  /* Append the precision and the vector width to the function name we are
     constructing.  */
  name[name_len++] = el_mode == DFmode ? 'd' : 's';
  switch (n)
    {
      case 2:
      case 4:
      case 8:
        name[name_len++] = '0' + n;
        break;
      case 16:
        name[name_len++] = '1';
        name[name_len++] = '6';
        break;
      default:
        /* Unreachable: N was validated against supported_n above.  */
        gcc_unreachable ();
    }
  name[name_len++] = '_';

  /* Append the operation name (steal it from the name of a builtin).
     bname + 10 skips over the leading "__builtin_" prefix.  */
  tree fndecl = mathfn_built_in (el_mode == DFmode
                                 ? double_type_node : float_type_node, fn);
  bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
  sprintf (name + name_len, "%s", bname + 10);

  /* Count the scalar builtin's arguments to decide whether the vector
     variant is unary or binary.  */
  arity = 0;
  for (args = DECL_ARGUMENTS (fndecl); args; args = TREE_CHAIN (args))
    arity++;

  if (arity == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
                           FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
   20516              : 
/* Returns a decl of a function that implements scatter store with
   register type VECTYPE and index type INDEX_TYPE and SCALE.
   Return NULL_TREE if it is not available.  */

static tree
ix86_vectorize_builtin_scatter (const_tree vectype,
                                const_tree index_type, int scale)
{
  bool si;
  enum ix86_builtins code;

  /* Scatter instructions require AVX512F at minimum.  */
  if (!TARGET_AVX512F)
    return NULL_TREE;

  /* Honor the per-width tuning knobs that disable scatter use for
     2-, 4- or 8-element vectors.  */
  if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 2u)
      ? !TARGET_USE_SCATTER_2PARTS
      : (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 4u)
         ? !TARGET_USE_SCATTER_4PARTS
         : !TARGET_USE_SCATTER_8PARTS))
    return NULL_TREE;

  /* The index must be an integer or pointer of SImode or DImode.  */
  if ((TREE_CODE (index_type) != INTEGER_TYPE
       && !POINTER_TYPE_P (index_type))
      || (TYPE_MODE (index_type) != SImode
          && TYPE_MODE (index_type) != DImode))
    return NULL_TREE;

  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
    return NULL_TREE;

  /* v*scatter* insn sign extends index to pointer mode.  */
  if (TYPE_PRECISION (index_type) < POINTER_SIZE
      && TYPE_UNSIGNED (index_type))
    return NULL_TREE;

  /* Scale can be 1, 2, 4 or 8.  */
  if (scale <= 0
      || scale > 8
      || (scale & (scale - 1)) != 0)
    return NULL_TREE;

  /* Pick the builtin matching the data vector mode and the index width
     (SI = 32-bit indices, DI = 64-bit indices).  The 512-bit modes need
     only AVX512F; the 256- and 128-bit modes additionally need
     AVX512VL.  */
  si = TYPE_MODE (index_type) == SImode;
  switch (TYPE_MODE (vectype))
    {
    case E_V8DFmode:
      code = si ? IX86_BUILTIN_SCATTERALTSIV8DF : IX86_BUILTIN_SCATTERDIV8DF;
      break;
    case E_V8DImode:
      code = si ? IX86_BUILTIN_SCATTERALTSIV8DI : IX86_BUILTIN_SCATTERDIV8DI;
      break;
    case E_V16SFmode:
      code = si ? IX86_BUILTIN_SCATTERSIV16SF : IX86_BUILTIN_SCATTERALTDIV16SF;
      break;
    case E_V16SImode:
      code = si ? IX86_BUILTIN_SCATTERSIV16SI : IX86_BUILTIN_SCATTERALTDIV16SI;
      break;
    case E_V4DFmode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERALTSIV4DF : IX86_BUILTIN_SCATTERDIV4DF;
      else
        return NULL_TREE;
      break;
    case E_V4DImode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERALTSIV4DI : IX86_BUILTIN_SCATTERDIV4DI;
      else
        return NULL_TREE;
      break;
    case E_V8SFmode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERSIV8SF : IX86_BUILTIN_SCATTERALTDIV8SF;
      else
        return NULL_TREE;
      break;
    case E_V8SImode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERSIV8SI : IX86_BUILTIN_SCATTERALTDIV8SI;
      else
        return NULL_TREE;
      break;
    case E_V2DFmode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERALTSIV2DF : IX86_BUILTIN_SCATTERDIV2DF;
      else
        return NULL_TREE;
      break;
    case E_V2DImode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERALTSIV2DI : IX86_BUILTIN_SCATTERDIV2DI;
      else
        return NULL_TREE;
      break;
    case E_V4SFmode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERSIV4SF : IX86_BUILTIN_SCATTERALTDIV4SF;
      else
        return NULL_TREE;
      break;
    case E_V4SImode:
      if (TARGET_AVX512VL)
        code = si ? IX86_BUILTIN_SCATTERSIV4SI : IX86_BUILTIN_SCATTERALTDIV4SI;
      else
        return NULL_TREE;
      break;
    default:
      return NULL_TREE;
    }

  return get_ix86_builtin (code);
}
   20627              : 
   20628              : /* Return true if it is safe to use the rsqrt optabs to optimize
   20629              :    1.0/sqrt.  */
   20630              : 
   20631              : static bool
   20632           84 : use_rsqrt_p (machine_mode mode)
   20633              : {
   20634           84 :   return ((mode == HFmode
   20635           36 :            || (TARGET_SSE && TARGET_SSE_MATH))
   20636           84 :           && flag_finite_math_only
   20637           83 :           && !flag_trapping_math
   20638          149 :           && flag_unsafe_math_optimizations);
   20639              : }
   20640              : 
   20641              : /* Helper for avx_vpermilps256_operand et al.  This is also used by
   20642              :    the expansion functions to turn the parallel back into a mask.
   20643              :    The return value is 0 for no match and the imm8+1 for a match.  */
   20644              : 
   20645              : int
   20646        64041 : avx_vpermilp_parallel (rtx par, machine_mode mode)
   20647              : {
   20648        64041 :   unsigned i, nelt = GET_MODE_NUNITS (mode);
   20649        64041 :   unsigned mask = 0;
   20650        64041 :   unsigned char ipar[16] = {};  /* Silence -Wuninitialized warning.  */
   20651              : 
   20652        64041 :   if (XVECLEN (par, 0) != (int) nelt)
   20653              :     return 0;
   20654              : 
   20655              :   /* Validate that all of the elements are constants, and not totally
   20656              :      out of range.  Copy the data into an integral array to make the
   20657              :      subsequent checks easier.  */
   20658       320409 :   for (i = 0; i < nelt; ++i)
   20659              :     {
   20660       256368 :       rtx er = XVECEXP (par, 0, i);
   20661       256368 :       unsigned HOST_WIDE_INT ei;
   20662              : 
   20663       256368 :       if (!CONST_INT_P (er))
   20664              :         return 0;
   20665       256368 :       ei = INTVAL (er);
   20666       256368 :       if (ei >= nelt)
   20667              :         return 0;
   20668       256368 :       ipar[i] = ei;
   20669              :     }
   20670              : 
   20671        64041 :   switch (mode)
   20672              :     {
   20673              :     case E_V8DFmode:
   20674              :     case E_V8DImode:
   20675              :       /* In the 512-bit DFmode case, we can only move elements within
   20676              :          a 128-bit lane.  First fill the second part of the mask,
   20677              :          then fallthru.  */
   20678         4945 :       for (i = 4; i < 6; ++i)
   20679              :         {
   20680         3427 :           if (!IN_RANGE (ipar[i], 4, 5))
   20681              :             return 0;
   20682         3202 :           mask |= (ipar[i] - 4) << i;
   20683              :         }
   20684         3702 :       for (i = 6; i < 8; ++i)
   20685              :         {
   20686         2610 :           if (!IN_RANGE (ipar[i], 6, 7))
   20687              :             return 0;
   20688         2184 :           mask |= (ipar[i] - 6) << i;
   20689              :         }
   20690              :       /* FALLTHRU */
   20691              : 
   20692              :     case E_V4DFmode:
   20693              :     case E_V4DImode:
   20694              :       /* In the 256-bit DFmode case, we can only move elements within
   20695              :          a 128-bit lane.  */
   20696        46279 :       for (i = 0; i < 2; ++i)
   20697              :         {
   20698        38872 :           if (!IN_RANGE (ipar[i], 0, 1))
   20699              :             return 0;
   20700        25997 :           mask |= ipar[i] << i;
   20701              :         }
   20702        19523 :       for (i = 2; i < 4; ++i)
   20703              :         {
   20704        13470 :           if (!IN_RANGE (ipar[i], 2, 3))
   20705              :             return 0;
   20706        12116 :           mask |= (ipar[i] - 2) << i;
   20707              :         }
   20708              :       break;
   20709              : 
   20710              :     case E_V16SFmode:
   20711              :     case E_V16SImode:
   20712              :       /* In 512 bit SFmode case, permutation in the upper 256 bits
   20713              :          must mirror the permutation in the lower 256-bits.  */
   20714         4398 :       for (i = 0; i < 8; ++i)
   20715         3918 :         if (ipar[i] + 8 != ipar[i + 8])
   20716              :           return 0;
   20717              :       /* FALLTHRU */
   20718              : 
   20719              :     case E_V8SFmode:
   20720              :     case E_V8SImode:
   20721              :       /* In 256 bit SFmode case, we have full freedom of
   20722              :          movement within the low 128-bit lane, but the high 128-bit
   20723              :          lane must mirror the exact same pattern.  */
   20724        37835 :       for (i = 0; i < 4; ++i)
   20725        32127 :         if (ipar[i] + 4 != ipar[i + 4])
   20726              :           return 0;
   20727              :       nelt = 4;
   20728              :       /* FALLTHRU */
   20729              : 
   20730        37609 :     case E_V2DFmode:
   20731        37609 :     case E_V2DImode:
   20732        37609 :     case E_V4SFmode:
   20733        37609 :     case E_V4SImode:
   20734              :       /* In the 128-bit case, we've full freedom in the placement of
   20735              :          the elements from the source operand.  */
   20736       132001 :       for (i = 0; i < nelt; ++i)
   20737        94392 :         mask |= ipar[i] << (i * (nelt / 2));
   20738              :       break;
   20739              : 
   20740            0 :     default:
   20741            0 :       gcc_unreachable ();
   20742              :     }
   20743              : 
   20744              :   /* Make sure success has a non-zero value by adding one.  */
   20745        43662 :   return mask + 1;
   20746              : }
   20747              : 
   20748              : /* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
   20749              :    the expansion functions to turn the parallel back into a mask.
   20750              :    The return value is 0 for no match and the imm8+1 for a match.  */
   20751              : 
   20752              : int
   20753        50646 : avx_vperm2f128_parallel (rtx par, machine_mode mode)
   20754              : {
   20755        50646 :   unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
   20756        50646 :   unsigned mask = 0;
   20757        50646 :   unsigned char ipar[8] = {};  /* Silence -Wuninitialized warning.  */
   20758              : 
   20759        50646 :   if (XVECLEN (par, 0) != (int) nelt)
   20760              :     return 0;
   20761              : 
   20762              :   /* Validate that all of the elements are constants, and not totally
   20763              :      out of range.  Copy the data into an integral array to make the
   20764              :      subsequent checks easier.  */
   20765       404750 :   for (i = 0; i < nelt; ++i)
   20766              :     {
   20767       354104 :       rtx er = XVECEXP (par, 0, i);
   20768       354104 :       unsigned HOST_WIDE_INT ei;
   20769              : 
   20770       354104 :       if (!CONST_INT_P (er))
   20771              :         return 0;
   20772       354104 :       ei = INTVAL (er);
   20773       354104 :       if (ei >= 2 * nelt)
   20774              :         return 0;
   20775       354104 :       ipar[i] = ei;
   20776              :     }
   20777              : 
   20778              :   /* Validate that the halves of the permute are halves.  */
   20779        98909 :   for (i = 0; i < nelt2 - 1; ++i)
   20780        79326 :     if (ipar[i] + 1 != ipar[i + 1])
   20781              :       return 0;
   20782        57980 :   for (i = nelt2; i < nelt - 1; ++i)
   20783        39803 :     if (ipar[i] + 1 != ipar[i + 1])
   20784              :       return 0;
   20785              : 
   20786              :   /* Reconstruct the mask.  */
   20787        54443 :   for (i = 0; i < 2; ++i)
   20788              :     {
   20789        36312 :       unsigned e = ipar[i * nelt2];
   20790        36312 :       if (e % nelt2)
   20791              :         return 0;
   20792        36266 :       e /= nelt2;
   20793        36266 :       mask |= e << (i * 4);
   20794              :     }
   20795              : 
   20796              :   /* Make sure success has a non-zero value by adding one.  */
   20797        18131 :   return mask + 1;
   20798              : }
   20799              : 
   20800              : /* Return a mask of VPTERNLOG operands that do not affect output.  */
   20801              : 
   20802              : int
   20803         2425 : vpternlog_redundant_operand_mask (rtx pternlog_imm)
   20804              : {
   20805         2425 :   int mask = 0;
   20806         2425 :   int imm8 = INTVAL (pternlog_imm);
   20807              : 
   20808         2425 :   if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F))
   20809            6 :     mask |= 1;
   20810         2425 :   if (((imm8 >> 2) & 0x33) == (imm8 & 0x33))
   20811            6 :     mask |= 2;
   20812         2425 :   if (((imm8 >> 1) & 0x55) == (imm8 & 0x55))
   20813          147 :     mask |= 4;
   20814              : 
   20815         2425 :   return mask;
   20816              : }
   20817              : 
   20818              : /* Eliminate false dependencies on operands that do not affect output
   20819              :    by substituting other operands of a VPTERNLOG.  */
   20820              : 
   20821              : void
   20822           77 : substitute_vpternlog_operands (rtx *operands)
   20823              : {
   20824           77 :   int mask = vpternlog_redundant_operand_mask (operands[4]);
   20825              : 
   20826           77 :   if (mask & 1) /* The first operand is redundant.  */
   20827            2 :     operands[1] = operands[2];
   20828              : 
   20829           77 :   if (mask & 2) /* The second operand is redundant.  */
   20830            2 :     operands[2] = operands[1];
   20831              : 
   20832           77 :   if (mask & 4) /* The third operand is redundant.  */
   20833           73 :     operands[3] = operands[1];
   20834            4 :   else if (REG_P (operands[3]))
   20835              :     {
   20836            0 :       if (mask & 1)
   20837            0 :         operands[1] = operands[3];
   20838            0 :       if (mask & 2)
   20839            0 :         operands[2] = operands[3];
   20840              :     }
   20841           77 : }
   20842              : 
   20843              : /* Return a register priority for hard reg REGNO.  */
   20844              : static int
   20845     58298372 : ix86_register_priority (int hard_regno)
   20846              : {
   20847              :   /* ebp and r13 as the base always wants a displacement, r12 as the
   20848              :      base always wants an index.  So discourage their usage in an
   20849              :      address.  */
   20850     58298372 :   if (hard_regno == R12_REG || hard_regno == R13_REG)
   20851              :     return 0;
   20852     53820055 :   if (hard_regno == BP_REG)
   20853              :     return 1;
   20854              :   /* New x86-64 int registers result in bigger code size.  Discourage them.  */
   20855     51828803 :   if (REX_INT_REGNO_P (hard_regno))
   20856              :     return 2;
   20857     35281039 :   if (REX2_INT_REGNO_P (hard_regno))
   20858              :     return 2;
   20859              :   /* New x86-64 SSE registers result in bigger code size.  Discourage them.  */
   20860     35278599 :   if (REX_SSE_REGNO_P (hard_regno))
   20861              :     return 2;
   20862     29235527 :   if (EXT_REX_SSE_REGNO_P (hard_regno))
   20863              :     return 1;
   20864              :   /* Usage of AX register results in smaller code.  Prefer it.  */
   20865     28957265 :   if (hard_regno == AX_REG)
   20866      3794239 :     return 4;
   20867              :   return 3;
   20868              : }
   20869              : 
   20870              : /* Implement TARGET_PREFERRED_RELOAD_CLASS.
   20871              : 
   20872              :    Put float CONST_DOUBLE in the constant pool instead of fp regs.
   20873              :    QImode must go into class Q_REGS.
   20874              :    Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   20875              :    movdf to do mem-to-mem moves through integer regs.  */
   20876              : 
   20877              : static reg_class_t
   20878    546988971 : ix86_preferred_reload_class (rtx x, reg_class_t regclass)
   20879              : {
   20880    546988971 :   machine_mode mode = GET_MODE (x);
   20881              : 
   20882              :   /* We're only allowed to return a subclass of CLASS.  Many of the
   20883              :      following checks fail for NO_REGS, so eliminate that early.  */
   20884    546988971 :   if (regclass == NO_REGS)
   20885              :     return NO_REGS;
   20886              : 
   20887              :   /* All classes can load zeros.  */
   20888    546096324 :   if (x == CONST0_RTX (mode))
   20889              :     return regclass;
   20890              : 
   20891              :   /* Force constants into memory if we are loading a (nonzero) constant into
   20892              :      an MMX, SSE or MASK register.  This is because there are no MMX/SSE/MASK
   20893              :      instructions to load from a constant.  */
   20894    521004515 :   if (CONSTANT_P (x)
   20895    521004515 :       && (MAYBE_MMX_CLASS_P (regclass)
   20896    152069855 :           || MAYBE_SSE_CLASS_P (regclass)
   20897    122292756 :           || MAYBE_MASK_CLASS_P (regclass)))
   20898     29907075 :     return NO_REGS;
   20899              : 
   20900              :   /* Floating-point constants need more complex checks.  */
   20901    491097440 :   if (CONST_DOUBLE_P (x))
   20902              :     {
   20903              :       /* General regs can load everything.  */
   20904       301993 :       if (INTEGER_CLASS_P (regclass))
   20905              :         return regclass;
   20906              : 
   20907              :       /* Floats can load 0 and 1 plus some others.  Note that we eliminated
   20908              :          zero above.  We only want to wind up preferring 80387 registers if
   20909              :          we plan on doing computation with them.  */
   20910       179369 :       if (IS_STACK_MODE (mode)
   20911       237593 :           && standard_80387_constant_p (x) > 0)
   20912              :         {
   20913              :           /* Limit class to FP regs.  */
   20914        40456 :           if (FLOAT_CLASS_P (regclass))
   20915              :             return FLOAT_REGS;
   20916              :         }
   20917              : 
   20918       138913 :       return NO_REGS;
   20919              :     }
   20920              : 
   20921              :   /* Prefer SSE if we can use them for math.  Also allow integer regs
   20922              :      when moves between register units are cheap.  */
   20923    490795447 :   if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
   20924              :     {
   20925     31076840 :       if (TARGET_INTER_UNIT_MOVES_FROM_VEC
   20926     31061927 :           && TARGET_INTER_UNIT_MOVES_TO_VEC
   20927     93191182 :           && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (word_mode))
   20928     30910566 :         return INT_SSE_CLASS_P (regclass) ? regclass : NO_REGS;
   20929              :       else
   20930       166274 :         return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
   20931              :     }
   20932              : 
   20933              :   /* Generally when we see PLUS here, it's the function invariant
   20934              :      (plus soft-fp const_int).  Which can only be computed into general
   20935              :      regs.  */
   20936    459718607 :   if (GET_CODE (x) == PLUS)
   20937      1890058 :     return INTEGER_CLASS_P (regclass) ? regclass : NO_REGS;
   20938              : 
   20939              :   /* QImode constants are easy to load, but non-constant QImode data
   20940              :      must go into Q_REGS or ALL_MASK_REGS.  */
   20941    457828549 :   if (GET_MODE (x) == QImode && !CONSTANT_P (x))
   20942              :     {
   20943     25388443 :       if (Q_CLASS_P (regclass))
   20944              :         return regclass;
   20945     20548637 :       else if (reg_class_subset_p (Q_REGS, regclass))
   20946              :         return Q_REGS;
   20947        54778 :       else if (MASK_CLASS_P (regclass))
   20948              :         return regclass;
   20949              :       else
   20950              :         return NO_REGS;
   20951              :     }
   20952              : 
   20953              :   return regclass;
   20954              : }
   20955              : 
   20956              : /* Discourage putting floating-point values in SSE registers unless
   20957              :    SSE math is being used, and likewise for the 387 registers.  */
   20958              : static reg_class_t
   20959     74549757 : ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
   20960              : {
   20961              :   /* Restrict the output reload class to the register bank that we are doing
   20962              :      math on.  If we would like not to return a subset of CLASS, reject this
   20963              :      alternative: if reload cannot do this, it will still use its choice.  */
   20964     74549757 :   machine_mode mode = GET_MODE (x);
   20965     74549757 :   if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
   20966      7216712 :     return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
   20967              : 
   20968     67333045 :   if (IS_STACK_MODE (mode))
   20969       209603 :     return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
   20970              : 
   20971              :   return regclass;
   20972              : }
   20973              : 
   20974              : static reg_class_t
   20975    385297044 : ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
   20976              :                        machine_mode mode, secondary_reload_info *sri)
   20977              : {
   20978              :   /* Double-word spills from general registers to non-offsettable memory
   20979              :      references (zero-extended addresses) require special handling.  */
   20980    385297044 :   if (TARGET_64BIT
   20981    332439219 :       && MEM_P (x)
   20982    180672577 :       && GET_MODE_SIZE (mode) > UNITS_PER_WORD
   20983     19037387 :       && INTEGER_CLASS_P (rclass)
   20984    388050155 :       && !offsettable_memref_p (x))
   20985              :     {
   20986      2579562 :       sri->icode = (in_p
   20987      1289781 :                     ? CODE_FOR_reload_noff_load
   20988              :                     : CODE_FOR_reload_noff_store);
   20989              :       /* Add the cost of moving address to a temporary.  */
   20990      1289781 :       sri->extra_cost = 1;
   20991              : 
   20992      1289781 :       return NO_REGS;
   20993              :     }
   20994              : 
   20995              :   /* QImode spills from non-QI registers require
   20996              :      intermediate register on 32bit targets.  */
   20997    384007263 :   if (mode == QImode
   20998    384007263 :       && ((!TARGET_64BIT && !in_p
   20999       578684 :            && INTEGER_CLASS_P (rclass)
   21000       578640 :            && MAYBE_NON_Q_CLASS_P (rclass))
   21001     22792888 :           || (!TARGET_AVX512DQ
   21002     22593980 :               && MAYBE_MASK_CLASS_P (rclass))))
   21003              :     {
   21004         6470 :       int regno = true_regnum (x);
   21005              : 
   21006              :       /* Return Q_REGS if the operand is in memory.  */
   21007         6470 :       if (regno == -1)
   21008              :         return Q_REGS;
   21009              : 
   21010              :       return NO_REGS;
   21011              :     }
   21012              : 
   21013              :   /* Require movement to gpr, and then store to memory.  */
   21014    384000793 :   if ((mode == HFmode || mode == HImode || mode == V2QImode
   21015              :        || mode == BFmode)
   21016      3838280 :       && !TARGET_SSE4_1
   21017      3236284 :       && SSE_CLASS_P (rclass)
   21018       225316 :       && !in_p && MEM_P (x))
   21019              :     {
   21020       115536 :       sri->extra_cost = 1;
   21021       115536 :       return GENERAL_REGS;
   21022              :     }
   21023              : 
   21024              :   /* This condition handles corner case where an expression involving
   21025              :      pointers gets vectorized.  We're trying to use the address of a
   21026              :      stack slot as a vector initializer.
   21027              : 
   21028              :      (set (reg:V2DI 74 [ vect_cst_.2 ])
   21029              :           (vec_duplicate:V2DI (reg/f:DI 20 frame)))
   21030              : 
   21031              :      Eventually frame gets turned into sp+offset like this:
   21032              : 
   21033              :      (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
   21034              :           (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
   21035              :                                        (const_int 392 [0x188]))))
   21036              : 
   21037              :      That later gets turned into:
   21038              : 
   21039              :      (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
   21040              :           (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
   21041              :             (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
   21042              : 
   21043              :      We'll have the following reload recorded:
   21044              : 
   21045              :      Reload 0: reload_in (DI) =
   21046              :            (plus:DI (reg/f:DI 7 sp)
   21047              :             (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
   21048              :      reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
   21049              :      SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
   21050              :      reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
   21051              :      reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
   21052              :      reload_reg_rtx: (reg:V2DI 22 xmm1)
   21053              : 
   21054              :      Which isn't going to work since SSE instructions can't handle scalar
   21055              :      additions.  Returning GENERAL_REGS forces the addition into integer
   21056              :      register and reload can handle subsequent reloads without problems.  */
   21057              : 
   21058    221647070 :   if (in_p && GET_CODE (x) == PLUS
   21059            2 :       && SSE_CLASS_P (rclass)
   21060    383885257 :       && SCALAR_INT_MODE_P (mode))
   21061              :     return GENERAL_REGS;
   21062              : 
   21063              :   return NO_REGS;
   21064              : }
   21065              : 
   21066              : /* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */
   21067              : 
   21068              : static bool
   21069    717454158 : ix86_class_likely_spilled_p (reg_class_t rclass)
   21070              : {
   21071    707406620 :   switch (rclass)
   21072              :     {
   21073              :       case AREG:
   21074              :       case DREG:
   21075              :       case CREG:
   21076              :       case BREG:
   21077              :       case AD_REGS:
   21078              :       case SIREG:
   21079              :       case DIREG:
   21080              :       case SSE_FIRST_REG:
   21081              :       case FP_TOP_REG:
   21082              :       case FP_SECOND_REG:
   21083              :         return true;
   21084              : 
   21085    685923854 :       default:
   21086    685923854 :         break;
   21087              :     }
   21088              : 
   21089    685923854 :   return false;
   21090              : }
   21091              : 
   21092              : /* Implement TARGET_CALLEE_SAVE_COST.  */
   21093              : 
   21094              : static int
   21095     82460410 : ix86_callee_save_cost (spill_cost_type, unsigned int hard_regno, machine_mode,
   21096              :                        unsigned int, int mem_cost, const HARD_REG_SET &, bool)
   21097              : {
   21098              :   /* Account for the fact that push and pop are shorter and do their
   21099              :      own allocation and deallocation.  */
   21100     82460410 :   if (GENERAL_REGNO_P (hard_regno))
   21101              :     {
   21102              :       /* push is 1 byte while typical spill is 4-5 bytes.
   21103              :          ??? We probably should adjust size costs accordingly.
   21104              :          Costs are relative to reg-reg move that has 2 bytes for 32bit
   21105              :          and 3 bytes otherwise.  Be sure that no cost table sets cost
   21106              :          to 2, so we end up with 0.  */
   21107     82451152 :       if (mem_cost <= 2 || optimize_function_for_size_p (cfun))
   21108      3580312 :         return 1;
   21109     78870840 :       return mem_cost - 2;
   21110              :     }
   21111              :   return mem_cost;
   21112              : }
   21113              : 
   21114              : /* Return true if a set of DST by the expression SRC should be allowed.
   21115              :    This prevents complex sets of likely_spilled hard regs before split1.  */
   21116              : 
   21117              : bool
   21118    627123398 : ix86_hardreg_mov_ok (rtx dst, rtx src)
   21119              : {
   21120              :   /* Avoid complex sets of likely_spilled hard registers before reload.  */
   21121    511062628 :   if (REG_P (dst) && HARD_REGISTER_P (dst)
   21122    301013308 :       && !REG_P (src) && !MEM_P (src)
   21123     94832294 :       && !(VECTOR_MODE_P (GET_MODE (dst))
   21124     94832294 :            ? standard_sse_constant_p (src, GET_MODE (dst))
   21125     47182472 :            : x86_64_immediate_operand (src, GET_MODE (dst)))
   21126     10047538 :       && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst)))
   21127    635933063 :       && ix86_pre_reload_split ())
   21128              :     return false;
   21129              :   return true;
   21130              : }
   21131              : 
   21132              : /* If we are copying between registers from different register sets
   21133              :    (e.g. FP and integer), we may need a memory location.
   21134              : 
   21135              :    The function can't work reliably when one of the CLASSES is a class
   21136              :    containing registers from multiple sets.  We avoid this by never combining
   21137              :    different sets in a single alternative in the machine description.
   21138              :    Ensure that this constraint holds to avoid unexpected surprises.
   21139              : 
   21140              :    When STRICT is false, we are being called from REGISTER_MOVE_COST,
   21141              :    so do not enforce these sanity checks.
   21142              : 
   21143              :    To optimize register_move_cost performance, define inline variant.  */
   21144              : 
   21145              : static inline bool
   21146   5614218433 : inline_secondary_memory_needed (machine_mode mode, reg_class_t class1,
   21147              :                                 reg_class_t class2, int strict)
   21148              : {
   21149   5614218433 :   if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
   21150              :     return false;
   21151              : 
   21152   5581896566 :   if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
   21153   4756819266 :       || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
   21154   4062149458 :       || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
   21155   3875740156 :       || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
   21156   3699329781 :       || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
   21157   3699329781 :       || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)
   21158   3699329781 :       || MAYBE_MASK_CLASS_P (class1) != MASK_CLASS_P (class1)
   21159   9113123377 :       || MAYBE_MASK_CLASS_P (class2) != MASK_CLASS_P (class2))
   21160              :     {
   21161   2210853143 :       gcc_assert (!strict || lra_in_progress);
   21162              :       return true;
   21163              :     }
   21164              : 
   21165   3371043423 :   if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
   21166              :     return true;
   21167              : 
   21168              :   /* ??? This is a lie.  We do have moves between mmx/general, and for
   21169              :      mmx/sse2.  But by saying we need secondary memory we discourage the
   21170              :      register allocator from using the mmx registers unless needed.  */
   21171   3223626683 :   if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
   21172              :     return true;
   21173              : 
   21174              :   /* Between mask and general, we have moves no larger than word size.  */
   21175   3128456624 :   if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
   21176              :     {
   21177      2592574 :       if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2))
   21178      3390990 :           || GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   21179       190973 :         return true;
   21180              :     }
   21181              : 
   21182   3128265651 :   if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
   21183              :     {
   21184              :       /* SSE1 doesn't have any direct moves from other classes.  */
   21185    679554046 :       if (!TARGET_SSE2)
   21186              :         return true;
   21187              : 
   21188    676943508 :       if (!(INTEGER_CLASS_P (class1) || INTEGER_CLASS_P (class2)))
   21189              :         return true;
   21190              : 
   21191              :       /* If the target says that inter-unit moves are more expensive
   21192              :          than moving through memory, then don't generate them.  */
   21193   1014969440 :       if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
   21194   1014483660 :           || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
   21195      1320929 :         return true;
   21196              : 
   21197              :       /* With SSE4.1, *mov{ti,di}_internal supports moves between
   21198              :          SSE_REGS and GENERAL_REGS using pinsr{q,d} or pextr{q,d}.  */
   21199    675622579 :       if (TARGET_SSE4_1
   21200     36340249 :           && (TARGET_64BIT ? mode == TImode : mode == DImode))
   21201              :         return false;
   21202              : 
   21203    674047317 :       int msize = GET_MODE_SIZE (mode);
   21204              : 
   21205              :       /* Between SSE and general, we have moves no larger than word size.  */
   21206    690416569 :       if (msize > UNITS_PER_WORD)
   21207              :         return true;
   21208              : 
   21209              :       /* In addition to SImode moves, HImode moves are supported for SSE2 and above,
   21210              :          Use vmovw with AVX512FP16, or pinsrw/pextrw without AVX512FP16.  */
   21211    582991166 :       int minsize = GET_MODE_SIZE (TARGET_SSE2 ? HImode : SImode);
   21212              : 
   21213    582991166 :       if (msize < minsize)
   21214              :         return true;
   21215              :     }
   21216              : 
   21217              :   return false;
   21218              : }
   21219              : 
   21220              : /* Implement TARGET_SECONDARY_MEMORY_NEEDED.  */
   21221              : 
   21222              : static bool
   21223     71021878 : ix86_secondary_memory_needed (machine_mode mode, reg_class_t class1,
   21224              :                               reg_class_t class2)
   21225              : {
   21226     71021878 :   return inline_secondary_memory_needed (mode, class1, class2, true);
   21227              : }
   21228              : 
   21229              : /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
   21230              : 
   21231              :    get_secondary_mem widens integral modes to BITS_PER_WORD.
   21232              :    There is no need to emit full 64 bit move on 64 bit targets
   21233              :    for integral modes that can be moved using 32 bit move.  */
   21234              : 
   21235              : static machine_mode
   21236        13204 : ix86_secondary_memory_needed_mode (machine_mode mode)
   21237              : {
   21238        26408 :   if (GET_MODE_BITSIZE (mode) < 32 && INTEGRAL_MODE_P (mode))
   21239           17 :     return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
   21240              :   return mode;
   21241              : }
   21242              : 
   21243              : /* Implement the TARGET_CLASS_MAX_NREGS hook.
   21244              : 
   21245              :    On the 80386, this is the size of MODE in words,
   21246              :    except in the FP regs, where a single reg is always enough.  */
   21247              : 
   21248              : static unsigned char
   21249   5897988628 : ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
   21250              : {
   21251   5897988628 :   if (MAYBE_INTEGER_CLASS_P (rclass))
   21252              :     {
   21253   3967247858 :       if (mode == XFmode)
   21254    144229809 :         return (TARGET_64BIT ? 2 : 3);
   21255   3823018049 :       else if (mode == XCmode)
   21256    144229440 :         return (TARGET_64BIT ? 4 : 6);
   21257              :       else
   21258   7463522693 :         return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
   21259              :     }
   21260              :   else
   21261              :     {
   21262   1930740770 :       if (COMPLEX_MODE_P (mode))
   21263              :         return 2;
   21264              :       else
   21265   1649103179 :         return 1;
   21266              :     }
   21267              : }
   21268              : 
   21269              : /* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */
   21270              : 
   21271              : static bool
   21272     40241722 : ix86_can_change_mode_class (machine_mode from, machine_mode to,
   21273              :                             reg_class_t regclass)
   21274              : {
   21275     40241722 :   if (from == to)
   21276              :     return true;
   21277              : 
   21278              :   /* x87 registers can't do subreg at all, as all values are reformatted
   21279              :      to extended precision.
   21280              : 
   21281              :      ??? middle-end queries mode changes for ALL_REGS and this makes
   21282              :      vec_series_lowpart_p to always return false.  We probably should
   21283              :      restrict this to modes supported by i387 and check if it is enabled.  */
   21284     38873005 :   if (MAYBE_FLOAT_CLASS_P (regclass))
   21285              :     return false;
   21286              : 
   21287     34272359 :   if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
   21288              :     {
   21289              :       /* Vector registers do not support QI or HImode loads.  If we don't
   21290              :          disallow a change to these modes, reload will assume it's ok to
   21291              :          drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
   21292              :          the vec_dupv4hi pattern.
   21293              :          NB: SSE2 can load 16bit data to sse register via pinsrw.  */
   21294     16996021 :       int mov_size = MAYBE_SSE_CLASS_P (regclass) && TARGET_SSE2 ? 2 : 4;
   21295     16996021 :       if (GET_MODE_SIZE (from) < mov_size
   21296     33991771 :           || GET_MODE_SIZE (to) < mov_size)
   21297              :         return false;
   21298              :     }
   21299              : 
   21300              :   return true;
   21301              : }
   21302              : 
   21303              : /* Return index of MODE in the sse load/store tables.  */
   21304              : 
   21305              : static inline int
   21306    764221115 : sse_store_index (machine_mode mode)
   21307              : {
   21308              :   /* NB: Use SFmode cost for HFmode instead of adding HFmode load/store
   21309              :      costs to processor_costs, which requires changes to all entries in
   21310              :      processor cost table.  */
   21311    764221115 :   if (mode == E_HFmode)
   21312    135477880 :     mode = E_SFmode;
   21313              : 
   21314   1528442230 :   switch (GET_MODE_SIZE (mode))
   21315              :     {
   21316              :     case 4:
   21317              :       return 0;
   21318              :     case 8:
   21319              :       return 1;
   21320              :     case 16:
   21321              :       return 2;
   21322              :     case 32:
   21323              :       return 3;
   21324              :     case 64:
   21325              :       return 4;
   21326              :     default:
   21327              :       return -1;
   21328              :     }
   21329              : }
   21330              : 
   21331              : /* Return the cost of moving data of mode M between a
   21332              :    register and memory.  A value of 2 is the default; this cost is
   21333              :    relative to those in `REGISTER_MOVE_COST'.
   21334              : 
   21335              :    This function is used extensively by register_move_cost that is used to
   21336              :    build tables at startup.  Make it inline in this case.
   21337              :    When IN is 2, return maximum of in and out move cost.
   21338              : 
   21339              :    If moving between registers and memory is more expensive than
   21340              :    between two registers, you should define this macro to express the
   21341              :    relative cost.
   21342              : 
   21343              :    Model also increased moving costs of QImode registers in non
   21344              :    Q_REGS classes.
   21345              :  */
   21346              : static inline int
   21347   6831953271 : inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
   21348              : {
   21349   6831953271 :   int cost;
   21350              : 
   21351   6831953271 :   if (FLOAT_CLASS_P (regclass))
   21352              :     {
   21353    348903607 :       int index;
   21354    348903607 :       switch (mode)
   21355              :         {
   21356              :           case E_SFmode:
   21357              :             index = 0;
   21358              :             break;
   21359              :           case E_DFmode:
   21360              :             index = 1;
   21361              :             break;
   21362              :           case E_XFmode:
   21363              :             index = 2;
   21364              :             break;
   21365              :           default:
   21366              :             return 100;
   21367              :         }
   21368    104292665 :       if (in == 2)
   21369    100400817 :         return MAX (ix86_cost->hard_register.fp_load [index],
   21370              :                     ix86_cost->hard_register.fp_store [index]);
   21371      3891848 :       return in ? ix86_cost->hard_register.fp_load [index]
   21372      3891848 :                 : ix86_cost->hard_register.fp_store [index];
   21373              :     }
   21374   6483049664 :   if (SSE_CLASS_P (regclass))
   21375              :     {
   21376    637152245 :       int index = sse_store_index (mode);
   21377    637152245 :       if (index == -1)
   21378              :         return 100;
   21379    554114731 :       if (in == 2)
   21380    392691617 :         return MAX (ix86_cost->hard_register.sse_load [index],
   21381              :                     ix86_cost->hard_register.sse_store [index]);
   21382    161423114 :       return in ? ix86_cost->hard_register.sse_load [index]
   21383    161423114 :                 : ix86_cost->hard_register.sse_store [index];
   21384              :     }
   21385   5845897419 :   if (MASK_CLASS_P (regclass))
   21386              :     {
   21387    106984702 :       int index;
   21388    213969404 :       switch (GET_MODE_SIZE (mode))
   21389              :         {
   21390              :         case 1:
   21391              :           index = 0;
   21392              :           break;
   21393      8827357 :         case 2:
   21394      8827357 :           index = 1;
   21395      8827357 :           break;
   21396              :         /* DImode loads and stores assumed to cost the same as SImode.  */
   21397     39703401 :         case 4:
   21398     39703401 :         case 8:
   21399     39703401 :           index = 2;
   21400     39703401 :           break;
   21401              :         default:
   21402              :           return 100;
   21403              :         }
   21404              : 
   21405     52065790 :       if (in == 2)
   21406       579259 :         return MAX (ix86_cost->hard_register.mask_load[index],
   21407              :                     ix86_cost->hard_register.mask_store[index]);
   21408     51486531 :       return in ? ix86_cost->hard_register.mask_load[2]
   21409     51486531 :                 : ix86_cost->hard_register.mask_store[2];
   21410              :     }
   21411   5738912717 :   if (MMX_CLASS_P (regclass))
   21412              :     {
   21413    170351813 :       int index;
   21414    340703626 :       switch (GET_MODE_SIZE (mode))
   21415              :         {
   21416              :           case 4:
   21417              :             index = 0;
   21418              :             break;
   21419     99978349 :           case 8:
   21420     99978349 :             index = 1;
   21421     99978349 :             break;
   21422              :           default:
   21423              :             return 100;
   21424              :         }
   21425    136885601 :       if (in == 2)
   21426    117149095 :         return MAX (ix86_cost->hard_register.mmx_load [index],
   21427              :                     ix86_cost->hard_register.mmx_store [index]);
   21428     19736506 :       return in ? ix86_cost->hard_register.mmx_load [index]
   21429     19736506 :                 : ix86_cost->hard_register.mmx_store [index];
   21430              :     }
   21431  11137121808 :   switch (GET_MODE_SIZE (mode))
   21432              :     {
   21433    123270431 :       case 1:
   21434    123270431 :         if (Q_CLASS_P (regclass) || TARGET_64BIT)
   21435              :           {
   21436    120642049 :             if (!in)
   21437     19353032 :               return ix86_cost->hard_register.int_store[0];
   21438    101289017 :             if (TARGET_PARTIAL_REG_DEPENDENCY
   21439    101289017 :                 && optimize_function_for_speed_p (cfun))
   21440     94423813 :               cost = ix86_cost->hard_register.movzbl_load;
   21441              :             else
   21442      6865204 :               cost = ix86_cost->hard_register.int_load[0];
   21443    101289017 :             if (in == 2)
   21444     81906760 :               return MAX (cost, ix86_cost->hard_register.int_store[0]);
   21445              :             return cost;
   21446              :           }
   21447              :         else
   21448              :           {
   21449      2628382 :            if (in == 2)
   21450      1861370 :              return MAX (ix86_cost->hard_register.movzbl_load,
   21451              :                          ix86_cost->hard_register.int_store[0] + 4);
   21452       767012 :            if (in)
   21453       383560 :              return ix86_cost->hard_register.movzbl_load;
   21454              :            else
   21455       383452 :              return ix86_cost->hard_register.int_store[0] + 4;
   21456              :           }
   21457    636302905 :         break;
   21458    636302905 :       case 2:
   21459    636302905 :         {
   21460    636302905 :           int cost;
   21461    636302905 :           if (in == 2)
   21462    537599674 :             cost = MAX (ix86_cost->hard_register.int_load[1],
   21463              :                         ix86_cost->hard_register.int_store[1]);
   21464              :           else
   21465     98703231 :             cost = in ? ix86_cost->hard_register.int_load[1]
   21466              :                       : ix86_cost->hard_register.int_store[1];
   21467              : 
   21468    636302905 :           if (mode == E_HFmode)
   21469              :             {
   21470              :               /* Prefer SSE over GPR for HFmode.  */
   21471    123300536 :               int sse_cost;
   21472    123300536 :               int index = sse_store_index (mode);
   21473    123300536 :               if (in == 2)
   21474    113432232 :                 sse_cost = MAX (ix86_cost->hard_register.sse_load[index],
   21475              :                                 ix86_cost->hard_register.sse_store[index]);
   21476              :               else
   21477     19736608 :                 sse_cost = (in
   21478      9868304 :                             ? ix86_cost->hard_register.sse_load [index]
   21479              :                             : ix86_cost->hard_register.sse_store [index]);
   21480    123300536 :               if (sse_cost >= cost)
   21481    123300536 :                 cost = sse_cost + 1;
   21482              :             }
   21483              :           return cost;
   21484              :         }
   21485   4808987568 :       default:
   21486   4808987568 :         if (in == 2)
   21487   3723091082 :           cost = MAX (ix86_cost->hard_register.int_load[2],
   21488              :                       ix86_cost->hard_register.int_store[2]);
   21489   1085896486 :         else if (in)
   21490    543143176 :           cost = ix86_cost->hard_register.int_load[2];
   21491              :         else
   21492    542753310 :           cost = ix86_cost->hard_register.int_store[2];
   21493              :         /* Multiply with the number of GPR moves needed.  */
   21494   9736916766 :         return cost * CEIL ((int) GET_MODE_SIZE (mode), UNITS_PER_WORD);
   21495              :     }
   21496              : }
   21497              : 
   21498              : static int
   21499   1756349345 : ix86_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
   21500              : {
   21501   2634193829 :   return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
   21502              : }
   21503              : 
   21504              : 
/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same as TO;
   on some machines it is expensive to move between registers if they are not
   general registers.  */

static int
ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
			 reg_class_t class2_i)
{
  enum reg_class class1 = (enum reg_class) class1_i;
  enum reg_class class2 = (enum reg_class) class2_i;

  /* In case we require secondary memory, compute cost of the store followed
     by load.  In order to avoid bad register allocation choices, we need
     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */

  if (inline_secondary_memory_needed (mode, class1, class2, false))
    {
      int cost = 1;

      /* IN == 2 asks inline_memory_move_cost for the worst of the
	 load and store costs.  */
      cost += inline_memory_move_cost (mode, class1, 2);
      cost += inline_memory_move_cost (mode, class2, 2);

      /* In case of copying from general_purpose_register we may emit multiple
         stores followed by single load causing memory size mismatch stall.
         Count this as arbitrarily high cost of 20.  */
      if (GET_MODE_BITSIZE (mode) > BITS_PER_WORD
	  && TARGET_MEMORY_MISMATCH_STALL
	  && targetm.class_max_nregs (class1, mode)
	     > targetm.class_max_nregs (class2, mode))
	cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
	 have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
	  || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
	cost += 20;

      return cost;
    }

  /* Moves between MMX and non-MMX units require secondary memory.
     The secondary-memory test above must already have caught those,
     so reaching this point with such a pair is a bug.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
    gcc_unreachable ();

  /* Cross-file moves between SSE and integer registers; note the two
     directions may have different costs.  */
  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return (SSE_CLASS_P (class1)
	    ? ix86_cost->hard_register.sse_to_integer
	    : ix86_cost->hard_register.integer_to_sse);

  /* Moves between mask register and GPR.  */
  if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
    {
      return (MASK_CLASS_P (class1)
	      ? ix86_cost->hard_register.mask_to_integer
	      : ix86_cost->hard_register.integer_to_mask);
    }
  /* Moving between mask registers.  */
  if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
    return ix86_cost->hard_register.mask_move;

  /* From here on both classes live in the same register file.  */
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->hard_register.fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    {
      /* Vector move cost scales with the vector width.  */
      if (GET_MODE_BITSIZE (mode) <= 128)
	return ix86_cost->hard_register.xmm_move;
      if (GET_MODE_BITSIZE (mode) <= 256)
	return ix86_cost->hard_register.ymm_move;
      return ix86_cost->hard_register.zmm_move;
    }
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->hard_register.mmx_move;
  /* Plain GPR-to-GPR move: the default cost.  */
  return 2;
}
   21582              : 
   21583              : /* Implement TARGET_HARD_REGNO_NREGS.  This is ordinarily the length in
   21584              :    words of a value of mode MODE but can be less for certain modes in
   21585              :    special long registers.
   21586              : 
   21587              :    Actually there are no two word move instructions for consecutive
   21588              :    registers.  And only registers 0-3 may have mov byte instructions
   21589              :    applied to them.  */
   21590              : 
   21591              : static unsigned int
   21592   8833761984 : ix86_hard_regno_nregs (unsigned int regno, machine_mode mode)
   21593              : {
   21594   8833761984 :   if (GENERAL_REGNO_P (regno))
   21595              :     {
   21596   3072612864 :       if (mode == XFmode)
   21597     25257664 :         return TARGET_64BIT ? 2 : 3;
   21598   3047833728 :       if (mode == XCmode)
   21599     25257664 :         return TARGET_64BIT ? 4 : 6;
   21600   6104489600 :       return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
   21601              :     }
   21602   5761149120 :   if (COMPLEX_MODE_P (mode))
   21603              :     return 2;
   21604              :   /* Register pair for mask registers.  */
   21605   5017775040 :   if (mode == P2QImode || mode == P2HImode)
   21606     92921760 :     return 2;
   21607              : 
   21608              :   return 1;
   21609              : }
   21610              : 
   21611              : /* Implement REGMODE_NATURAL_SIZE(MODE).  */
   21612              : unsigned int
   21613    108004041 : ix86_regmode_natural_size (machine_mode mode)
   21614              : {
   21615    108004041 :   if (mode == P2HImode || mode == P2QImode)
   21616         2462 :     return GET_MODE_SIZE (mode) / 2;
   21617    108002810 :   return UNITS_PER_WORD;
   21618              : }
   21619              : 
/* Implement TARGET_HARD_REGNO_MODE_OK.  */

static bool
ix86_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  /* Flags and only flags can only hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  /* Conversely, CCmode (and MODE_RANDOM, e.g. VOIDmode/BLKmode) values
     fit in no other register.  */
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM)
    return false;
  if (STACK_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (MASK_REGNO_P (regno))
    {
      /* Register pair only starts at even register number.  */
      if ((mode == P2QImode || mode == P2HImode))
        return MASK_PAIR_REGNO_P(regno);

      return ((TARGET_AVX512F && VALID_MASK_REG_MODE (mode))
              || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
    }

  /* Partial-int modes other than the mask-pair modes handled above fit
     in no register file.  */
  if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return false;

  if (SSE_REGNO_P (regno))
    {
      /* We implement the move patterns for all vector modes into and
         out of SSE registers, even when no operation instructions
         are available.  */

      /* For AVX-512 we allow, regardless of regno:
          - XI mode
          - any of 512-bit wide vector mode
          - any scalar mode.  */
      if (TARGET_AVX512F
          && ((VALID_AVX512F_REG_OR_XI_MODE (mode))
              || VALID_AVX512F_SCALAR_MODE (mode)))
        return true;

      /* TODO check for QI/HI scalars.  */
      /* AVX512VL allows sse regs16+ for 128/256 bit modes.  */
      if (TARGET_AVX512VL
          && (VALID_AVX256_REG_OR_OI_MODE (mode)
              || VALID_AVX512VL_128_REG_MODE (mode)))
        return true;

      /* xmm16-xmm31 are only available for AVX-512.  */
      if (EXT_REX_SSE_REGNO_P (regno))
        return false;

      /* OImode and AVX modes are available only when AVX is enabled.  */
      return ((TARGET_AVX
               && VALID_AVX256_REG_OR_OI_MODE (mode))
              || VALID_SSE_REG_MODE (mode)
              || VALID_SSE2_REG_MODE (mode)
              || VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }
  if (MMX_REGNO_P (regno))
    {
      /* We implement the move patterns for 3DNOW modes even in MMX mode,
         so if the register is available at all, then we can move data of
         the given mode into or out of it.  */
      return (VALID_MMX_REG_MODE (mode)
              || VALID_MMX_REG_MODE_3DNOW (mode));
    }

  /* Remaining registers are the general purpose ones.  */
  if (mode == QImode)
    {
      /* Take care for QImode values - they can be in non-QI regs,
         but then they do cause partial register stalls.  */
      if (ANY_QI_REGNO_P (regno))
        return true;
      if (!TARGET_PARTIAL_REG_STALL)
        return true;
      /* LRA checks if the hard register is OK for the given mode.
         QImode values can live in non-QI regs, so we allow all
         registers here.  */
      if (lra_in_progress)
       return true;
      return !can_create_pseudo_p ();
    }
  /* We handle both integer and floats in the general purpose registers.  */
  else if (VALID_INT_MODE_P (mode)
           || VALID_FP_MODE_P (mode))
    return true;
  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
     on to use that value in smaller contexts, this can easily force a
     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
     supporting DImode, allow it.  */
  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
    return true;

  return false;
}
   21717              : 
   21718              : /* Implement TARGET_INSN_CALLEE_ABI.  */
   21719              : 
   21720              : const predefined_function_abi &
   21721    247898082 : ix86_insn_callee_abi (const rtx_insn *insn)
   21722              : {
   21723    247898082 :   unsigned int abi_id = 0;
   21724    247898082 :   rtx pat = PATTERN (insn);
   21725    247898082 :   if (vzeroupper_pattern (pat, VOIDmode))
   21726       409965 :     abi_id = ABI_VZEROUPPER;
   21727              : 
   21728    247898082 :   return function_abis[abi_id];
   21729              : }
   21730              : 
   21731              : /* Initialize function_abis with corresponding abi_id,
   21732              :    currently only handle vzeroupper.  */
   21733              : void
   21734        22602 : ix86_initialize_callee_abi (unsigned int abi_id)
   21735              : {
   21736        22602 :   gcc_assert (abi_id == ABI_VZEROUPPER);
   21737        22602 :   predefined_function_abi &vzeroupper_abi = function_abis[abi_id];
   21738        22602 :   if (!vzeroupper_abi.initialized_p ())
   21739              :     {
   21740              :       HARD_REG_SET full_reg_clobbers;
   21741         4264 :       CLEAR_HARD_REG_SET (full_reg_clobbers);
   21742         4264 :       vzeroupper_abi.initialize (ABI_VZEROUPPER, full_reg_clobbers);
   21743              :     }
   21744        22602 : }
   21745              : 
/* Emit a vzeroupper as a call insn using the dedicated ABI_VZEROUPPER
   callee ABI, so the register allocator sees that only the upper parts
   of the SSE registers are clobbered.  */
void
ix86_expand_avx_vzeroupper (void)
{
  /* Initialize vzeroupper_abi here.  */
  ix86_initialize_callee_abi (ABI_VZEROUPPER);
  rtx_insn *insn = emit_call_insn (gen_avx_vzeroupper_callee_abi ());
  /* Return false for non-local goto in can_nonlocal_goto.  */
  make_reg_eh_region_note (insn, 0, INT_MIN);
  /* Flag used for call_insn indicates it's a fake call.  */
  RTX_FLAG (insn, used) = 1;
}
   21757              : 
   21758              : 
   21759              : /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  The only ABI that
   21760              :    saves SSE registers across calls is Win64 (thus no need to check the
   21761              :    current ABI here), and with AVX enabled Win64 only guarantees that
   21762              :    the low 16 bytes are saved.  */
   21763              : 
   21764              : static bool
   21765   2024890971 : ix86_hard_regno_call_part_clobbered (unsigned int abi_id, unsigned int regno,
   21766              :                                      machine_mode mode)
   21767              : {
   21768              :   /* Special ABI for vzeroupper which only clobber higher part of sse regs.  */
   21769   2024890971 :   if (abi_id == ABI_VZEROUPPER)
   21770     30828658 :       return (GET_MODE_SIZE (mode) > 16
   21771     30828658 :               && ((TARGET_64BIT && REX_SSE_REGNO_P (regno))
   21772      4717624 :                   || LEGACY_SSE_REGNO_P (regno)));
   21773              : 
   21774   2624894017 :   return SSE_REGNO_P (regno) && GET_MODE_SIZE (mode) > 16;
   21775              : }
   21776              : 
   21777              : /* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
   21778              :    tieable integer mode.  */
   21779              : 
   21780              : static bool
   21781     51890441 : ix86_tieable_integer_mode_p (machine_mode mode)
   21782              : {
   21783     51890441 :   switch (mode)
   21784              :     {
   21785              :     case E_HImode:
   21786              :     case E_SImode:
   21787              :       return true;
   21788              : 
   21789      5227234 :     case E_QImode:
   21790      5227234 :       return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
   21791              : 
   21792     10191038 :     case E_DImode:
   21793     10191038 :       return TARGET_64BIT;
   21794              : 
   21795              :     default:
   21796              :       return false;
   21797              :     }
   21798              : }
   21799              : 
   21800              : /* Implement TARGET_MODES_TIEABLE_P.
   21801              : 
   21802              :    Return true if MODE1 is accessible in a register that can hold MODE2
   21803              :    without copying.  That is, all register classes that can hold MODE2
   21804              :    can also hold MODE1.  */
   21805              : 
   21806              : static bool
   21807     33590985 : ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
   21808              : {
   21809     33590985 :   if (mode1 == mode2)
   21810              :     return true;
   21811              : 
   21812     33504546 :   if (ix86_tieable_integer_mode_p (mode1)
   21813     33504546 :       && ix86_tieable_integer_mode_p (mode2))
   21814              :     return true;
   21815              : 
   21816              :   /* MODE2 being XFmode implies fp stack or general regs, which means we
   21817              :      can tie any smaller floating point modes to it.  Note that we do not
   21818              :      tie this with TFmode.  */
   21819     24556197 :   if (mode2 == XFmode)
   21820         4310 :     return mode1 == SFmode || mode1 == DFmode;
   21821              : 
   21822              :   /* MODE2 being DFmode implies fp stack, general or sse regs, which means
   21823              :      that we can tie it with SFmode.  */
   21824     24551887 :   if (mode2 == DFmode)
   21825       250073 :     return mode1 == SFmode;
   21826              : 
   21827              :   /* If MODE2 is only appropriate for an SSE register, then tie with
   21828              :      any vector modes or scalar floating point modes acceptable to SSE
   21829              :      registers, excluding scalar integer modes with SUBREG:
   21830              :         (subreg:QI (reg:TI 99) 0))
   21831              :         (subreg:HI (reg:TI 99) 0))
   21832              :         (subreg:SI (reg:TI 99) 0))
   21833              :         (subreg:DI (reg:TI 99) 0))
   21834              :      to avoid unnecessary move from SSE register to integer register.
   21835              :    */
   21836     24301814 :   if (GET_MODE_SIZE (mode2) >= 16
   21837     38058948 :       && (GET_MODE_SIZE (mode1) == GET_MODE_SIZE (mode2)
   21838     13425914 :           || ((VECTOR_MODE_P (mode1) || SCALAR_FLOAT_MODE_P (mode1))
   21839       481832 :               && GET_MODE_SIZE (mode1) <= GET_MODE_SIZE (mode2)))
   21840     30145701 :       && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
   21841      5415307 :     return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
   21842              : 
   21843              :   /* If MODE2 is appropriate for an MMX register, then tie
   21844              :      with any other mode acceptable to MMX registers.  */
   21845     18886507 :   if (GET_MODE_SIZE (mode2) == 8
   21846     18886507 :       && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
   21847      3276884 :     return (GET_MODE_SIZE (mode1) == 8
   21848      3276884 :             && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
   21849              : 
   21850              :   /* SCmode and DImode can be tied.  */
   21851     15609623 :   if ((mode1 == E_SCmode && mode2 == E_DImode)
   21852     15609623 :       || (mode1 == E_DImode && mode2 == E_SCmode))
   21853          108 :     return TARGET_64BIT;
   21854              : 
   21855              :   /* [SD]Cmode and V2[SD]Fmode modes can be tied.  */
   21856     15609515 :   if ((mode1 == E_SCmode && mode2 == E_V2SFmode)
   21857     15609515 :       || (mode1 == E_V2SFmode && mode2 == E_SCmode)
   21858     15609515 :       || (mode1 == E_DCmode && mode2 == E_V2DFmode)
   21859     15609515 :       || (mode1 == E_V2DFmode && mode2 == E_DCmode))
   21860            0 :     return true;
   21861              : 
   21862              :   return false;
   21863              : }
   21864              : 
   21865              : /* Return the cost of moving between two registers of mode MODE.  */
   21866              : 
   21867              : static int
   21868     28921501 : ix86_set_reg_reg_cost (machine_mode mode)
   21869              : {
   21870     28921501 :   unsigned int units = UNITS_PER_WORD;
   21871              : 
   21872     28921501 :   switch (GET_MODE_CLASS (mode))
   21873              :     {
   21874              :     default:
   21875              :       break;
   21876              : 
   21877              :     case MODE_CC:
   21878     28921501 :       units = GET_MODE_SIZE (CCmode);
   21879              :       break;
   21880              : 
   21881      1154795 :     case MODE_FLOAT:
   21882      1154795 :       if ((TARGET_SSE && mode == TFmode)
   21883       676920 :           || (TARGET_80387 && mode == XFmode)
   21884       209399 :           || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
   21885       141798 :           || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
   21886      2279732 :         units = GET_MODE_SIZE (mode);
   21887              :       break;
   21888              : 
   21889      1291918 :     case MODE_COMPLEX_FLOAT:
   21890      1291918 :       if ((TARGET_SSE && mode == TCmode)
   21891       866208 :           || (TARGET_80387 && mode == XCmode)
   21892       440378 :           || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
   21893        14520 :           || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
   21894      2577352 :         units = GET_MODE_SIZE (mode);
   21895              :       break;
   21896              : 
   21897     18486096 :     case MODE_VECTOR_INT:
   21898     18486096 :     case MODE_VECTOR_FLOAT:
   21899     18486096 :       if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
   21900     18390787 :           || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
   21901     18220043 :           || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
   21902     15623537 :           || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
   21903     14333802 :           || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
   21904     14288292 :               && VALID_MMX_REG_MODE (mode)))
   21905      8411366 :         units = GET_MODE_SIZE (mode);
   21906              :     }
   21907              : 
   21908              :   /* Return the cost of moving between two registers of mode MODE,
   21909              :      assuming that the move will be in pieces of at most UNITS bytes.  */
   21910     28921501 :   return COSTS_N_INSNS (CEIL (GET_MODE_SIZE (mode), units));
   21911              : }
   21912              : 
   21913              : /* Return cost of vector operation in MODE given that scalar version has
   21914              :    COST.  */
   21915              : 
   21916              : static int
   21917   2797768689 : ix86_vec_cost (machine_mode mode, int cost)
   21918              : {
   21919   2797768689 :   if (!VECTOR_MODE_P (mode))
   21920              :     return cost;
   21921              : 
   21922   2797541187 :   if (GET_MODE_BITSIZE (mode) == 128
   21923   2797541187 :       && TARGET_SSE_SPLIT_REGS)
   21924      2862046 :     return cost * GET_MODE_BITSIZE (mode) / 64;
   21925   2796110164 :   else if (GET_MODE_BITSIZE (mode) > 128
   21926   2796110164 :       && TARGET_AVX256_SPLIT_REGS)
   21927      1676180 :     return cost * GET_MODE_BITSIZE (mode) / 128;
   21928   2795272074 :   else if (GET_MODE_BITSIZE (mode) > 256
   21929   2795272074 :       && TARGET_AVX512_SPLIT_REGS)
   21930       194508 :     return cost * GET_MODE_BITSIZE (mode) / 256;
   21931              :   return cost;
   21932              : }
   21933              : 
   21934              : /* Return cost of vec_widen_<s>mult_hi/lo_<mode>,
   21935              :    vec_widen_<s>mul_hi/lo_<mode> is only available for VI124_AVX2.  */
   21936              : static int
   21937         1018 : ix86_widen_mult_cost (const struct processor_costs *cost,
   21938              :                       enum machine_mode mode, bool uns_p)
   21939              : {
   21940         1018 :   gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
   21941         1018 :   int extra_cost = 0;
   21942         1018 :   int basic_cost = 0;
   21943         1018 :   switch (mode)
   21944              :     {
   21945          108 :     case V8HImode:
   21946          108 :     case V16HImode:
   21947          108 :       if (!uns_p || mode == V16HImode)
   21948           43 :         extra_cost = cost->sse_op * 2;
   21949          108 :       basic_cost = cost->mulss * 2 + cost->sse_op * 4;
   21950          108 :       break;
   21951          188 :     case V4SImode:
   21952          188 :     case V8SImode:
   21953              :       /* pmulhw/pmullw can be used.  */
   21954          188 :       basic_cost = cost->mulss * 2 + cost->sse_op * 2;
   21955          188 :       break;
   21956          659 :     case V2DImode:
   21957              :       /* pmuludq under sse2, pmuldq under sse4.1, for sign_extend,
   21958              :          require extra 4 mul, 4 add, 4 cmp and 2 shift.  */
   21959          659 :       if (!TARGET_SSE4_1 && !uns_p)
   21960          403 :         extra_cost = (cost->mulss + cost->sse_op + cost->sse_op) * 4
   21961          403 :                       + cost->sse_op * 2;
   21962              :       /* Fallthru.  */
   21963          706 :     case V4DImode:
   21964          706 :       basic_cost = cost->mulss * 2 + cost->sse_op * 4;
   21965          706 :       break;
   21966              :     default:
   21967              :       /* Not implemented.  */
   21968              :       return 100;
   21969              :     }
   21970         1002 :   return ix86_vec_cost (mode, basic_cost + extra_cost);
   21971              : }
   21972              : 
   21973              : /* Return cost of multiplication in MODE.  */
   21974              : 
   21975              : static int
   21976   1194139846 : ix86_multiplication_cost (const struct processor_costs *cost,
   21977              :                           enum machine_mode mode)
   21978              : {
   21979   1194139846 :   machine_mode inner_mode = mode;
   21980   1194139846 :   if (VECTOR_MODE_P (mode))
   21981   1193168111 :     inner_mode = GET_MODE_INNER (mode);
   21982              : 
   21983   1194139846 :   if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   21984       723253 :     return inner_mode == DFmode ? cost->mulsd : cost->mulss;
   21985   1193416593 :   else if (X87_FLOAT_MODE_P (mode))
   21986       162330 :     return cost->fmul;
   21987   1193254263 :   else if (FLOAT_MODE_P (mode))
   21988       213415 :     return  ix86_vec_cost (mode,
   21989       213415 :                            inner_mode == DFmode ? cost->mulsd : cost->mulss);
   21990   1193040848 :   else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   21991              :     {
   21992   1192978942 :       int nmults, nops;
   21993              :       /* Cost of reading the memory.  */
   21994   1192978942 :       int extra;
   21995              : 
   21996   1192978942 :       switch (mode)
   21997              :         {
   21998     18784948 :         case V4QImode:
   21999     18784948 :         case V8QImode:
   22000              :           /* Partial V*QImode is emulated with 4-6 insns.  */
   22001     18784948 :           nmults = 1;
   22002     18784948 :           nops = 3;
   22003     18784948 :           extra = 0;
   22004              : 
   22005     18784948 :           if (TARGET_AVX512BW && TARGET_AVX512VL)
   22006              :             ;
   22007     18676767 :           else if (TARGET_AVX2)
   22008              :             nops += 2;
   22009     18174792 :           else if (TARGET_XOP)
   22010         9504 :             extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
   22011              :           else
   22012              :             {
   22013     18165288 :               nops += 1;
   22014     18165288 :               extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
   22015              :             }
   22016     18784948 :           goto do_qimode;
   22017              : 
   22018      9393708 :         case V16QImode:
   22019              :           /* V*QImode is emulated with 4-11 insns.  */
   22020      9393708 :           nmults = 1;
   22021      9393708 :           nops = 3;
   22022      9393708 :           extra = 0;
   22023              : 
   22024      9393708 :           if (TARGET_AVX2 && !TARGET_PREFER_AVX128)
   22025              :             {
   22026       303343 :               if (!(TARGET_AVX512BW && TARGET_AVX512VL))
   22027       249575 :                 nops += 3;
   22028              :             }
   22029      9090365 :           else if (TARGET_XOP)
   22030              :             {
   22031         5200 :               nmults += 1;
   22032         5200 :               nops += 2;
   22033         5200 :               extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
   22034              :             }
   22035              :           else
   22036              :             {
   22037      9085165 :               nmults += 1;
   22038      9085165 :               nops += 4;
   22039      9085165 :               extra += COSTS_N_INSNS (cost->sse_load[2]) / 2;
   22040              :             }
   22041      9393708 :           goto do_qimode;
   22042              : 
   22043      9392425 :         case V32QImode:
   22044      9392425 :           nmults = 1;
   22045      9392425 :           nops = 3;
   22046      9392425 :           extra = 0;
   22047              : 
   22048      9392425 :           if (!TARGET_AVX512BW || TARGET_PREFER_AVX256)
   22049              :             {
   22050      9310712 :               nmults += 1;
   22051      9310712 :               nops += 4;
   22052              :               /* 2 loads, so no division by 2.  */
   22053      9310712 :               extra += COSTS_N_INSNS (cost->sse_load[3]);
   22054              :             }
   22055      9392425 :           goto do_qimode;
   22056              : 
   22057      9391830 :         case V64QImode:
   22058      9391830 :           nmults = 2;
   22059      9391830 :           nops = 9;
   22060              :           /* 2 loads of each size, so no division by 2.  */
   22061      9391830 :           extra = COSTS_N_INSNS (cost->sse_load[3] + cost->sse_load[4]);
   22062              : 
   22063     46962911 :         do_qimode:
   22064     46962911 :           return ix86_vec_cost (mode, cost->mulss * nmults
   22065     46962911 :                                 + cost->sse_op * nops) + extra;
   22066              : 
   22067     40163604 :         case V4SImode:
   22068              :           /* pmulld is used in this case. No emulation is needed.  */
   22069     40163604 :           if (TARGET_SSE4_1)
   22070      2221009 :             goto do_native;
   22071              :           /* V4SImode is emulated with 7 insns.  */
   22072              :           else
   22073     37942595 :             return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
   22074              : 
   22075    162342487 :         case V2DImode:
   22076    162342487 :         case V4DImode:
   22077              :           /* vpmullq is used in this case. No emulation is needed.  */
   22078    162342487 :           if (TARGET_AVX512DQ && TARGET_AVX512VL)
   22079       583938 :             goto do_native;
   22080              :           /* V*DImode is emulated with 6-8 insns.  */
   22081    161758549 :           else if (TARGET_XOP && mode == V2DImode)
   22082        52592 :             return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 4);
   22083              :           /* FALLTHRU */
   22084    242818035 :         case V8DImode:
   22085              :           /* vpmullq is used in this case. No emulation is needed.  */
   22086    242818035 :           if (TARGET_AVX512DQ && mode == V8DImode)
   22087       383334 :             goto do_native;
   22088              :           else
   22089    242434701 :             return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
   22090              : 
   22091    865586143 :         default:
   22092    865586143 :         do_native:
   22093    865586143 :           return ix86_vec_cost (mode, cost->mulss);
   22094              :         }
   22095              :     }
   22096              :   else
   22097       123804 :     return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
   22098              : }
   22099              : 
   22100              : /* Return cost of multiplication in MODE.  */
   22101              : 
   22102              : static int
   22103     71755017 : ix86_division_cost (const struct processor_costs *cost,
   22104              :                           enum machine_mode mode)
   22105              : {
   22106     71755017 :   machine_mode inner_mode = mode;
   22107     71755017 :   if (VECTOR_MODE_P (mode))
   22108     52948425 :     inner_mode = GET_MODE_INNER (mode);
   22109              : 
   22110     71755017 :   if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   22111       248266 :     return inner_mode == DFmode ? cost->divsd : cost->divss;
   22112     71506751 :   else if (X87_FLOAT_MODE_P (mode))
   22113        44794 :     return cost->fdiv;
   22114     71461957 :   else if (FLOAT_MODE_P (mode))
   22115        16914 :     return ix86_vec_cost (mode,
   22116        16914 :                           inner_mode == DFmode ? cost->divsd : cost->divss);
   22117              :   else
   22118     79711580 :     return cost->divide[MODE_INDEX (mode)];
   22119              : }
   22120              : 
   22121              : /* Return cost of shift in MODE.
   22122              :    If CONSTANT_OP1 is true, the op1 value is known and set in OP1_VAL.
   22123              :    AND_IN_OP1 specify in op1 is result of AND and SHIFT_AND_TRUNCATE
   22124              :    if op1 is a result of subreg.
   22125              : 
   22126              :    SKIP_OP0/1 is set to true if cost of OP0/1 should be ignored.  */
   22127              : 
   22128              : static int
   22129    766599040 : ix86_shift_rotate_cost (const struct processor_costs *cost,
   22130              :                         enum rtx_code code,
   22131              :                         enum machine_mode mode, bool constant_op1,
   22132              :                         HOST_WIDE_INT op1_val,
   22133              :                         bool and_in_op1,
   22134              :                         bool shift_and_truncate,
   22135              :                         bool *skip_op0, bool *skip_op1)
   22136              : {
   22137    766599040 :   if (skip_op0)
   22138    766544253 :     *skip_op0 = *skip_op1 = false;
   22139              : 
   22140    766599040 :   if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   22141              :     {
   22142    393815798 :       int count;
   22143              :       /* Cost of reading the memory.  */
   22144    393815798 :       int extra;
   22145              : 
   22146    393815798 :       switch (mode)
   22147              :         {
   22148      5976612 :         case V4QImode:
   22149      5976612 :         case V8QImode:
   22150      5976612 :           if (TARGET_AVX2)
   22151              :             /* Use vpbroadcast.  */
   22152       193964 :             extra = cost->sse_op;
   22153              :           else
   22154      5782648 :             extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
   22155              : 
   22156      5976612 :           if (constant_op1)
   22157              :             {
   22158      5976584 :               if (code == ASHIFTRT)
   22159              :                 {
   22160           40 :                   count = 4;
   22161           40 :                   extra *= 2;
   22162              :                 }
   22163              :               else
   22164              :                 count = 2;
   22165              :             }
   22166           28 :           else if (TARGET_AVX512BW && TARGET_AVX512VL)
   22167           28 :             return ix86_vec_cost (mode, cost->sse_op * 4);
   22168            0 :           else if (TARGET_SSE4_1)
   22169              :             count = 5;
   22170            0 :           else if (code == ASHIFTRT)
   22171              :             count = 6;
   22172              :           else
   22173            0 :             count = 5;
   22174      5976584 :           return ix86_vec_cost (mode, cost->sse_op * count) + extra;
   22175              : 
   22176      2990974 :         case V16QImode:
   22177      2990974 :           if (TARGET_XOP)
   22178              :             {
   22179              :               /* For XOP we use vpshab, which requires a broadcast of the
   22180              :                  value to the variable shift insn.  For constants this
   22181              :                  means a V16Q const in mem; even when we can perform the
   22182              :                  shift with one insn set the cost to prefer paddb.  */
   22183         3489 :               if (constant_op1)
   22184              :                 {
   22185         2530 :                   extra = COSTS_N_INSNS (cost->sse_load[2]) / 2;
   22186         2530 :                   return ix86_vec_cost (mode, cost->sse_op) + extra;
   22187              :                 }
   22188              :               else
   22189              :                 {
   22190          959 :                   count = (code == ASHIFT) ? 3 : 4;
   22191          959 :                   return ix86_vec_cost (mode, cost->sse_op * count);
   22192              :                 }
   22193              :             }
   22194              :           /* FALLTHRU */
   22195      5976272 :         case V32QImode:
   22196      5976272 :           if (TARGET_GFNI && constant_op1)
   22197              :             {
   22198              :               /* Use vgf2p8affine.  One extra load for the mask, but in a loop
   22199              :                  with enough registers it will be moved out.  So for now don't
   22200              :                  account the constant mask load.  This is not quite right
   22201              :                  for non loop vectorization.  */
   22202        11327 :               extra = 0;
   22203        11327 :               return ix86_vec_cost (mode, cost->sse_op) + extra;
   22204              :             }
   22205      5964945 :           if (TARGET_AVX2)
   22206              :             /* Use vpbroadcast.  */
   22207       187470 :             extra = cost->sse_op;
   22208              :           else
   22209      5777475 :             extra = COSTS_N_INSNS (mode == V16QImode
   22210              :                                    ? cost->sse_load[2]
   22211      5777475 :                                    : cost->sse_load[3]) / 2;
   22212              : 
   22213      5964945 :           if (constant_op1)
   22214              :             {
   22215      5964794 :               if (code == ASHIFTRT)
   22216              :                 {
   22217          106 :                   count = 4;
   22218          106 :                   extra *= 2;
   22219              :                 }
   22220              :               else
   22221              :                 count = 2;
   22222              :             }
   22223          151 :           else if (TARGET_AVX512BW
   22224           75 :                    && ((mode == V32QImode && !TARGET_PREFER_AVX256)
   22225           37 :                        || (mode == V16QImode && TARGET_AVX512VL
   22226           37 :                            && !TARGET_PREFER_AVX128)))
   22227           75 :             return ix86_vec_cost (mode, cost->sse_op * 4);
   22228           76 :           else if (TARGET_AVX2
   22229            0 :                    && mode == V16QImode && !TARGET_PREFER_AVX128)
   22230              :             count = 6;
   22231           76 :           else if (TARGET_SSE4_1)
   22232              :             count = 9;
   22233           76 :           else if (code == ASHIFTRT)
   22234              :             count = 10;
   22235              :           else
   22236           76 :             count = 9;
   22237      5964870 :           return ix86_vec_cost (mode, cost->sse_op * count) + extra;
   22238              : 
   22239      2988770 :         case V64QImode:
   22240              :           /* Ignore the mask load for GF2P8AFFINEQB.  */
   22241      2988770 :           extra = 0;
   22242      2988770 :           return ix86_vec_cost (mode, cost->sse_op) + extra;
   22243              : 
   22244     53877185 :         case V2DImode:
   22245     53877185 :         case V4DImode:
   22246              :           /* V*DImode arithmetic right shift is emulated.  */
   22247     53877185 :           if (code == ASHIFTRT && !TARGET_AVX512VL)
   22248              :             {
   22249         1181 :               if (constant_op1)
   22250              :                 {
   22251          479 :                   if (op1_val == 63)
   22252          402 :                     count = TARGET_SSE4_2 ? 1 : 2;
   22253          376 :                   else if (TARGET_XOP)
   22254              :                     count = 2;
   22255           77 :                   else if (TARGET_SSE4_1)
   22256              :                     count = 3;
   22257              :                   else
   22258           86 :                     count = 4;
   22259              :                 }
   22260          702 :               else if (TARGET_XOP)
   22261              :                 count = 3;
   22262           21 :               else if (TARGET_SSE4_2)
   22263              :                 count = 4;
   22264              :               else
   22265         1181 :                 count = 5;
   22266              : 
   22267         1181 :               return ix86_vec_cost (mode, cost->sse_op * count);
   22268              :             }
   22269              :           /* FALLTHRU */
   22270    378869474 :         default:
   22271    378869474 :           return ix86_vec_cost (mode, cost->sse_op);
   22272              :         }
   22273              :     }
   22274              : 
   22275    754255442 :   if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22276              :     {
   22277    190792648 :       if (constant_op1)
   22278              :         {
   22279    190758198 :           if (op1_val > 32)
   22280    135517529 :             return cost->shift_const + COSTS_N_INSNS (2);
   22281              :           else
   22282     55240669 :             return cost->shift_const * 2;
   22283              :         }
   22284              :       else
   22285              :         {
   22286        34450 :           if (and_in_op1)
   22287           63 :             return cost->shift_var * 2;
   22288              :           else
   22289        34387 :             return cost->shift_var * 6 + COSTS_N_INSNS (2);
   22290              :         }
   22291              :     }
   22292              :   else
   22293              :     {
   22294    181990594 :       if (constant_op1)
   22295    181279494 :         return cost->shift_const;
   22296       711100 :       else if (shift_and_truncate)
   22297              :         {
   22298        22835 :           if (skip_op0)
   22299        22835 :             *skip_op0 = *skip_op1 = true;
   22300              :           /* Return the cost after shift-and truncation.  */
   22301        22835 :           return cost->shift_var;
   22302              :         }
   22303              :       else
   22304       688265 :         return cost->shift_var;
   22305              :     }
   22306              : }
   22307              : 
   22308              : static int
   22309    148912208 : ix86_insn_cost (rtx_insn *insn, bool speed)
   22310              : {
   22311    148912208 :   int insn_cost = 0;
   22312              :   /* Add extra cost to avoid post_reload late_combine revert
   22313              :      the optimization did in pass_rpad.  */
   22314    148912208 :   if (reload_completed
   22315      4645522 :       && ix86_rpad_gate ()
   22316       265551 :       && recog_memoized (insn) >= 0
   22317    149177505 :       && get_attr_avx_partial_xmm_update (insn)
   22318              :       == AVX_PARTIAL_XMM_UPDATE_TRUE)
   22319              :     insn_cost += COSTS_N_INSNS (3);
   22320              : 
   22321    148912208 :   return insn_cost + pattern_cost (PATTERN (insn), speed);
   22322              : }
   22323              : 
   22324              : /* Return cost of SSE/AVX FP->FP conversion (extensions and truncates).  */
   22325              : 
   22326              : static int
   22327       745487 : vec_fp_conversion_cost (const struct processor_costs *cost, int size)
   22328              : {
   22329       745487 :   if (size < 128)
   22330       741997 :     return cost->cvtss2sd;
   22331         3490 :   else if (size < 256)
   22332              :     {
   22333         1420 :       if (TARGET_SSE_SPLIT_REGS)
   22334            0 :         return cost->cvtss2sd * size / 64;
   22335         1420 :       return cost->cvtss2sd;
   22336              :     }
   22337         2070 :   if (size < 512)
   22338          768 :     return cost->vcvtps2pd256;
   22339              :   else
   22340         1302 :     return cost->vcvtps2pd512;
   22341              : }
   22342              : 
   22343              : /* Return true of X is UNSPEC with UNSPEC_PCMP or UNSPEC_UNSIGNED_PCMP.  */
   22344              : 
   22345              : static bool
   22346       266616 : unspec_pcmp_p (rtx x)
   22347              : {
   22348       266616 :   return GET_CODE (x) == UNSPEC
   22349       266616 :          && (XINT (x, 1) == UNSPEC_PCMP || XINT (x, 1) == UNSPEC_UNSIGNED_PCMP);
   22350              : }
   22351              : 
   22352              : /* Compute a (partial) cost for rtx X.  Return true if the complete
   22353              :    cost has been computed, and false if subexpressions should be
   22354              :    scanned.  In either case, *TOTAL contains the cost result.  */
   22355              : 
   22356              : static bool
   22357   7628701434 : ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
   22358              :                 int *total, bool speed)
   22359              : {
   22360   7628701434 :   rtx mask;
   22361   7628701434 :   enum rtx_code code = GET_CODE (x);
   22362   7628701434 :   enum rtx_code outer_code = (enum rtx_code) outer_code_i;
   22363   4088917346 :   const struct processor_costs *cost
   22364   7628701434 :     = speed ? ix86_tune_cost : &ix86_size_cost;
   22365   7628701434 :   int src_cost;
   22366              : 
   22367              :   /* Handling different vternlog variants.  */
   22368   7628701434 :   if ((GET_MODE_SIZE (mode) == 64
   22369   7628701434 :        ? TARGET_AVX512F
   22370   6457849688 :        : (TARGET_AVX512VL
   22371   6396588513 :           || (TARGET_AVX512F && !TARGET_PREFER_AVX256)))
   22372    176704431 :       && GET_MODE_SIZE (mode) >= 16
   22373    119961547 :       && outer_code_i == SET
   22374   7675067720 :       && ternlog_operand (x, mode))
   22375              :     {
   22376        32996 :       rtx args[3];
   22377              : 
   22378        32996 :       args[0] = NULL_RTX;
   22379        32996 :       args[1] = NULL_RTX;
   22380        32996 :       args[2] = NULL_RTX;
   22381        32996 :       int idx = ix86_ternlog_idx (x, args);
   22382        32996 :       gcc_assert (idx >= 0);
   22383              : 
   22384        32996 :       *total = cost->sse_op;
   22385       131984 :       for (int i = 0; i != 3; i++)
   22386        98988 :         if (args[i])
   22387        70502 :           *total += rtx_cost (args[i], GET_MODE (args[i]), UNSPEC, i, speed);
   22388        32996 :       return true;
   22389              :     }
   22390              : 
   22391              : 
   22392   7628668438 :   switch (code)
   22393              :     {
   22394     47325526 :     case SET:
   22395     47325526 :       if (register_operand (SET_DEST (x), VOIDmode)
   22396     47325526 :           && register_operand (SET_SRC (x), VOIDmode))
   22397              :         {
   22398     28921501 :           *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
   22399     28921501 :           return true;
   22400              :         }
   22401              : 
   22402     18404025 :       if (register_operand (SET_SRC (x), VOIDmode))
   22403              :         /* Avoid potentially incorrect high cost from rtx_costs
   22404              :            for non-tieable SUBREGs.  */
   22405              :         src_cost = 0;
   22406              :       else
   22407              :         {
   22408     15603793 :           src_cost = rtx_cost (SET_SRC (x), mode, SET, 1, speed);
   22409              : 
   22410     15603793 :           if (CONSTANT_P (SET_SRC (x)))
   22411              :             /* Constant costs assume a base value of COSTS_N_INSNS (1) and add
   22412              :                a small value, possibly zero for cheap constants.  */
   22413      6944677 :             src_cost += COSTS_N_INSNS (1);
   22414              :         }
   22415              : 
   22416     18404025 :       *total = src_cost + rtx_cost (SET_DEST (x), mode, SET, 0, speed);
   22417     18404025 :       return true;
   22418              : 
   22419   2810295281 :     case CONST_INT:
   22420   2810295281 :     case CONST:
   22421   2810295281 :     case LABEL_REF:
   22422   2810295281 :     case SYMBOL_REF:
   22423   2810295281 :       if (x86_64_immediate_operand (x, VOIDmode))
   22424   2211709327 :         *total = 0;
   22425    598585954 :       else if (TARGET_64BIT && x86_64_zext_immediate_operand (x, VOIDmode))
   22426              :         /* Consider the zext constants slightly more expensive, as they
   22427              :            can't appear in most instructions.  */
   22428     27717223 :         *total = 1;
   22429              :       else
   22430              :         /* movabsq is slightly more expensive than a simple instruction. */
   22431    570868731 :         *total = COSTS_N_INSNS (1) + 1;
   22432              :       return true;
   22433              : 
   22434      7507981 :     case CONST_DOUBLE:
   22435      7507981 :       if (IS_STACK_MODE (mode))
   22436      1302681 :         switch (standard_80387_constant_p (x))
   22437              :           {
   22438              :           case -1:
   22439              :           case 0:
   22440              :             break;
   22441       283372 :           case 1: /* 0.0 */
   22442       283372 :             *total = 1;
   22443       283372 :             return true;
   22444       484787 :           default: /* Other constants */
   22445       484787 :             *total = 2;
   22446       484787 :             return true;
   22447              :           }
   22448              :       /* FALLTHRU */
   22449              : 
   22450     14265738 :     case CONST_VECTOR:
   22451     14265738 :       switch (standard_sse_constant_p (x, mode))
   22452              :         {
   22453              :         case 0:
   22454              :           break;
   22455      4189711 :         case 1:  /* 0: xor eliminates false dependency */
   22456      4189711 :           *total = 0;
   22457      4189711 :           return true;
   22458       160229 :         default: /* -1: cmp contains false dependency */
   22459       160229 :           *total = 1;
   22460       160229 :           return true;
   22461              :         }
   22462              :       /* FALLTHRU */
   22463              : 
   22464     10914460 :     case CONST_WIDE_INT:
   22465              :       /* Fall back to (MEM (SYMBOL_REF)), since that's where
   22466              :          it'll probably end up.  Add a penalty for size.  */
   22467     21828920 :       *total = (COSTS_N_INSNS (1)
   22468     21603170 :                 + (!TARGET_64BIT && flag_pic)
   22469     21828920 :                 + (GET_MODE_SIZE (mode) <= 4
   22470     19061814 :                    ? 0 : GET_MODE_SIZE (mode) <= 8 ? 1 : 2));
   22471     10914460 :       return true;
   22472              : 
   22473     22229051 :     case ZERO_EXTEND:
   22474              :       /* The zero extensions is often completely free on x86_64, so make
   22475              :          it as cheap as possible.  */
   22476     22229051 :       if (TARGET_64BIT && mode == DImode
   22477      4978230 :           && GET_MODE (XEXP (x, 0)) == SImode)
   22478      3071102 :         *total = 1;
   22479     19157949 :       else if (TARGET_ZERO_EXTEND_WITH_AND)
   22480            0 :         *total = cost->add;
   22481              :       else
   22482     19157949 :         *total = cost->movzx;
   22483              :       return false;
   22484              : 
   22485      3054653 :     case SIGN_EXTEND:
   22486      3054653 :       *total = cost->movsx;
   22487      3054653 :       return false;
   22488              : 
   22489    630734413 :     case ASHIFT:
   22490    630734413 :       if (SCALAR_INT_MODE_P (mode)
   22491    243865004 :           && GET_MODE_SIZE (mode) < UNITS_PER_WORD
   22492    673381207 :           && CONST_INT_P (XEXP (x, 1)))
   22493              :         {
   22494     42475211 :           HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
   22495     42475211 :           if (value == 1)
   22496              :             {
   22497      2453935 :               *total = cost->add;
   22498      2453935 :               return false;
   22499              :             }
   22500     40021276 :           if ((value == 2 || value == 3)
   22501      4504546 :               && cost->lea <= cost->shift_const)
   22502              :             {
   22503      2123042 :               *total = cost->lea;
   22504      2123042 :               return false;
   22505              :             }
   22506              :         }
   22507              :       /* FALLTHRU */
   22508              : 
   22509    766544253 :     case ROTATE:
   22510    766544253 :     case ASHIFTRT:
   22511    766544253 :     case LSHIFTRT:
   22512    766544253 :     case ROTATERT:
   22513    766544253 :       bool skip_op0, skip_op1;
   22514    766544253 :       *total = ix86_shift_rotate_cost (cost, code, mode,
   22515    766544253 :                                        CONSTANT_P (XEXP (x, 1)),
   22516              :                                        CONST_INT_P (XEXP (x, 1))
   22517              :                                          ? INTVAL (XEXP (x, 1)) : -1,
   22518              :                                        GET_CODE (XEXP (x, 1)) == AND,
   22519    766544253 :                                        SUBREG_P (XEXP (x, 1))
   22520    766544253 :                                        && GET_CODE (XEXP (XEXP (x, 1),
   22521              :                                                           0)) == AND,
   22522              :                                        &skip_op0, &skip_op1);
   22523    766544253 :       if (skip_op0 || skip_op1)
   22524              :         {
   22525        22835 :           if (!skip_op0)
   22526            0 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   22527        22835 :           if (!skip_op1)
   22528            0 :             *total += rtx_cost (XEXP (x, 1), mode, code, 0, speed);
   22529        22835 :           return true;
   22530              :         }
   22531              :       return false;
   22532              : 
   22533       230346 :     case FMA:
   22534       230346 :       {
   22535       230346 :         rtx sub;
   22536              : 
   22537       230346 :         gcc_assert (FLOAT_MODE_P (mode));
   22538       230346 :         gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
   22539              : 
   22540       460692 :         *total = ix86_vec_cost (mode,
   22541       230346 :                                 GET_MODE_INNER (mode) == SFmode
   22542              :                                 ? cost->fmass : cost->fmasd);
   22543       230346 :         *total += rtx_cost (XEXP (x, 1), mode, FMA, 1, speed);
   22544              : 
   22545              :         /* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
   22546       230346 :         sub = XEXP (x, 0);
   22547       230346 :         if (GET_CODE (sub) == NEG)
   22548        51012 :           sub = XEXP (sub, 0);
   22549       230346 :         *total += rtx_cost (sub, mode, FMA, 0, speed);
   22550              : 
   22551       230346 :         sub = XEXP (x, 2);
   22552       230346 :         if (GET_CODE (sub) == NEG)
   22553        40520 :           sub = XEXP (sub, 0);
   22554       230346 :         *total += rtx_cost (sub, mode, FMA, 2, speed);
   22555       230346 :         return true;
   22556              :       }
   22557              : 
   22558   1739436799 :     case MULT:
   22559   1739436799 :       if (!FLOAT_MODE_P (mode) && !VECTOR_MODE_P (mode))
   22560              :         {
   22561    545476452 :           rtx op0 = XEXP (x, 0);
   22562    545476452 :           rtx op1 = XEXP (x, 1);
   22563    545476452 :           int nbits;
   22564    545476452 :           if (CONST_INT_P (XEXP (x, 1)))
   22565              :             {
   22566    527481392 :               unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
   22567   1070508346 :               for (nbits = 0; value != 0; value &= value - 1)
   22568    543026954 :                 nbits++;
   22569              :             }
   22570              :           else
   22571              :             /* This is arbitrary.  */
   22572              :             nbits = 7;
   22573              : 
   22574              :           /* Compute costs correctly for widening multiplication.  */
   22575    545476452 :           if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
   22576    550930769 :               && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
   22577      5454317 :                  == GET_MODE_SIZE (mode))
   22578              :             {
   22579      5450247 :               int is_mulwiden = 0;
   22580      5450247 :               machine_mode inner_mode = GET_MODE (op0);
   22581              : 
   22582      5450247 :               if (GET_CODE (op0) == GET_CODE (op1))
   22583      5365936 :                 is_mulwiden = 1, op1 = XEXP (op1, 0);
   22584        84311 :               else if (CONST_INT_P (op1))
   22585              :                 {
   22586        74107 :                   if (GET_CODE (op0) == SIGN_EXTEND)
   22587        21643 :                     is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
   22588        21643 :                                   == INTVAL (op1);
   22589              :                   else
   22590        52464 :                     is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
   22591              :                 }
   22592              : 
   22593      5440043 :               if (is_mulwiden)
   22594      5440043 :                 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
   22595              :             }
   22596              : 
   22597    545476452 :           int mult_init;
   22598              :           // Double word multiplication requires 3 mults and 2 adds.
   22599   1106612010 :           if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22600              :             {
   22601    328337697 :               mult_init = 3 * cost->mult_init[MODE_INDEX (word_mode)]
   22602    328337697 :                           + 2 * cost->add;
   22603    328337697 :               nbits *= 3;
   22604              :             }
   22605    374514310 :           else mult_init = cost->mult_init[MODE_INDEX (mode)];
   22606              : 
   22607   1090952904 :           *total = (mult_init
   22608    545476452 :                     + nbits * cost->mult_bit
   22609    545476452 :                     + rtx_cost (op0, mode, outer_code, opno, speed)
   22610    545476452 :                     + rtx_cost (op1, mode, outer_code, opno, speed));
   22611              : 
   22612    545476452 :           return true;
   22613              :         }
   22614   1193960347 :       *total = ix86_multiplication_cost (cost, mode);
   22615   1193960347 :       return false;
   22616              : 
   22617     71742640 :     case DIV:
   22618     71742640 :     case UDIV:
   22619     71742640 :     case MOD:
   22620     71742640 :     case UMOD:
   22621     71742640 :       *total = ix86_division_cost (cost, mode);
   22622     71742640 :       return false;
   22623              : 
   22624    683061932 :     case PLUS:
   22625    683061932 :       if (GET_MODE_CLASS (mode) == MODE_INT
   22626    937951813 :           && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
   22627              :         {
   22628    142231914 :           if (GET_CODE (XEXP (x, 0)) == PLUS
   22629      3916443 :               && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
   22630       843528 :               && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
   22631       843503 :               && CONSTANT_P (XEXP (x, 1)))
   22632              :             {
   22633       843446 :               HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
   22634       843446 :               if (val == 2 || val == 4 || val == 8)
   22635              :                 {
   22636       843342 :                   *total = cost->lea;
   22637       843342 :                   *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
   22638              :                                       outer_code, opno, speed);
   22639       843342 :                   *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
   22640              :                                       outer_code, opno, speed);
   22641       843342 :                   *total += rtx_cost (XEXP (x, 1), mode,
   22642              :                                       outer_code, opno, speed);
   22643       843342 :                   return true;
   22644              :                 }
   22645              :             }
   22646    141388468 :           else if (GET_CODE (XEXP (x, 0)) == MULT
   22647     51830344 :                    && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
   22648              :             {
   22649     51771357 :               HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
   22650     51771357 :               if (val == 2 || val == 4 || val == 8)
   22651              :                 {
   22652      7954980 :                   *total = cost->lea;
   22653      7954980 :                   *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   22654              :                                       outer_code, opno, speed);
   22655      7954980 :                   *total += rtx_cost (XEXP (x, 1), mode,
   22656              :                                       outer_code, opno, speed);
   22657      7954980 :                   return true;
   22658              :                 }
   22659              :             }
   22660     89617111 :           else if (GET_CODE (XEXP (x, 0)) == PLUS)
   22661              :             {
   22662      3072997 :               rtx op = XEXP (XEXP (x, 0), 0);
   22663              : 
   22664              :               /* Add with carry, ignore the cost of adding a carry flag.  */
   22665      3072997 :               if (ix86_carry_flag_operator (op, mode)
   22666      3072997 :                   || ix86_carry_flag_unset_operator (op, mode))
   22667        68709 :                 *total = cost->add;
   22668              :               else
   22669              :                 {
   22670      3004288 :                   *total = cost->lea;
   22671      3004288 :                   *total += rtx_cost (op, mode,
   22672              :                                       outer_code, opno, speed);
   22673              :                 }
   22674              : 
   22675      3072997 :               *total += rtx_cost (XEXP (XEXP (x, 0), 1), mode,
   22676              :                                   outer_code, opno, speed);
   22677      3072997 :               *total += rtx_cost (XEXP (x, 1), mode,
   22678              :                                   outer_code, opno, speed);
   22679      3072997 :               return true;
   22680              :             }
   22681              :         }
   22682              :       /* FALLTHRU */
   22683              : 
   22684   1817104220 :     case MINUS:
   22685              :       /* Subtract with borrow, ignore the cost of subtracting a carry flag.  */
   22686   1817104220 :       if (GET_MODE_CLASS (mode) == MODE_INT
   22687    515134833 :           && GET_MODE_SIZE (mode) <= UNITS_PER_WORD
   22688    233281601 :           && GET_CODE (XEXP (x, 0)) == MINUS
   22689   1817142361 :           && (ix86_carry_flag_operator (XEXP (XEXP (x, 0), 1), mode)
   22690        14459 :               || ix86_carry_flag_unset_operator (XEXP (XEXP (x, 0), 1), mode)))
   22691              :         {
   22692        23682 :           *total = cost->add;
   22693        23682 :           *total += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   22694              :                               outer_code, opno, speed);
   22695        23682 :           *total += rtx_cost (XEXP (x, 1), mode,
   22696              :                               outer_code, opno, speed);
   22697        23682 :           return true;
   22698              :         }
   22699              : 
   22700   1817080538 :       if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   22701      2388753 :         *total = cost->addss;
   22702   1814691785 :       else if (X87_FLOAT_MODE_P (mode))
   22703       217772 :         *total = cost->fadd;
   22704   1814474013 :       else if (FLOAT_MODE_P (mode))
   22705       440822 :         *total = ix86_vec_cost (mode, cost->addss);
   22706   1814033191 :       else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   22707   1193600079 :         *total = ix86_vec_cost (mode, cost->sse_op);
   22708   1280154345 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22709    327411993 :         *total = cost->add * 2;
   22710              :       else
   22711    293021119 :         *total = cost->add;
   22712              :       return false;
   22713              : 
   22714      3983369 :     case IOR:
   22715      3983369 :       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   22716      3737797 :           || SSE_FLOAT_MODE_P (mode))
   22717              :         {
   22718              :           /* (ior (not ...) ...) can be a single insn in AVX512.  */
   22719          456 :           if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
   22720       255190 :               && (GET_MODE_SIZE (mode) == 64
   22721            0 :                   || (TARGET_AVX512VL
   22722            0 :                       && (GET_MODE_SIZE (mode) == 32
   22723            0 :                           || GET_MODE_SIZE (mode) == 16))))
   22724              :             {
   22725            0 :               rtx right = GET_CODE (XEXP (x, 1)) != NOT
   22726            0 :                           ? XEXP (x, 1) : XEXP (XEXP (x, 1), 0);
   22727              : 
   22728            0 :               *total = ix86_vec_cost (mode, cost->sse_op)
   22729            0 :                        + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   22730              :                                    outer_code, opno, speed)
   22731            0 :                        + rtx_cost (right, mode, outer_code, opno, speed);
   22732            0 :               return true;
   22733              :             }
   22734       255190 :           *total = ix86_vec_cost (mode, cost->sse_op);
   22735       255190 :         }
   22736      3728179 :       else if (TARGET_64BIT
   22737      3439448 :                && mode == TImode
   22738      1692948 :                && GET_CODE (XEXP (x, 0)) == ASHIFT
   22739       254929 :                && GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
   22740       252937 :                && GET_MODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == DImode
   22741       252937 :                && CONST_INT_P (XEXP (XEXP (x, 0), 1))
   22742       252937 :                && INTVAL (XEXP (XEXP (x, 0), 1)) == 64
   22743       252937 :                && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
   22744       229424 :                && GET_MODE (XEXP (XEXP (x, 1), 0)) == DImode)
   22745              :         {
   22746              :           /* *concatditi3 is cheap.  */
   22747       229424 :           rtx op0 = XEXP (XEXP (XEXP (x, 0), 0), 0);
   22748       229424 :           rtx op1 = XEXP (XEXP (x, 1), 0);
   22749         1431 :           *total = (SUBREG_P (op0) && GET_MODE (SUBREG_REG (op0)) == DFmode)
   22750       229424 :                    ? COSTS_N_INSNS (1)    /* movq.  */
   22751       227993 :                    : set_src_cost (op0, DImode, speed);
   22752         2336 :           *total += (SUBREG_P (op1) && GET_MODE (SUBREG_REG (op1)) == DFmode)
   22753       229424 :                     ? COSTS_N_INSNS (1)    /* movq.  */
   22754       227101 :                     : set_src_cost (op1, DImode, speed);
   22755       229424 :           return true;
   22756              :         }
   22757      3498755 :       else if (TARGET_64BIT
   22758      3210024 :                && mode == TImode
   22759      1463524 :                && GET_CODE (XEXP (x, 0)) == AND
   22760      1402907 :                && REG_P (XEXP (XEXP (x, 0), 0))
   22761      1397647 :                && CONST_WIDE_INT_P (XEXP (XEXP (x, 0), 1))
   22762      1394984 :                && CONST_WIDE_INT_NUNITS (XEXP (XEXP (x, 0), 1)) == 2
   22763      1394984 :                && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 0) == -1
   22764       910756 :                && CONST_WIDE_INT_ELT (XEXP (XEXP (x, 0), 1), 1) == 0
   22765       910756 :                && GET_CODE (XEXP (x, 1)) == ASHIFT
   22766       908629 :                && GET_CODE (XEXP (XEXP (x, 1), 0)) == ZERO_EXTEND
   22767       908629 :                && GET_MODE (XEXP (XEXP (XEXP (x, 1), 0), 0)) == DImode
   22768       908629 :                && CONST_INT_P (XEXP (XEXP (x, 1), 1))
   22769      4407384 :                && INTVAL (XEXP (XEXP (x, 1), 1)) == 64)
   22770              :         {
   22771              :           /* *insvti_highpart is cheap.  */
   22772       908629 :           rtx op = XEXP (XEXP (XEXP (x, 1), 0), 0);
   22773       908629 :           *total = COSTS_N_INSNS (1) + 1;
   22774         1436 :           *total += (SUBREG_P (op) && GET_MODE (SUBREG_REG (op)) == DFmode)
   22775       908629 :                     ? COSTS_N_INSNS (1)    /* movq.  */
   22776       907713 :                     : set_src_cost (op, DImode, speed);
   22777       908629 :           return true;
   22778              :         }
   22779      5468983 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22780       748372 :         *total = cost->add * 2;
   22781              :       else
   22782      1841754 :         *total = cost->add;
   22783              :       return false;
   22784              : 
   22785       567965 :     case XOR:
   22786       567965 :       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   22787       443259 :           || SSE_FLOAT_MODE_P (mode))
   22788       124706 :         *total = ix86_vec_cost (mode, cost->sse_op);
   22789       944603 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22790        16314 :         *total = cost->add * 2;
   22791              :       else
   22792       426945 :         *total = cost->add;
   22793              :       return false;
   22794              : 
   22795      7289662 :     case AND:
   22796      7289662 :       if (address_no_seg_operand (x, mode))
   22797              :         {
   22798        15582 :           *total = cost->lea;
   22799        15582 :           return true;
   22800              :         }
   22801      7274080 :       else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
   22802      6879643 :                || SSE_FLOAT_MODE_P (mode))
   22803              :         {
   22804              :           /* pandn is a single instruction.  */
   22805       428050 :           if (GET_CODE (XEXP (x, 0)) == NOT)
   22806              :             {
   22807        53737 :               rtx right = XEXP (x, 1);
   22808              : 
   22809              :               /* (and (not ...) (not ...)) can be a single insn in AVX512.  */
   22810          428 :               if (GET_CODE (right) == NOT && TARGET_AVX512F
   22811        53737 :                   && (GET_MODE_SIZE (mode) == 64
   22812            0 :                       || (TARGET_AVX512VL
   22813            0 :                           && (GET_MODE_SIZE (mode) == 32
   22814            0 :                               || GET_MODE_SIZE (mode) == 16))))
   22815            0 :                 right = XEXP (right, 0);
   22816              : 
   22817        53737 :               *total = ix86_vec_cost (mode, cost->sse_op)
   22818        53737 :                        + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   22819              :                                    outer_code, opno, speed)
   22820        53737 :                        + rtx_cost (right, mode, outer_code, opno, speed);
   22821        53737 :               return true;
   22822              :             }
   22823       374313 :           else if (GET_CODE (XEXP (x, 1)) == NOT)
   22824              :             {
   22825          862 :               *total = ix86_vec_cost (mode, cost->sse_op)
   22826          862 :                        + rtx_cost (XEXP (x, 0), mode,
   22827              :                                    outer_code, opno, speed)
   22828          862 :                        + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
   22829              :                                    outer_code, opno, speed);
   22830          862 :               return true;
   22831              :             }
   22832       373451 :           *total = ix86_vec_cost (mode, cost->sse_op);
   22833       373451 :         }
   22834     14378739 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22835              :         {
   22836      1138450 :           if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
   22837              :             {
   22838         1670 :               *total = cost->add * 2
   22839          835 :                        + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   22840              :                                    outer_code, opno, speed)
   22841          835 :                        + rtx_cost (XEXP (x, 1), mode,
   22842              :                                    outer_code, opno, speed);
   22843          835 :               return true;
   22844              :             }
   22845      1137615 :           else if (TARGET_BMI && GET_CODE (XEXP (x, 1)) == NOT)
   22846              :             {
   22847            0 :               *total = cost->add * 2
   22848            0 :                        + rtx_cost (XEXP (x, 0), mode,
   22849              :                                    outer_code, opno, speed)
   22850            0 :                        + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
   22851              :                                    outer_code, opno, speed);
   22852            0 :               return true;
   22853              :             }
   22854      1137615 :           *total = cost->add * 2;
   22855              :         }
   22856      5707580 :       else if (TARGET_BMI && GET_CODE (XEXP (x,0)) == NOT)
   22857              :         {
   22858         7578 :           *total = cost->add
   22859         3789 :                    + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   22860              :                                outer_code, opno, speed)
   22861         3789 :                    + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
   22862         3789 :           return true;
   22863              :         }
   22864      5703791 :       else if (TARGET_BMI && GET_CODE (XEXP (x,1)) == NOT)
   22865              :         {
   22866          112 :           *total = cost->add
   22867           56 :                    + rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
   22868           56 :                    + rtx_cost (XEXP (XEXP (x, 1), 0), mode,
   22869              :                                outer_code, opno, speed);
   22870           56 :           return true;
   22871              :         }
   22872              :       else
   22873      5703735 :         *total = cost->add;
   22874              :       return false;
   22875              : 
   22876       498466 :     case NOT:
   22877       498466 :       if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   22878              :         {
   22879              :           /* (not (xor ...)) can be a single insn in AVX512.  */
   22880            0 :           if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
   22881         6565 :               && (GET_MODE_SIZE (mode) == 64
   22882            0 :                   || (TARGET_AVX512VL
   22883            0 :                       && (GET_MODE_SIZE (mode) == 32
   22884            0 :                           || GET_MODE_SIZE (mode) == 16))))
   22885              :             {
   22886            0 :               *total = ix86_vec_cost (mode, cost->sse_op)
   22887            0 :                        + rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   22888              :                                    outer_code, opno, speed)
   22889            0 :                        + rtx_cost (XEXP (XEXP (x, 0), 1), mode,
   22890              :                                    outer_code, opno, speed);
   22891            0 :               return true;
   22892              :             }
   22893              : 
   22894              :           // vnot is pxor -1.
   22895         6565 :           *total = ix86_vec_cost (mode, cost->sse_op) + 1;
   22896              :         }
   22897      1128629 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22898        45213 :         *total = cost->add * 2;
   22899              :       else
   22900       446688 :         *total = cost->add;
   22901              :       return false;
   22902              : 
   22903     17973094 :     case NEG:
   22904     17973094 :       if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   22905        51412 :         *total = cost->sse_op;
   22906     17921682 :       else if (X87_FLOAT_MODE_P (mode))
   22907        15139 :         *total = cost->fchs;
   22908     17906543 :       else if (FLOAT_MODE_P (mode))
   22909        26855 :         *total = ix86_vec_cost (mode, cost->sse_op);
   22910     17879688 :       else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   22911     13245387 :         *total = ix86_vec_cost (mode, cost->sse_op);
   22912      9417441 :       else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
   22913      1742761 :         *total = cost->add * 3;
   22914              :       else
   22915      2891540 :         *total = cost->add;
   22916              :       return false;
   22917              : 
   22918     54622446 :     case COMPARE:
   22919     54622446 :       rtx op0, op1;
   22920     54622446 :       op0 = XEXP (x, 0);
   22921     54622446 :       op1 = XEXP (x, 1);
   22922     54622446 :       if (GET_CODE (op0) == ZERO_EXTRACT
   22923       170323 :           && XEXP (op0, 1) == const1_rtx
   22924       152396 :           && CONST_INT_P (XEXP (op0, 2))
   22925       152360 :           && op1 == const0_rtx)
   22926              :         {
   22927              :           /* This kind of construct is implemented using test[bwl].
   22928              :              Treat it as if we had an AND.  */
   22929       152360 :           mode = GET_MODE (XEXP (op0, 0));
   22930       304720 :           *total = (cost->add
   22931       152360 :                     + rtx_cost (XEXP (op0, 0), mode, outer_code,
   22932              :                                 opno, speed)
   22933       152360 :                     + rtx_cost (const1_rtx, mode, outer_code, opno, speed));
   22934       152360 :           return true;
   22935              :         }
   22936              : 
   22937     54470086 :       if (GET_CODE (op0) == PLUS && rtx_equal_p (XEXP (op0, 0), op1))
   22938              :         {
   22939              :           /* This is an overflow detection, count it as a normal compare.  */
   22940       138691 :           *total = rtx_cost (op0, GET_MODE (op0), COMPARE, 0, speed);
   22941       138691 :           return true;
   22942              :         }
   22943              : 
   22944     54331395 :       rtx geu;
   22945              :       /* Match x
   22946              :          (compare:CCC (neg:QI (geu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))
   22947              :                       (ltu:QI (reg:CC_CCC FLAGS_REG) (const_int 0)))  */
   22948     54331395 :       if (mode == CCCmode
   22949       347773 :           && GET_CODE (op0) == NEG
   22950         8016 :           && GET_CODE (geu = XEXP (op0, 0)) == GEU
   22951         8013 :           && REG_P (XEXP (geu, 0))
   22952         8013 :           && (GET_MODE (XEXP (geu, 0)) == CCCmode
   22953          759 :               || GET_MODE (XEXP (geu, 0)) == CCmode)
   22954         8013 :           && REGNO (XEXP (geu, 0)) == FLAGS_REG
   22955         8013 :           && XEXP (geu, 1) == const0_rtx
   22956         8013 :           && GET_CODE (op1) == LTU
   22957         8013 :           && REG_P (XEXP (op1, 0))
   22958         8013 :           && GET_MODE (XEXP (op1, 0)) == GET_MODE (XEXP (geu, 0))
   22959         8013 :           && REGNO (XEXP (op1, 0)) == FLAGS_REG
   22960     54339408 :           && XEXP (op1, 1) == const0_rtx)
   22961              :         {
   22962              :           /* This is *setcc_qi_addqi3_cconly_overflow_1_* patterns, a nop.  */
   22963         8013 :           *total = 0;
   22964         8013 :           return true;
   22965              :         }
   22966              :       /* Match x
   22967              :          (compare:CCC (neg:QI (ltu:QI (reg:CCC FLAGS_REG) (const_int 0)))
   22968              :                       (geu:QI (reg:CCC FLAGS_REG) (const_int 0)))  */
   22969     54323382 :       if (mode == CCCmode
   22970       339760 :           && GET_CODE (op0) == NEG
   22971            3 :           && GET_CODE (XEXP (op0, 0)) == LTU
   22972            3 :           && REG_P (XEXP (XEXP (op0, 0), 0))
   22973            3 :           && GET_MODE (XEXP (XEXP (op0, 0), 0)) == CCCmode
   22974            3 :           && REGNO (XEXP (XEXP (op0, 0), 0)) == FLAGS_REG
   22975            3 :           && XEXP (XEXP (op0, 0), 1) == const0_rtx
   22976            3 :           && GET_CODE (op1) == GEU
   22977            3 :           && REG_P (XEXP (op1, 0))
   22978            3 :           && GET_MODE (XEXP (op1, 0)) == CCCmode
   22979            3 :           && REGNO (XEXP (op1, 0)) == FLAGS_REG
   22980     54323385 :           && XEXP (op1, 1) == const0_rtx)
   22981              :         {
   22982              :           /* This is *x86_cmc.  */
   22983            3 :           if (!speed)
   22984            0 :             *total = COSTS_N_BYTES (1);
   22985            3 :           else if (TARGET_SLOW_STC)
   22986            0 :             *total = COSTS_N_INSNS (2);
   22987              :           else
   22988            3 :             *total = COSTS_N_INSNS (1);
   22989            3 :           return true;
   22990              :         }
   22991              : 
   22992     54323379 :       if (SCALAR_INT_MODE_P (GET_MODE (op0))
   22993    113026524 :           && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
   22994              :         {
   22995       788144 :           if (op1 == const0_rtx)
   22996       232380 :             *total = cost->add
   22997       116190 :                      + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed);
   22998              :           else
   22999      1343908 :             *total = 3*cost->add
   23000       671954 :                      + rtx_cost (op0, GET_MODE (op0), outer_code, opno, speed)
   23001       671954 :                      + rtx_cost (op1, GET_MODE (op0), outer_code, opno, speed);
   23002       788144 :           return true;
   23003              :         }
   23004              : 
   23005              :       /* The embedded comparison operand is completely free.  */
   23006     53535235 :       if (!general_operand (op0, GET_MODE (op0)) && op1 == const0_rtx)
   23007       382033 :         *total = 0;
   23008              : 
   23009              :       return false;
   23010              : 
   23011      1370580 :     case FLOAT_EXTEND:
   23012              :       /* x87 represents all values extended to 80bit.  */
   23013      1370580 :       if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23014       668719 :         *total = 0;
   23015              :       else
   23016      1403722 :         *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
   23017              :       return false;
   23018              : 
   23019        83575 :     case FLOAT_TRUNCATE:
   23020        83575 :       if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23021        58201 :         *total = cost->fadd;
   23022              :       else
   23023        50748 :         *total = vec_fp_conversion_cost (cost, GET_MODE_BITSIZE (mode));
   23024              :       return false;
   23025       681637 :     case FLOAT:
   23026       681637 :     case UNSIGNED_FLOAT:
   23027       681637 :       if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23028              :         /* TODO: We do not have cost tables for x87.  */
   23029        92823 :         *total = cost->fadd;
   23030       588814 :       else if (VECTOR_MODE_P (mode))
   23031            0 :         *total = ix86_vec_cost (mode, cost->cvtpi2ps);
   23032              :       else
   23033       588814 :         *total = cost->cvtsi2ss;
   23034              :       return false;
   23035              : 
   23036       282987 :     case FIX:
   23037       282987 :     case UNSIGNED_FIX:
   23038       282987 :       if (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23039              :         /* TODO: We do not have cost tables for x87.  */
   23040       282987 :         *total = cost->fadd;
   23041            0 :       else if (VECTOR_MODE_P (mode))
   23042            0 :         *total = ix86_vec_cost (mode, cost->cvtps2pi);
   23043              :       else
   23044            0 :         *total = cost->cvtss2si;
   23045              :       return false;
   23046              : 
   23047       354266 :     case ABS:
   23048              :       /* SSE requires memory load for the constant operand. It may make
   23049              :          sense to account for this.  Of course the constant operand may or
   23050              :          may not be reused. */
   23051       354266 :       if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23052       249844 :         *total = cost->sse_op;
   23053       104422 :       else if (X87_FLOAT_MODE_P (mode))
   23054        33437 :         *total = cost->fabs;
   23055        70985 :       else if (FLOAT_MODE_P (mode))
   23056        29580 :         *total = ix86_vec_cost (mode, cost->sse_op);
   23057        41405 :       else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
   23058         6331 :         *total = cost->sse_op;
   23059              :       return false;
   23060              : 
   23061        28497 :     case SQRT:
   23062        28497 :       if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   23063        18207 :         *total = mode == SFmode ? cost->sqrtss : cost->sqrtsd;
   23064        10290 :       else if (X87_FLOAT_MODE_P (mode))
   23065         4267 :         *total = cost->fsqrt;
   23066         6023 :       else if (FLOAT_MODE_P (mode))
   23067         6023 :         *total = ix86_vec_cost (mode,
   23068              :                                 mode == SFmode ? cost->sqrtss : cost->sqrtsd);
   23069              :       return false;
   23070              : 
   23071      3950076 :     case UNSPEC:
   23072      3950076 :       if (XINT (x, 1) == UNSPEC_TP)
   23073       125988 :         *total = 0;
   23074      3824088 :       else if (XINT (x, 1) == UNSPEC_VTERNLOG)
   23075              :         {
   23076         5210 :           *total = cost->sse_op;
   23077         5210 :           *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
   23078         5210 :           *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
   23079         5210 :           *total += rtx_cost (XVECEXP (x, 0, 2), mode, code, 2, speed);
   23080         5210 :           return true;
   23081              :         }
   23082      3818878 :       else if (XINT (x, 1) == UNSPEC_PTEST)
   23083              :         {
   23084        98913 :           *total = cost->sse_op;
   23085        98913 :           rtx test_op0 = XVECEXP (x, 0, 0);
   23086        98913 :           if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
   23087              :             return false;
   23088        98236 :           if (GET_CODE (test_op0) == AND)
   23089              :             {
   23090           23 :               rtx and_op0 = XEXP (test_op0, 0);
   23091           23 :               if (GET_CODE (and_op0) == NOT)
   23092            0 :                 and_op0 = XEXP (and_op0, 0);
   23093           23 :               *total += rtx_cost (and_op0, GET_MODE (and_op0),
   23094              :                                   AND, 0, speed)
   23095           23 :                         + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
   23096              :                                     AND, 1, speed);
   23097              :             }
   23098              :           else
   23099        98213 :             *total = rtx_cost (test_op0, GET_MODE (test_op0),
   23100              :                                UNSPEC, 0, speed);
   23101        98236 :           return true;
   23102              :         }
   23103              :       return false;
   23104              : 
   23105      2013663 :     case VEC_CONCAT:
   23106              :       /* ??? Assume all of these vector manipulation patterns are
   23107              :          recognizable.  In which case they all pretty much have the
   23108              :          same cost.
         ??? We should still recurse when computing cost.  */
   23110      2013663 :      *total = cost->sse_op;
   23111      2013663 :      return true;
   23112              : 
   23113      2394146 :     case VEC_SELECT:
   23114              :      /* Special case extracting lower part from the vector.
        This by itself needs no code and most of SSE/AVX instructions have
   23116              :         packed and single forms where the single form may be represented
   23117              :         by such VEC_SELECT.
   23118              : 
   23119              :         Use cost 1 (despite the fact that functionally equivalent SUBREG has
   23120              :         cost 0).  Making VEC_SELECT completely free, for example instructs CSE
   23121              :         to forward propagate VEC_SELECT into
   23122              : 
   23123              :            (set (reg eax) (reg src))
   23124              : 
   23125              :         which then prevents fwprop and combining. See i.e.
   23126              :         gcc.target/i386/pr91103-1.c.
   23127              : 
   23128              :         ??? rtvec_series_p test should be, for valid patterns, equivalent to
   23129              :         vec_series_lowpart_p but is not, since the latter calls
        can_change_mode_class on ALL_REGS and this returns false since x87 does
   23131              :         not support subregs at all.  */
   23132      2394146 :      if (rtvec_series_p (XVEC (XEXP (x, 1), 0), 0))
   23133       748757 :        *total = rtx_cost (XEXP (x, 0), GET_MODE (XEXP (x, 0)),
   23134       748757 :                           outer_code, opno, speed) + 1;
   23135              :      else
       /* ??? We should still recurse when computing cost.  */
   23137      1645389 :        *total = cost->sse_op;
   23138              :      return true;
   23139              : 
   23140      1216204 :     case VEC_DUPLICATE:
   23141      2432408 :       *total = rtx_cost (XEXP (x, 0),
   23142      1216204 :                          GET_MODE (XEXP (x, 0)),
   23143              :                          VEC_DUPLICATE, 0, speed);
   23144              :       /* It's broadcast instruction, not embedded broadcasting.  */
   23145      1216204 :       if (outer_code == SET)
   23146      1171262 :         *total += cost->sse_op;
   23147              : 
   23148              :      return true;
   23149              : 
   23150       714571 :     case VEC_MERGE:
   23151       714571 :       mask = XEXP (x, 2);
   23152              :       /* Scalar versions of SSE instructions may be represented as:
   23153              : 
   23154              :          (vec_merge (vec_duplicate (operation ....))
   23155              :                      (register or memory)
   23156              :                      (const_int 1))
   23157              : 
   23158              :          In this case vec_merge and vec_duplicate is for free.
   23159              :          Just recurse into operation and second operand.  */
   23160       714571 :       if (mask == const1_rtx
   23161       205626 :           && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE)
   23162              :         {
   23163        71832 :           *total = rtx_cost (XEXP (XEXP (x, 0), 0), mode,
   23164              :                              outer_code, opno, speed)
   23165        71832 :                    + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
   23166        71832 :           return true;
   23167              :         }
   23168              :       /* This is masked instruction, assume the same cost,
   23169              :          as nonmasked variant.  */
   23170       642739 :       else if (TARGET_AVX512F
   23171       642739 :                && (register_operand (mask, GET_MODE (mask))
                   /* Redundant cleanup of high bits for kmask with VL=2/4,
                      i.e. (vec_merge op0, op1, (and op3 15)).  */
   23174       120573 :                    || (GET_CODE (mask) == AND
   23175          369 :                        && register_operand (XEXP (mask, 0), GET_MODE (mask))
   23176          369 :                        && CONST_INT_P (XEXP (mask, 1))
   23177          369 :                        && ((INTVAL (XEXP (mask, 1)) == 3
   23178          131 :                             && GET_MODE_NUNITS (mode) == 2)
   23179          238 :                            || (INTVAL (XEXP (mask, 1)) == 15
   23180          238 :                                && GET_MODE_NUNITS (mode) == 4)))))
   23181              :         {
   23182       373873 :           *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
   23183       373873 :                    + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
   23184       373873 :           return true;
   23185              :         }
   23186              :       /* Combination of the two above:
   23187              : 
   23188              :          (vec_merge (vec_merge (vec_duplicate (operation ...))
   23189              :                        (register or memory)
   23190              :                        (reg:QI mask))
   23191              :                     (register or memory)
   23192              :                     (const_int 1))
   23193              : 
   23194              :          i.e. avx512fp16_vcvtss2sh_mask.  */
   23195       268866 :       else if (TARGET_AVX512F
   23196       120204 :                && mask == const1_rtx
   23197        46161 :                && GET_CODE (XEXP (x, 0)) == VEC_MERGE
   23198        27110 :                && GET_CODE (XEXP (XEXP (x, 0), 0)) == VEC_DUPLICATE
   23199       271128 :                && register_operand (XEXP (XEXP (x, 0), 2),
   23200         2262 :                                     GET_MODE (XEXP (XEXP (x, 0), 2))))
   23201              :         {
   23202         2250 :           *total = rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
   23203              :                              mode, outer_code, opno, speed)
   23204         2250 :                    + rtx_cost (XEXP (XEXP (x, 0), 1),
   23205              :                                mode, outer_code, opno, speed)
   23206         2250 :                    + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed);
   23207         2250 :           return true;
   23208              :         }
   23209              :       /* vcmp.  */
   23210       266616 :       else if (unspec_pcmp_p (mask)
   23211       266616 :                || (GET_CODE (mask) == NOT
   23212            0 :                    && unspec_pcmp_p (XEXP (mask, 0))))
   23213              :         {
   23214         1951 :           rtx uns = GET_CODE (mask) == NOT ? XEXP (mask, 0) : mask;
   23215         1951 :           rtx unsop0 = XVECEXP (uns, 0, 0);
   23216              :           /* Make (subreg:V4SI (not:V16QI (reg:V16QI ..)) 0)
   23217              :              cost the same as register.
   23218              :              This is used by avx_cmp<mode>3_ltint_not.  */
   23219         1951 :           if (SUBREG_P (unsop0))
   23220          417 :             unsop0 = XEXP (unsop0, 0);
   23221         1951 :           if (GET_CODE (unsop0) == NOT)
   23222           18 :             unsop0 = XEXP (unsop0, 0);
   23223         1951 :           *total = rtx_cost (XEXP (x, 0), mode, outer_code, opno, speed)
   23224         1951 :                    + rtx_cost (XEXP (x, 1), mode, outer_code, opno, speed)
   23225         1951 :                    + rtx_cost (unsop0, mode, UNSPEC, opno, speed)
   23226         1951 :                    + rtx_cost (XVECEXP (uns, 0, 1), mode, UNSPEC, opno, speed)
   23227         1951 :                    + cost->sse_op;
   23228         1951 :           return true;
   23229              :         }
   23230              :       else
   23231       264665 :         *total = cost->sse_op;
   23232       264665 :       return false;
   23233              : 
   23234    107093204 :     case MEM:
   23235              :       /* CONST_VECTOR_DUPLICATE_P in constant_pool is just broadcast.
   23236              :          or variants in ix86_vector_duplicate_simode_const.  */
   23237              : 
   23238    107093204 :       if (GET_MODE_SIZE (mode) >= 16
   23239     18185462 :           && VECTOR_MODE_P (mode)
   23240     12097407 :           && SYMBOL_REF_P (XEXP (x, 0))
   23241      2217209 :           && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))
   23242    109082675 :           && ix86_broadcast_from_constant (mode, x))
   23243              :         {
   23244       491133 :           *total = COSTS_N_INSNS (2) + speed;
   23245       491133 :           return true;
   23246              :         }
   23247              : 
   23248              :       /* An insn that accesses memory is slightly more expensive
   23249              :          than one that does not.  */
   23250    106602071 :       if (speed)
   23251              :         {
   23252     95313542 :           *total += 1;
   23253     95313542 :           rtx addr = XEXP (x, 0);
   23254              :           /* For MEM, rtx_cost iterates each subrtx, and adds up the costs,
   23255              :              so for MEM (reg) and MEM (reg + 4), the former costs 5,
   23256              :              the latter costs 9, it is not accurate for x86. Ideally
   23257              :              address_cost should be used, but it reduce cost too much.
   23258              :              So current solution is make constant disp as cheap as possible.  */
   23259     95313542 :           if (GET_CODE (addr) == PLUS
   23260     77709626 :               && x86_64_immediate_operand (XEXP (addr, 1), Pmode)
   23261              :               /* Only handle (reg + disp) since other forms of addr are mostly LEA,
   23262              :                  there's no additional cost for the plus of disp.  */
   23263    167411409 :               && register_operand (XEXP (addr, 0), Pmode))
   23264              :             {
   23265     56044236 :               *total += 1;
   23266     68838486 :               *total += rtx_cost (XEXP (addr, 0), Pmode, PLUS, 0, speed);
   23267     56044236 :               return true;
   23268              :             }
   23269              :         }
   23270              : 
   23271              :       return false;
   23272              : 
   23273        53604 :     case ZERO_EXTRACT:
   23274        53604 :       if (XEXP (x, 1) == const1_rtx
   23275        11409 :           && GET_CODE (XEXP (x, 2)) == ZERO_EXTEND
   23276            0 :           && GET_MODE (XEXP (x, 2)) == SImode
   23277            0 :           && GET_MODE (XEXP (XEXP (x, 2), 0)) == QImode)
   23278              :         {
   23279              :           /* Ignore cost of zero extension and masking of last argument.  */
   23280            0 :           *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23281            0 :           *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23282            0 :           *total += rtx_cost (XEXP (XEXP (x, 2), 0), mode, code, 2, speed);
   23283            0 :           return true;
   23284              :         }
   23285              :       return false;
   23286              : 
   23287     29462238 :     case IF_THEN_ELSE:
   23288     29462238 :       if (TARGET_XOP
   23289        25487 :           && VECTOR_MODE_P (mode)
   23290     29467853 :           && (GET_MODE_SIZE (mode) == 16 || GET_MODE_SIZE (mode) == 32))
   23291              :         {
   23292              :           /* vpcmov.  */
   23293         5047 :           *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (6);
   23294         5047 :           if (!REG_P (XEXP (x, 0)))
   23295         4887 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23296         5047 :           if (!REG_P (XEXP (x, 1)))
   23297         4854 :             *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23298         5047 :           if (!REG_P (XEXP (x, 2)))
   23299         4856 :             *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
   23300         5047 :           return true;
   23301              :         }
   23302            0 :       else if (TARGET_CMOVE
   23303     29457191 :                && SCALAR_INT_MODE_P (mode)
   23304     31883682 :                && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
   23305              :         {
   23306              :           /* cmov.  */
   23307      2229394 :           *total = COSTS_N_INSNS (1);
   23308      2229394 :           if (!COMPARISON_P (XEXP (x, 0)) && !REG_P (XEXP (x, 0)))
   23309            0 :             *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
   23310      2229394 :           if (!REG_P (XEXP (x, 1)))
   23311       124513 :             *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
   23312      2229394 :           if (!REG_P (XEXP (x, 2)))
   23313       726534 :             *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
   23314      2229394 :           return true;
   23315              :         }
   23316              :       return false;
   23317              : 
   23318              :     default:
   23319              :       return false;
   23320              :     }
   23321              : }
   23322              : 
   23323              : #if TARGET_MACHO
   23324              : 
   23325              : static int current_machopic_label_num;
   23326              : 
   23327              : /* Given a symbol name and its associated stub, write out the
   23328              :    definition of the stub.  */
   23329              : 
void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  /* Each stub gets a unique label number for its lazy-pointer slot.  */
  int label = ++current_machopic_label_num;

  /* For 64-bit we shouldn't get here.  */
  gcc_assert (!TARGET_64BIT);

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = targetm.strip_name_encoding (symb);

  /* Build the binder and symbol names on the stack; the +32 leaves room
     for the decoration the GEN_* macros prepend/append.  */
  length = strlen (stub);
  binder_name = XALLOCAVEC (char, length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  /* Name of the lazy pointer this stub jumps through.  */
  sprintf (lazy_ptr_name, "L%d$lz", label);

  /* Select the stub section matching the stub flavour being emitted
     (AT&T-style, pure-PIC, or non-PIC).  */
  if (MACHOPIC_ATT_STUB)
    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
  else if (MACHOPIC_PURE)
    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub_section]);

  /* Stub label, plus the directive telling the linker which symbol the
     stub stands in for.  */
  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_ATT_STUB)
    {
      /* The linker rewrites this; the hlt padding is never executed.  */
      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
    }
  else if (MACHOPIC_PURE)
    {
      /* PIC stub.  */
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
	       label, lazy_ptr_name, label);
      fprintf (file, "\tjmp\t*%%ecx\n");
    }
  else
    /* Non-PIC: jump indirectly through the lazy pointer's address.  */
    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);

  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
     it needs no stub-binding-helper.  */
  if (MACHOPIC_ATT_STUB)
    return;

  /* Emit the binder: pushes the lazy pointer's address and tail-calls
     dyld's binding helper, which resolves the symbol on first use.  */
  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
      fprintf (file, "\tpushl\t%%ecx\n");
    }
  else
    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);

  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);

  /* N.B. Keep the correspondence of these
     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
     old-pic/new-pic/non-pic stubs; altering this will break
     compatibility with existing dylibs.  */
  if (MACHOPIC_PURE)
    {
      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
    }
  else
    /* 16-byte -mdynamic-no-pic stub.  */
    switch_to_section(darwin_sections[machopic_lazy_symbol_ptr3_section]);

  /* Finally the lazy pointer itself, initialized to point at the binder
     so the first call through the stub triggers lazy binding.  */
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, ASM_LONG "%s\n", binder_name);
}
   23414              : #endif /* TARGET_MACHO */
   23415              : 
   23416              : /* Order the registers for register allocator.  */
   23417              : 
   23418              : void
   23419       214527 : x86_order_regs_for_local_alloc (void)
   23420              : {
   23421       214527 :    int pos = 0;
   23422       214527 :    int i;
   23423              : 
   23424              :    /* First allocate the local general purpose registers.  */
   23425     19951011 :    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
   23426     26601348 :      if (GENERAL_REGNO_P (i) && call_used_or_fixed_reg_p (i))
   23427      5587155 :         reg_alloc_order [pos++] = i;
   23428              : 
   23429              :    /* Global general purpose registers.  */
   23430     19951011 :    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
   23431     22910505 :      if (GENERAL_REGNO_P (i) && !call_used_or_fixed_reg_p (i))
   23432      1277709 :         reg_alloc_order [pos++] = i;
   23433              : 
   23434              :    /* x87 registers come first in case we are doing FP math
   23435              :       using them.  */
   23436       214527 :    if (!TARGET_SSE_MATH)
   23437        57537 :      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
   23438        51144 :        reg_alloc_order [pos++] = i;
   23439              : 
   23440              :    /* SSE registers.  */
   23441      1930743 :    for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
   23442      1716216 :      reg_alloc_order [pos++] = i;
   23443      1930743 :    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
   23444      1716216 :      reg_alloc_order [pos++] = i;
   23445              : 
   23446              :    /* Extended REX SSE registers.  */
   23447      3646959 :    for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
   23448      3432432 :      reg_alloc_order [pos++] = i;
   23449              : 
   23450              :    /* Mask register.  */
   23451      1930743 :    for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
   23452      1716216 :      reg_alloc_order [pos++] = i;
   23453              : 
   23454              :    /* x87 registers.  */
   23455       214527 :    if (TARGET_SSE_MATH)
   23456      1873206 :      for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
   23457      1665072 :        reg_alloc_order [pos++] = i;
   23458              : 
   23459      1930743 :    for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
   23460      1716216 :      reg_alloc_order [pos++] = i;
   23461              : 
   23462              :    /* Initialize the rest of array as we do not allocate some registers
   23463              :       at all.  */
   23464      1072635 :    while (pos < FIRST_PSEUDO_REGISTER)
   23465       858108 :      reg_alloc_order [pos++] = 0;
   23466       214527 : }
   23467              : 
   23468              : static bool
   23469    264627872 : ix86_ms_bitfield_layout_p (const_tree record_type)
   23470              : {
   23471    264627872 :   return ((TARGET_MS_BITFIELD_LAYOUT
   23472          215 :            && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
   23473    264627872 :           || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
   23474              : }
   23475              : 
/* Returns an expression indicating where the this parameter is
   located on entry to the FUNCTION.  */

static rtx
x86_this_parameter (tree function)
{
  tree type = TREE_TYPE (function);
  /* When the return value is passed in memory, a hidden pointer to the
     return slot occupies the first argument position and pushes "this"
     to the second.  */
  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
  int nregs;

  if (TARGET_64BIT)
    {
      const int *parm_regs;

      /* Select the integer argument register table matching FUNCTION's
         ABI; "this" is then the first (or, for aggregate returns, the
         second) entry of that table.  */
      if (lookup_attribute ("preserve_none", TYPE_ATTRIBUTES (type)))
        parm_regs = x86_64_preserve_none_int_parameter_registers;
      else if (ix86_function_type_abi (type) == MS_ABI)
        parm_regs = x86_64_ms_abi_int_parameter_registers;
      else
        parm_regs = x86_64_int_parameter_registers;
      return gen_rtx_REG (Pmode, parm_regs[aggr]);
    }

  nregs = ix86_function_regparm (type, function);

  /* 32-bit: with register parameter passing (and no stdarg), "this"
     may arrive in a register chosen by the calling convention.  */
  if (nregs > 0 && !stdarg_p (type))
    {
      int regno;
      unsigned int ccvt = ix86_get_callcvt (type);

      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
        regno = aggr ? DX_REG : CX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
        {
          regno = CX_REG;
          /* thiscall has a single register argument; with an aggregate
             return the hidden pointer takes it and "this" is on the
             stack just above the return address.  */
          if (aggr)
            return gen_rtx_MEM (SImode,
                                plus_constant (Pmode, stack_pointer_rtx, 4));
        }
      else
        {
          regno = AX_REG;
          if (aggr)
            {
              regno = DX_REG;
              /* With only one argument register, the hidden return
                 pointer consumes it, so "this" spills to the stack.  */
              if (nregs == 1)
                return gen_rtx_MEM (SImode,
                                    plus_constant (Pmode,
                                                   stack_pointer_rtx, 4));
            }
        }
      return gen_rtx_REG (SImode, regno);
    }

  /* Default 32-bit case: "this" is on the stack, above the return
     address and the hidden return pointer (if any).  */
  return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
                                             aggr ? 8 : 4));
}
   23533              : 
   23534              : /* Determine whether x86_output_mi_thunk can succeed.  */
   23535              : 
   23536              : static bool
   23537         4908 : x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
   23538              :                          const_tree function)
   23539              : {
   23540              :   /* 64-bit can handle anything.  */
   23541         4908 :   if (TARGET_64BIT)
   23542              :     return true;
   23543              : 
   23544              :   /* For 32-bit, everything's fine if we have one free register.  */
   23545           76 :   if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
   23546              :     return true;
   23547              : 
   23548              :   /* Need a free register for vcall_offset.  */
   23549            0 :   if (vcall_offset)
   23550              :     return false;
   23551              : 
   23552              :   /* Need a free register for GOT references.  */
   23553            0 :   if (flag_pic && !targetm.binds_local_p (function))
   23554              :     return false;
   23555              : 
   23556              :   /* Otherwise ok.  */
   23557              :   return true;
   23558              : }
   23559              : 
/* Output the assembler code for a thunk function.  THUNK_DECL is the
   declaration for the thunk function itself, FUNCTION is the decl for
   the target function.  DELTA is an immediate constant offset to be
   added to THIS.  If VCALL_OFFSET is nonzero, the word at
   *(*this + vcall_offset) should be added to THIS.  */

static void
x86_output_mi_thunk (FILE *file, tree thunk_fndecl, HOST_WIDE_INT delta,
                     HOST_WIDE_INT vcall_offset, tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
  rtx this_param = x86_this_parameter (function);
  rtx this_reg, tmp, fnaddr;
  unsigned int tmp_regno;
  rtx_insn *insn;
  /* Saved here and restored at the end; the 32-bit PIC path clears it
     temporarily below.  */
  int saved_flag_force_indirect_call = flag_force_indirect_call;

  /* Pick a scratch register that the callee's calling convention does
     not use for argument passing.  */
  if (TARGET_64BIT)
    tmp_regno = R10_REG;
  else
    {
      unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
        tmp_regno = AX_REG;
      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
        tmp_regno = DX_REG;
      else
        tmp_regno = CX_REG;

      if (flag_pic)
        flag_force_indirect_call = 0;
    }

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* CET is enabled, insert EB instruction.  */
  if ((flag_cf_protection & CF_BRANCH))
    emit_insn (gen_nop_endbr ());

  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
     pull it in now and let DELTA benefit.  */
  if (REG_P (this_param))
    this_reg = this_param;
  else if (vcall_offset)
    {
      /* Put the this parameter into %eax.  */
      this_reg = gen_rtx_REG (Pmode, AX_REG);
      emit_move_insn (this_reg, this_param);
    }
  else
    this_reg = NULL_RTX;

  /* Adjust the this parameter by a fixed constant.  */
  if (delta)
    {
      rtx delta_rtx = GEN_INT (delta);
      rtx delta_dst = this_reg ? this_reg : this_param;

      if (TARGET_64BIT)
        {
          /* DELTA too wide for an immediate operand: materialize it in
             the scratch register first.  */
          if (!x86_64_general_operand (delta_rtx, Pmode))
            {
              tmp = gen_rtx_REG (Pmode, tmp_regno);
              emit_move_insn (tmp, delta_rtx);
              delta_rtx = tmp;
            }
        }

      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
    }

  /* Adjust the this parameter by a value stored in the vtable.  */
  if (vcall_offset)
    {
      rtx vcall_addr, vcall_mem, this_mem;

      tmp = gen_rtx_REG (Pmode, tmp_regno);

      /* Load the vtable pointer (*this) into the scratch register,
         zero-extending when pointers are narrower than Pmode.  */
      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
      if (Pmode != ptr_mode)
        this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
      emit_move_insn (tmp, this_mem);

      /* Adjust the this parameter.  */
      vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
      if (TARGET_64BIT
          && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
        {
          /* VCALL_OFFSET does not fit an addressing-mode displacement;
             materialize it in %r11 and use a register-register sum.  */
          rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
          emit_move_insn (tmp2, GEN_INT (vcall_offset));
          vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
        }

      /* this += *(vtable + vcall_offset).  */
      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
      if (Pmode != ptr_mode)
        emit_insn (gen_addsi_1_zext (this_reg,
                                     gen_rtx_REG (ptr_mode,
                                                  REGNO (this_reg)),
                                     vcall_mem));
      else
        ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
    }

  /* If necessary, drop THIS back to its stack slot.  */
  if (this_reg && this_reg != this_param)
    emit_move_insn (this_param, this_reg);

  fnaddr = XEXP (DECL_RTL (function), 0);
  if (TARGET_64BIT)
    {
      if (!flag_pic || targetm.binds_local_p (function)
          || TARGET_PECOFF)
        ;
      else
        {
          /* Non-local PIC target: call through its GOT slot.  */
          tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
          tmp = gen_rtx_CONST (Pmode, tmp);
          fnaddr = gen_const_mem (Pmode, tmp);
        }
    }
  else
    {
      if (!flag_pic || targetm.binds_local_p (function))
        ;
#if TARGET_MACHO
      else if (TARGET_MACHO)
        {
          fnaddr = machopic_indirect_call_target (DECL_RTL (function));
          fnaddr = XEXP (fnaddr, 0);
        }
#endif /* TARGET_MACHO */
      else
        {
          /* 32-bit PIC: set up the GOT pointer in %ecx and call the
             target through its GOT entry.  */
          tmp = gen_rtx_REG (Pmode, CX_REG);
          output_set_got (tmp, NULL_RTX);

          fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
          fnaddr = gen_rtx_CONST (Pmode, fnaddr);
          fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
          fnaddr = gen_const_mem (Pmode, fnaddr);
        }
    }

  /* Our sibling call patterns do not allow memories, because we have no
     predicate that can distinguish between frame and non-frame memory.
     For our purposes here, we can get away with (ab)using a jump pattern,
     because we're going to do no optimization.  */
  if (MEM_P (fnaddr))
    {
      if (sibcall_insn_operand (fnaddr, word_mode))
        {
          fnaddr = XEXP (DECL_RTL (function), 0);
          tmp = gen_rtx_MEM (QImode, fnaddr);
          tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
          tmp = emit_call_insn (tmp);
          SIBLING_CALL_P (tmp) = 1;
        }
      else
        emit_jump_insn (gen_indirect_jump (fnaddr));
    }
  else
    {
      if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
        {
          // CM_LARGE_PIC always uses pseudo PIC register which is
          // uninitialized.  Since FUNCTION is local and calling it
          // doesn't go through PLT, we use scratch register %r11 as
          // PIC register and initialize it here.
          pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
          ix86_init_large_pic_reg (tmp_regno);
          fnaddr = legitimize_pic_address (fnaddr,
                                           gen_rtx_REG (Pmode, tmp_regno));
        }

      /* If the address is not directly usable as a sibcall operand,
         move it into the scratch register first.  */
      if (!sibcall_insn_operand (fnaddr, word_mode))
        {
          tmp = gen_rtx_REG (word_mode, tmp_regno);
          if (GET_MODE (fnaddr) != word_mode)
            fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
          emit_move_insn (tmp, fnaddr);
          fnaddr = tmp;
        }

      /* Emit the tail call itself as a sibling call insn.  */
      tmp = gen_rtx_MEM (QImode, fnaddr);
      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
      tmp = emit_call_insn (tmp);
      SIBLING_CALL_P (tmp) = 1;
    }
  emit_barrier ();

  /* Emit just enough of rest_of_compilation to get the insns emitted.  */
  insn = get_insns ();
  shorten_branches (insn);
  assemble_start_function (thunk_fndecl, fnname);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();
  assemble_end_function (thunk_fndecl, fnname);

  flag_force_indirect_call = saved_flag_force_indirect_call;
}
   23761              : 
   23762              : static void
   23763       270554 : x86_file_start (void)
   23764              : {
   23765       270554 :   default_file_start ();
   23766       270554 :   if (TARGET_16BIT)
   23767            6 :     fputs ("\t.code16gcc\n", asm_out_file);
   23768              : #if TARGET_MACHO
   23769              :   darwin_file_start ();
   23770              : #endif
   23771       270554 :   if (X86_FILE_START_VERSION_DIRECTIVE)
   23772              :     fputs ("\t.version\t\"01.01\"\n", asm_out_file);
   23773       270554 :   if (X86_FILE_START_FLTUSED)
   23774              :     fputs ("\t.global\t__fltused\n", asm_out_file);
   23775       270554 :   if (ix86_asm_dialect == ASM_INTEL)
   23776           54 :     fputs ("\t.intel_syntax noprefix\n", asm_out_file);
   23777       270554 : }
   23778              : 
   23779              : int
   23780    107268545 : x86_field_alignment (tree type, int computed)
   23781              : {
   23782    107268545 :   machine_mode mode;
   23783              : 
   23784    107268545 :   if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
   23785              :     return computed;
   23786      9066751 :   if (TARGET_IAMCU)
   23787            0 :     return iamcu_alignment (type, computed);
   23788      9066751 :   type = strip_array_types (type);
   23789      9066751 :   mode = TYPE_MODE (type);
   23790      9066751 :   if (mode == DFmode || mode == DCmode
   23791      8961705 :       || GET_MODE_CLASS (mode) == MODE_INT
   23792      3001300 :       || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
   23793              :     {
   23794      6065451 :       if (TYPE_ATOMIC (type) && computed > 32)
   23795              :         {
   23796            0 :           static bool warned;
   23797              : 
   23798            0 :           if (!warned && warn_psabi)
   23799              :             {
   23800            0 :               const char *url
   23801              :                 = CHANGES_ROOT_URL "gcc-11/changes.html#ia32_atomic";
   23802              : 
   23803            0 :               warned = true;
   23804            0 :               inform (input_location, "the alignment of %<_Atomic %T%> "
   23805              :                                       "fields changed in %{GCC 11.1%}",
   23806            0 :                       TYPE_MAIN_VARIANT (type), url);
   23807              :             }
   23808              :         }
   23809              :       else
   23810      6065451 :       return MIN (32, computed);
   23811              :     }
   23812              :   return computed;
   23813              : }
   23814              : 
   23815              : /* Print call to TARGET to FILE.  */
   23816              : 
   23817              : static void
   23818          296 : x86_print_call_or_nop (FILE *file, const char *target,
   23819              :                        const char *label)
   23820              : {
   23821          296 :   if (flag_nop_mcount || !strcmp (target, "nop"))
   23822              :     {
   23823            9 :       if (TARGET_16BIT)
   23824              :         /* 3 byte no-op: lea 0(%si), %si */
   23825            1 :         fprintf (file, "%s" ASM_BYTE "0x8d, 0x74, 0x00\n", label);
   23826              :       else
   23827              :         /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
   23828            8 :         fprintf (file, "%s" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n",
   23829              :                  label);
   23830              :     }
   23831          287 :   else if (!TARGET_PECOFF && flag_pic)
   23832              :     {
   23833            8 :       gcc_assert (flag_plt);
   23834              : 
   23835            8 :       fprintf (file, "%s\tcall\t%s@PLT\n", label, target);
   23836              :     }
   23837              :   else
   23838          279 :     fprintf (file, "%s\tcall\t%s\n", label, target);
   23839          296 : }
   23840              : 
   23841              : static bool
   23842          316 : current_fentry_name (const char **name)
   23843              : {
   23844          316 :   tree attr = lookup_attribute ("fentry_name",
   23845          316 :                                 DECL_ATTRIBUTES (current_function_decl));
   23846          316 :   if (!attr)
   23847              :     return false;
   23848            2 :   *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
   23849            2 :   return true;
   23850              : }
   23851              : 
   23852              : static bool
   23853           16 : current_fentry_section (const char **name)
   23854              : {
   23855           16 :   tree attr = lookup_attribute ("fentry_section",
   23856           16 :                                 DECL_ATTRIBUTES (current_function_decl));
   23857           16 :   if (!attr)
   23858              :     return false;
   23859            2 :   *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
   23860            2 :   return true;
   23861              : }
   23862              : 
/* Return a caller-saved register which isn't live or a callee-saved
   register which has been saved on stack in the prologue at entry for
   profile.  */

static int
x86_64_select_profile_regnum (bool r11_ok ATTRIBUTE_UNUSED)
{
  /* Use %r10 if the profiler is emitted before the prologue or it isn't
     used by DRAP.  */
  if (ix86_profile_before_prologue ()
      || !crtl->drap_reg
      || REGNO (crtl->drap_reg) != R10_REG)
    return R10_REG;

  /* The profiler is emitted after the prologue.  If there is a
     caller-saved register which isn't live or a callee-saved
     register saved on stack in the prologue, use it.  */

  /* Registers live on entry to the function must not be clobbered.  */
  bitmap reg_live = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));

  int i;
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    /* %r10 is excluded here because, past the early return above, it is
       known to be the DRAP register.  */
    if (GENERAL_REGNO_P (i)
        && i != R10_REG
#ifdef NO_PROFILE_COUNTERS
        /* %r11 is usable only when the caller allows it.  */
        && (r11_ok || i != R11_REG)
#else
        && i != R11_REG
#endif
        && TEST_HARD_REG_BIT (accessible_reg_set, i)
        /* Acceptable if saved on the stack by the prologue, or if it is
           call-used, not fixed, and not live at function entry.  */
        && (ix86_save_reg (i, true, true)
            || (call_used_regs[i]
                && !fixed_regs[i]
                && !REGNO_REG_SET_P (reg_live, i))))
      return i;

  /* No usable scratch register was found; report and fall back.  */
  sorry ("no register available for profiling %<-mcmodel=large%s%>",
         ix86_cmodel == CM_LARGE_PIC ? " -fPIC" : "");

  return R10_REG;
}
   23904              : 
   23905              : /* Output assembler code to FILE to increment profiler label # LABELNO
   23906              :    for profiling a function entry.  */
   23907              : void
   23908          316 : x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
   23909              : {
   23910          316 :   if (cfun->machine->insn_queued_at_entrance)
   23911              :     {
   23912            7 :       if (cfun->machine->insn_queued_at_entrance == TYPE_ENDBR)
   23913            6 :         fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
   23914            7 :       unsigned int patch_area_size
   23915            7 :         = crtl->patch_area_size - crtl->patch_area_entry;
   23916            7 :       if (patch_area_size)
   23917            2 :         ix86_output_patchable_area (patch_area_size,
   23918              :                                     crtl->patch_area_entry == 0);
   23919              :     }
   23920              : 
   23921          316 :   const char *mcount_name = MCOUNT_NAME;
   23922              : 
   23923          316 :   bool fentry_section_p
   23924          316 :     = (flag_record_mcount
   23925          617 :        || lookup_attribute ("fentry_section",
   23926          301 :                             DECL_ATTRIBUTES (current_function_decl)));
   23927              : 
   23928              :   const char *label = fentry_section_p ? "1:" : "";
   23929              : 
   23930          316 :   if (current_fentry_name (&mcount_name))
   23931              :     ;
   23932          314 :   else if (fentry_name)
   23933            1 :     mcount_name = fentry_name;
   23934          313 :   else if (flag_fentry)
   23935          301 :     mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
   23936              : 
   23937          316 :   if (TARGET_64BIT)
   23938              :     {
   23939              : #ifndef NO_PROFILE_COUNTERS
   23940              :       if (ASSEMBLER_DIALECT == ASM_INTEL)
   23941              :         fprintf (file, "\tlea\tr11, %sP%d[rip]\n", LPREFIX, labelno);
   23942              :       else
   23943              :         fprintf (file, "\tleaq\t%sP%d(%%rip), %%r11\n", LPREFIX, labelno);
   23944              : #endif
   23945              : 
   23946          315 :       int scratch;
   23947          315 :       const char *reg;
   23948          315 :       char legacy_reg[4] = { 0 };
   23949              : 
   23950          315 :       if (!TARGET_PECOFF)
   23951              :         {
   23952          315 :           switch (ix86_cmodel)
   23953              :             {
   23954            7 :             case CM_LARGE:
   23955            7 :               scratch = x86_64_select_profile_regnum (true);
   23956            7 :               reg = hi_reg_name[scratch];
   23957            7 :               if (LEGACY_INT_REGNO_P (scratch))
   23958              :                 {
   23959            0 :                   legacy_reg[0] = 'r';
   23960            0 :                   legacy_reg[1] = reg[0];
   23961            0 :                   legacy_reg[2] = reg[1];
   23962            0 :                   reg = legacy_reg;
   23963              :                 }
   23964            7 :               if (ASSEMBLER_DIALECT == ASM_INTEL)
   23965            1 :                 fprintf (file, "%s\tmovabs\t%s, OFFSET FLAT:%s\n"
   23966              :                                "\tcall\t%s\n", label, reg, mcount_name,
   23967              :                                reg);
   23968              :               else
   23969            6 :                 fprintf (file, "%s\tmovabsq\t$%s, %%%s\n\tcall\t*%%%s\n",
   23970              :                          label, mcount_name, reg, reg);
   23971              :               break;
   23972           10 :             case CM_LARGE_PIC:
   23973              : #ifdef NO_PROFILE_COUNTERS
   23974           10 :               scratch = x86_64_select_profile_regnum (false);
   23975           10 :               reg = hi_reg_name[scratch];
   23976           10 :               if (LEGACY_INT_REGNO_P (scratch))
   23977              :                 {
   23978            0 :                   legacy_reg[0] = 'r';
   23979            0 :                   legacy_reg[1] = reg[0];
   23980            0 :                   legacy_reg[2] = reg[1];
   23981            0 :                   reg = legacy_reg;
   23982              :                 }
   23983           10 :               if (ASSEMBLER_DIALECT == ASM_INTEL)
   23984              :                 {
   23985            1 :                   fprintf (file, "1:movabs\tr11, "
   23986              :                                  "OFFSET FLAT:_GLOBAL_OFFSET_TABLE_-1b\n");
   23987            1 :                   fprintf (file, "\tlea\t%s, 1b[rip]\n", reg);
   23988            1 :                   fprintf (file, "\tadd\t%s, r11\n", reg);
   23989            1 :                   fprintf (file, "\tmovabs\tr11, OFFSET FLAT:%s@PLTOFF\n",
   23990              :                            mcount_name);
   23991            1 :                   fprintf (file, "\tadd\t%s, r11\n", reg);
   23992            1 :                   fprintf (file, "\tcall\t%s\n", reg);
   23993            1 :                   break;
   23994              :                 }
   23995            9 :               fprintf (file,
   23996              :                        "1:\tmovabsq\t$_GLOBAL_OFFSET_TABLE_-1b, %%r11\n");
   23997            9 :               fprintf (file, "\tleaq\t1b(%%rip), %%%s\n", reg);
   23998            9 :               fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
   23999            9 :               fprintf (file, "\tmovabsq\t$%s@PLTOFF, %%r11\n", mcount_name);
   24000            9 :               fprintf (file, "\taddq\t%%r11, %%%s\n", reg);
   24001            9 :               fprintf (file, "\tcall\t*%%%s\n", reg);
   24002              : #else
   24003              :               sorry ("profiling %<-mcmodel=large%> with PIC is not supported");
   24004              : #endif
   24005            9 :               break;
   24006           12 :             case CM_SMALL_PIC:
   24007           12 :             case CM_MEDIUM_PIC:
   24008           12 :               if (!flag_plt)
   24009              :                 {
   24010            3 :                   if (ASSEMBLER_DIALECT == ASM_INTEL)
   24011            0 :                     fprintf (file, "%s\tcall\t[QWORD PTR %s@GOTPCREL[rip]]\n",
   24012              :                              label, mcount_name);
   24013              :                   else
   24014            3 :                     fprintf (file, "%s\tcall\t*%s@GOTPCREL(%%rip)\n",
   24015              :                              label, mcount_name);
   24016              :                   break;
   24017              :                 }
   24018              :               /* fall through */
   24019          295 :             default:
   24020          295 :               x86_print_call_or_nop (file, mcount_name, label);
   24021          295 :               break;
   24022              :             }
   24023              :         }
   24024              :       else
   24025              :         x86_print_call_or_nop (file, mcount_name, label);
   24026              :     }
   24027            1 :   else if (flag_pic)
   24028              :     {
   24029              : #ifndef NO_PROFILE_COUNTERS
   24030              :       if (ASSEMBLER_DIALECT == ASM_INTEL)
   24031              :         fprintf (file,
   24032              :                  "\tlea\t" PROFILE_COUNT_REGISTER ", %sP%d@GOTOFF[ebx]\n",
   24033              :                  LPREFIX, labelno);
   24034              :       else
   24035              :         fprintf (file,
   24036              :                  "\tleal\t%sP%d@GOTOFF(%%ebx), %%" PROFILE_COUNT_REGISTER "\n",
   24037              :                  LPREFIX, labelno);
   24038              : #endif
   24039            0 :       if (flag_plt)
   24040            0 :         x86_print_call_or_nop (file, mcount_name, label);
   24041            0 :       else if (ASSEMBLER_DIALECT == ASM_INTEL)
   24042            0 :         fprintf (file, "%s\tcall\t[DWORD PTR %s@GOT[ebx]]\n",
   24043              :                  label, mcount_name);
   24044              :       else
   24045            0 :         fprintf (file, "%s\tcall\t*%s@GOT(%%ebx)\n",
   24046              :                  label, mcount_name);
   24047              :     }
   24048              :   else
   24049              :     {
   24050              : #ifndef NO_PROFILE_COUNTERS
   24051              :       if (ASSEMBLER_DIALECT == ASM_INTEL)
   24052              :         fprintf (file,
   24053              :                  "\tmov\t" PROFILE_COUNT_REGISTER ", OFFSET FLAT:%sP%d\n",
   24054              :                  LPREFIX, labelno);
   24055              :       else
   24056              :         fprintf (file, "\tmovl\t$%sP%d, %%" PROFILE_COUNT_REGISTER "\n",
   24057              :                  LPREFIX, labelno);
   24058              : #endif
   24059            1 :       x86_print_call_or_nop (file, mcount_name, label);
   24060              :     }
   24061              : 
   24062          316 :   if (fentry_section_p)
   24063              :     {
   24064           16 :       const char *sname = "__mcount_loc";
   24065              : 
   24066           16 :       if (current_fentry_section (&sname))
   24067              :         ;
   24068           14 :       else if (fentry_section)
   24069            1 :         sname = fentry_section;
   24070              : 
   24071           16 :       fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
   24072           16 :       fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
   24073           16 :       fprintf (file, "\t.previous\n");
   24074              :     }
   24075          316 : }
   24076              : 
/* We don't have exact information about the insn sizes, but we may assume
   quite safely that we are informed about all 1 byte insns and memory
   address sizes.  This is enough to eliminate unnecessary padding in
   99% of cases.  */

/* Return a conservative lower bound, in bytes, on the encoded length of
   INSN.  Returns 0 for insns that emit no code from our point of view:
   inactive insns, alignment unspecs we emitted ourselves, and asm
   statements (whose minimum length we estimate as 0).  */

int
ix86_min_insn_size (rtx_insn *insn)
{
  int l = 0, len;

  if (!INSN_P (insn) || !active_insn_p (insn))
    return 0;

  /* Discard alignments we've emitted and jump instructions.  */
  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
    return 0;

  /* Important case - calls are always 5 bytes.
     It is common to have many calls in the row.  */
  if (CALL_P (insn)
      && symbolic_reference_mentioned_p (PATTERN (insn))
      && !SIBLING_CALL_P (insn))
    return 5;
  len = get_attr_length (insn);
  if (len <= 1)
    return 1;

  /* For normal instructions we rely on get_attr_length being exact,
     with a few exceptions.  */
  if (!JUMP_P (insn))
    {
      enum attr_type type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MULTI:
	  /* Inline asm: we cannot know its size, so estimate 0.  */
	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
	      || asm_noperands (PATTERN (insn)) >= 0)
	    return 0;
	  break;
	case TYPE_OTHER:
	case TYPE_FCMP:
	  /* For these types get_attr_length may overestimate; fall
	     through to the address-length based lower bound below.  */
	  break;
	default:
	  /* Otherwise trust get_attr_length.  */
	  return len;
	}

      /* Lower bound: opcode byte plus the memory-address length; a
	 symbolic reference implies at least a 4 byte displacement.  */
      l = get_attr_length_address (insn);
      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
	l = 4;
    }
  if (l)
    return 1+l;
  else
    return 2;
}
   24135              : 
#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN

/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
   window.  Walk the insn stream and, where a minimal interval containing
   4 jumps could fit in one 16 byte page, emit a max-skip align before the
   last jump so that at most 3 of them share a page.  */

static void
ix86_avoid_jump_mispredicts (void)
{
  rtx_insn *insn, *start = get_insns ();
  int nbytes = 0, njumps = 0;
  bool isjump = false;

  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of instructions in the interval including INSN and not including
     START.  When the NBYTES is smaller than 16 bytes, it is possible
     that the end of START and INSN ends up in the same 16byte page.

     The smallest offset in the page INSN can start is the case where START
     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).

     Don't consider asm goto as jump, while it can contain a jump, it doesn't
     have to, control transfer to label(s) can be performed through other
     means, and also we estimate minimum length of all asm stmts as 0.  */
  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      int min_size;

      if (LABEL_P (insn))
	{
	  align_flags alignment = label_to_alignment (insn);
	  int align = alignment.levels[0].log;
	  int max_skip = alignment.levels[0].maxskip;

	  if (max_skip > 15)
	    max_skip = 15;
	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
	     already in the current 16 byte page, because otherwise
	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
	     bytes to reach 16 byte boundary.  */
	  if (align <= 0
	      || (align <= 3 && max_skip != (1 << align) - 1))
	    max_skip = 0;
	  if (dump_file)
	    fprintf (dump_file, "Label %i with max_skip %i\n",
		     INSN_UID (insn), max_skip);
	  if (max_skip)
	    {
	      /* Shrink the window from the front until the label's
		 possible skip can no longer push INSN into START's
		 16 byte page; keep NJUMPS/ISJUMP in sync.  */
	      while (nbytes + max_skip >= 16)
		{
		  start = NEXT_INSN (start);
		  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
		      || CALL_P (start))
		    njumps--, isjump = true;
		  else
		    isjump = false;
		  nbytes -= ix86_min_insn_size (start);
		}
	    }
	  continue;
	}

      min_size = ix86_min_insn_size (insn);
      nbytes += min_size;
      if (dump_file)
	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
		 INSN_UID (insn), min_size);
      if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
	  || CALL_P (insn))
	njumps++;
      else
	continue;

      /* Keep the interval minimal: drop leading insns until it holds at
	 most 4 jumps; ISJUMP records whether the last dropped insn was
	 itself a jump/call (i.e. the interval is bounded by jumps).  */
      while (njumps > 3)
	{
	  start = NEXT_INSN (start);
	  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
	      || CALL_P (start))
	    njumps--, isjump = true;
	  else
	    isjump = false;
	  nbytes -= ix86_min_insn_size (start);
	}
      gcc_assert (njumps >= 0);
      if (dump_file)
	fprintf (dump_file, "Interval %i to %i has %i bytes\n",
		 INSN_UID (start), INSN_UID (insn), nbytes);

      if (njumps == 3 && isjump && nbytes < 16)
	{
	  /* Pad so that INSN cannot land in the same 16 byte page as
	     the dropped jump preceding START.  */
	  int padsize = 15 - nbytes + ix86_min_insn_size (insn);

	  if (dump_file)
	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
		     INSN_UID (insn), padsize);
	  emit_insn_before (gen_max_skip_align (GEN_INT (4), GEN_INT (padsize)), insn);
	}
    }
}
#endif
   24237              : 
/* AMD Athlon works faster
   when RET is not destination of conditional jump or directly preceded
   by other jump instruction.  We avoid the penalty by inserting NOP just
   before the RET instructions in such cases.  */
static void
ix86_pad_returns (void)
{
  edge e;
  edge_iterator ei;

  /* Inspect every block that feeds the exit block; each such block may
     end in a return insn.  */
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      basic_block bb = e->src;
      rtx_insn *ret = BB_END (bb);
      rtx_insn *prev;
      bool replace = false;

      /* Skip blocks not ending in a return, and cold blocks where the
	 extra byte is not worth it.  */
      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
	  || optimize_bb_for_size_p (bb))
	continue;
      /* Find the nearest preceding active insn or label.  */
      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
	if (active_insn_p (prev) || LABEL_P (prev))
	  break;
      if (prev && LABEL_P (prev))
	{
	  edge e;
	  edge_iterator ei;

	  /* The return is a jump target: pad if any non-fallthru edge
	     with nonzero frequency reaches it (i.e. some jump can land
	     directly on the RET).  */
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
		&& !(e->flags & EDGE_FALLTHRU))
	      {
		replace = true;
		break;
	      }
	}
      if (!replace)
	{
	  /* Pad when the RET directly follows a conditional jump or
	     a call.  */
	  prev = prev_active_insn (ret);
	  if (prev
	      && ((JUMP_P (prev) && any_condjump_p (prev))
		  || CALL_P (prev)))
	    replace = true;
	  /* Empty functions get branch mispredict even when
	     the jump destination is not visible to us.  */
	  if (!prev && !optimize_function_for_size_p (cfun))
	    replace = true;
	}
      if (replace)
	{
	  /* Replace the short return with the longer-encoded form.  */
	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
	  delete_insn (ret);
	}
    }
}
   24293              : 
   24294              : /* Count the minimum number of instructions in BB.  Return 4 if the
   24295              :    number of instructions >= 4.  */
   24296              : 
   24297              : static int
   24298           42 : ix86_count_insn_bb (basic_block bb)
   24299              : {
   24300           42 :   rtx_insn *insn;
   24301           42 :   int insn_count = 0;
   24302              : 
   24303              :   /* Count number of instructions in this block.  Return 4 if the number
   24304              :      of instructions >= 4.  */
   24305          297 :   FOR_BB_INSNS (bb, insn)
   24306              :     {
   24307              :       /* Only happen in exit blocks.  */
   24308          291 :       if (JUMP_P (insn)
   24309          291 :           && ANY_RETURN_P (PATTERN (insn)))
   24310              :         break;
   24311              : 
   24312          267 :       if (NONDEBUG_INSN_P (insn)
   24313          102 :           && GET_CODE (PATTERN (insn)) != USE
   24314          351 :           && GET_CODE (PATTERN (insn)) != CLOBBER)
   24315              :         {
   24316           84 :           insn_count++;
   24317           84 :           if (insn_count >= 4)
   24318              :             return insn_count;
   24319              :         }
   24320              :     }
   24321              : 
   24322              :   return insn_count;
   24323              : }
   24324              : 
   24325              : 
/* Count the minimum number of instructions in code path in BB.
   Return 4 if the number of instructions >= 4.  */

static int
ix86_count_insn (basic_block bb)
{
  edge e;
  edge_iterator ei;
  int min_prev_count;

  /* Only bother counting instructions along paths with no
     more than 2 basic blocks between entry and exit.  Given
     that BB has an edge to exit, determine if a predecessor
     of BB has an edge from entry.  If so, compute the number
     of instructions in the predecessor block.  If there
     happen to be multiple such blocks, compute the minimum.  */
  min_prev_count = 4;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      edge prev_e;
      edge_iterator prev_ei;

      if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	{
	  /* BB is reached directly from entry: nothing precedes it.  */
	  min_prev_count = 0;
	  break;
	}
      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
	{
	  if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
	    {
	      /* Predecessor E->SRC is itself reached from entry;
		 its insns are on the path, keep the smallest count.  */
	      int count = ix86_count_insn_bb (e->src);
	      if (count < min_prev_count)
		min_prev_count = count;
	      break;
	    }
	}
    }

  /* Paths already holding 4+ insns need no further counting.  */
  if (min_prev_count < 4)
    min_prev_count += ix86_count_insn_bb (bb);

  return min_prev_count;
}
   24370              : 
/* Pad short function to 4 instructions.  Emits nop insns just before the
   epilogue of each returning block whose shortest path from entry holds
   fewer than 4 instructions.  */

static void
ix86_pad_short_function (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *ret = BB_END (e->src);
      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
	{
	  int insn_count = ix86_count_insn (e->src);

	  /* Pad short function.  */
	  if (insn_count < 4)
	    {
	      rtx_insn *insn = ret;

	      /* Find epilogue.  */
	      while (insn
		     && (!NOTE_P (insn)
			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
		insn = PREV_INSN (insn);

	      /* No epilogue note found; pad directly before the
		 return insn instead.  */
	      if (!insn)
		insn = ret;

	      /* Two NOPs count as one instruction.  */
	      insn_count = 2 * (4 - insn_count);
	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
	    }
	}
    }
}
   24407              : 
/* Fix up a Windows system unwinder issue.  If an EH region falls through into
   the epilogue, the Windows system unwinder will apply epilogue logic and
   produce incorrect offsets.  This can be avoided by adding a nop between
   the last insn that can throw and the first insn of the epilogue.  */

static void
ix86_seh_fixup_eh_fallthru (void)
{
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      rtx_insn *insn, *next;

      /* Find the beginning of the epilogue.  */
      for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
	  break;
      if (insn == NULL)
	continue;

      /* We only care about preceding insns that can throw.  */
      insn = prev_active_insn (insn);
      if (insn == NULL || !can_throw_internal (insn))
	continue;

      /* Do not separate calls from their debug information.  */
      for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
	if (NOTE_P (next) && NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION)
	  insn = next;
	else
	  break;

      /* Place a single nop after the throwing insn (and its debug
	 notes), separating it from the epilogue.  */
      emit_insn_after (gen_nops (const1_rtx), insn);
    }
}
/* Split vector load from parm_decl to elemental loads to avoid STLF
   stalls.  Only the first x86_stlf_window_ninsns insns of the function
   are scanned, and scanning stops at the first control transfer.  */
static void
ix86_split_stlf_stall_load ()
{
  rtx_insn* insn, *start = get_insns ();
  unsigned window = 0;

  for (insn = start; insn; insn = NEXT_INSN (insn))
    {
      if (!NONDEBUG_INSN_P (insn))
	continue;
      window++;
      /* Insert 64 vaddps %xmm18, %xmm19, %xmm20(no dependence between each
	 other, just emulate for pipeline) before stalled load, stlf stall
	 case is as fast as no stall cases on CLX.
	 Since CFG is freed before machine_reorg, just do a rough
	 calculation of the window according to the layout.  */
      if (window > (unsigned) x86_stlf_window_ninsns)
	return;

      /* Stop at any control transfer - beyond it the layout no longer
	 approximates execution order.  */
      if (any_uncondjump_p (insn)
	  || ANY_RETURN_P (PATTERN (insn))
	  || CALL_P (insn))
	return;

      rtx set = single_set (insn);
      if (!set)
	continue;
      rtx src = SET_SRC (set);
      if (!MEM_P (src)
	  /* Only handle V2DFmode load since it doesn't need any scratch
	     register.  */
	  || GET_MODE (src) != E_V2DFmode
	  || !MEM_EXPR (src)
	  || TREE_CODE (get_base_address (MEM_EXPR (src))) != PARM_DECL)
	continue;

      /* Rewrite the V2DF load as two DFmode element loads: emit the
	 low-half load before INSN, then turn INSN itself into the
	 high-half load.  */
      rtx zero = CONST0_RTX (V2DFmode);
      rtx dest = SET_DEST (set);
      rtx m = adjust_address (src, DFmode, 0);
      rtx loadlpd = gen_sse2_loadlpd (dest, zero, m);
      emit_insn_before (loadlpd, insn);
      m = adjust_address (src, DFmode, 8);
      rtx loadhpd = gen_sse2_loadhpd (dest, dest, m);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fputs ("Due to potential STLF stall, split instruction:\n",
		 dump_file);
	  print_rtl_single (dump_file, insn);
	  fputs ("To:\n", dump_file);
	  print_rtl_single (dump_file, loadlpd);
	  print_rtl_single (dump_file, loadhpd);
	}
      /* Replace INSN's pattern in place and force re-recognition; the
	 new pattern must be recognizable.  */
      PATTERN (insn) = loadhpd;
      INSN_CODE (insn) = -1;
      gcc_assert (recog_memoized (insn) != -1);
    }
}
   24504              : 
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.
   Runs as the machine-dependent reorg pass; all sub-passes below are
   speed optimizations and are skipped when optimizing for size.  */
static void
ix86_reorg (void)
{
  /* We are freeing block_for_insn in the toplev to keep compatibility
     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
  compute_bb_for_insn ();

  if (TARGET_SEH && current_function_has_exception_handlers ())
    ix86_seh_fixup_eh_fallthru ();

  if (optimize && optimize_function_for_speed_p (cfun))
    {
      if (TARGET_SSE2)
	ix86_split_stlf_stall_load ();
      /* Short-function padding and return padding are alternatives;
	 apply at most one of them.  */
      if (TARGET_PAD_SHORT_FUNCTION)
	ix86_pad_short_function ();
      else if (TARGET_PAD_RETURNS)
	ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
      if (TARGET_FOUR_JUMP_LIMIT)
	ix86_avoid_jump_mispredicts ();
#endif
    }
}
   24531              : 
   24532              : /* Return nonzero when QImode register that must be represented via REX prefix
   24533              :    is used.  */
   24534              : bool
   24535      9173928 : x86_extended_QIreg_mentioned_p (rtx_insn *insn)
   24536              : {
   24537      9173928 :   int i;
   24538      9173928 :   extract_insn_cached (insn);
   24539     34734796 :   for (i = 0; i < recog_data.n_operands; i++)
   24540      4784935 :     if (GENERAL_REG_P (recog_data.operand[i])
   24541     22822765 :         && !QI_REGNO_P (REGNO (recog_data.operand[i])))
   24542              :        return true;
   24543              :   return false;
   24544              : }
   24545              : 
   24546              : /* Return true when INSN mentions register that must be encoded using REX
   24547              :    prefix.  */
   24548              : bool
   24549    195932865 : x86_extended_reg_mentioned_p (rtx insn)
   24550              : {
   24551    195932865 :   subrtx_iterator::array_type array;
   24552   1026070465 :   FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
   24553              :     {
   24554    878814833 :       const_rtx x = *iter;
   24555    878814833 :       if (REG_P (x)
   24556    878814833 :           && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))
   24557    252281852 :               || REX2_INT_REGNO_P (REGNO (x))))
   24558     48677233 :         return true;
   24559              :     }
   24560    147255632 :   return false;
   24561    195932865 : }
   24562              : 
   24563              : /* Return true when INSN mentions register that must be encoded using REX2
   24564              :    prefix.  */
   24565              : bool
   24566      2092856 : x86_extended_rex2reg_mentioned_p (rtx insn)
   24567              : {
   24568      2092856 :   subrtx_iterator::array_type array;
   24569      9742564 :   FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
   24570              :     {
   24571      7650379 :       const_rtx x = *iter;
   24572      7650379 :       if (REG_P (x) && REX2_INT_REGNO_P (REGNO (x)))
   24573          671 :         return true;
   24574              :     }
   24575      2092185 :   return false;
   24576      2092856 : }
   24577              : 
   24578              : /* Return true when rtx operands mentions register that must be encoded using
   24579              :    evex prefix.  */
   24580              : bool
   24581           10 : x86_evex_reg_mentioned_p (rtx operands[], int nops)
   24582              : {
   24583           10 :   int i;
   24584           28 :   for (i = 0; i < nops; i++)
   24585           22 :     if (EXT_REX_SSE_REG_P (operands[i])
   24586           40 :         || x86_extended_rex2reg_mentioned_p (operands[i]))
   24587            4 :       return true;
   24588              :   return false;
   24589              : }
   24590              : 
/* If profitable, negate (without causing overflow) integer constant
   of mode MODE at location LOC.  Return true in this case.  */
bool
x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (*loc))
    return false;

  switch (mode)
    {
    case E_DImode:
      /* DImode x86_64 constants must fit in 32 bits.  */
      gcc_assert (x86_64_immediate_operand (*loc, mode));

      /* Continue the checks in SImode, where the immediate lives.  */
      mode = SImode;
      break;

    case E_SImode:
    case E_HImode:
    case E_QImode:
      break;

    default:
      gcc_unreachable ();
    }

  /* Avoid overflows.  Negating the mode's most negative value would
     not be representable.  */
  if (mode_signbit_p (mode, *loc))
    return false;

  val = INTVAL (*loc);

  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
  if ((val < 0 && val != -128)
      || val == 128)
    {
      *loc = GEN_INT (-val);
      return true;
    }

  return false;
}
   24636              : 
/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
   optabs would emit if we didn't have TFmode patterns.

   OPERANDS[0] is the FP destination, OPERANDS[1] the unsigned integer
   source.  Small (non-negative when viewed as signed) inputs use the
   plain signed conversion; inputs with the high bit set are halved
   (keeping the lost low bit ORed back in so rounding is unaffected),
   converted, and then doubled.  */

void
x86_emit_floatuns (rtx operands[2])
{
  rtx_code_label *neglab, *donelab;
  rtx i0, i1, f0, in, out;
  machine_mode mode, inmode;

  inmode = GET_MODE (operands[1]);
  gcc_assert (inmode == SImode || inmode == DImode);

  out = operands[0];
  in = force_reg (inmode, operands[1]);
  mode = GET_MODE (out);
  neglab = gen_label_rtx ();
  donelab = gen_label_rtx ();
  f0 = gen_reg_rtx (mode);

  /* If the input looks negative as a signed value, its high bit is set
     and the signed float conversion would be wrong; branch to NEGLAB.  */
  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);

  /* High bit clear: a signed conversion gives the right answer.  */
  expand_float (out, in, 0);

  emit_jump_insn (gen_jump (donelab));
  emit_barrier ();

  emit_label (neglab);

  /* High bit set: compute (in >> 1) | (in & 1), convert, then double.
     ORing the low bit back keeps the result correctly rounded.  */
  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
                            1, OPTAB_DIRECT);
  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
                            1, OPTAB_DIRECT);
  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);

  expand_float (f0, i0, 0);

  /* out = f0 + f0, i.e. doubling the halved conversion.  */
  emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));

  emit_label (donelab);
}
   24678              : 
/* Return the diagnostic message string if conversion from FROMTYPE to
   TOTYPE is not allowed, NULL otherwise.  */

static const char *
ix86_invalid_conversion (const_tree fromtype, const_tree totype)
{
  machine_mode from_mode = element_mode (fromtype);
  machine_mode to_mode = element_mode (totype);

  if (!TARGET_SSE2 && from_mode != to_mode)
    {
      /* Do not allow conversions to/from BFmode/HFmode scalar types
         when TARGET_SSE2 is not available.  */
      if (from_mode == BFmode)
        return N_("invalid conversion from type %<__bf16%> "
                  "without option %<-msse2%>");
      if (from_mode == HFmode)
        return N_("invalid conversion from type %<_Float16%> "
                  "without option %<-msse2%>");
      if (to_mode == BFmode)
        return N_("invalid conversion to type %<__bf16%> "
                  "without option %<-msse2%>");
      if (to_mode == HFmode)
        return N_("invalid conversion to type %<_Float16%> "
                  "without option %<-msse2%>");
    }

  /* Warn for silent implicit conversion between __bf16 and short,
     since __bfloat16 has been refined as a real __bf16 instead of
     short since GCC 13.  */
  if (element_mode (fromtype) != element_mode (totype)
      && (TARGET_AVX512BF16 || TARGET_AVXNECONVERT))
    {
      /* Warn for silent implicit conversion where user may expect
         a bitcast.  */
      if ((TYPE_MODE (fromtype) == BFmode
           && TYPE_MODE (totype) == HImode)
          || (TYPE_MODE (totype) == BFmode
              && TYPE_MODE (fromtype) == HImode))
        warning (0, "%<__bfloat16%> is redefined from typedef %<short%> "
                "to real %<__bf16%> since GCC 13.1, be careful of "
                 "implicit conversion between %<__bf16%> and %<short%>; "
                 "an explicit bitcast may be needed here");
    }

  /* Conversion allowed.  */
  return NULL;
}
   24727              : 
   24728              : /* Return the diagnostic message string if the unary operation OP is
   24729              :    not permitted on TYPE, NULL otherwise.  */
   24730              : 
   24731              : static const char *
   24732     94000765 : ix86_invalid_unary_op (int op, const_tree type)
   24733              : {
   24734     94000765 :   machine_mode mmode = element_mode (type);
   24735              :   /* Reject all single-operand operations on BFmode/HFmode except for &
   24736              :      when TARGET_SSE2 is not available.  */
   24737     94000765 :   if (!TARGET_SSE2 && op != ADDR_EXPR)
   24738              :     {
   24739       110834 :       if (mmode == BFmode)
   24740              :         return N_("operation not permitted on type %<__bf16%> "
   24741              :                   "without option %<-msse2%>");
   24742       110834 :       if (mmode == HFmode)
   24743            0 :         return N_("operation not permitted on type %<_Float16%> "
   24744              :                   "without option %<-msse2%>");
   24745              :     }
   24746              : 
   24747              :   /* Operation allowed.  */
   24748              :   return NULL;
   24749              : }
   24750              : 
   24751              : /* Return the diagnostic message string if the binary operation OP is
   24752              :    not permitted on TYPE1 and TYPE2, NULL otherwise.  */
   24753              : 
   24754              : static const char *
   24755    177355832 : ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
   24756              :                         const_tree type2)
   24757              : {
   24758    177355832 :   machine_mode type1_mode = element_mode (type1);
   24759    177355832 :   machine_mode type2_mode = element_mode (type2);
   24760              :   /* Reject all 2-operand operations on BFmode or HFmode
   24761              :      when TARGET_SSE2 is not available.  */
   24762    177355832 :   if (!TARGET_SSE2)
   24763              :     {
   24764      1006455 :       if (type1_mode == BFmode || type2_mode == BFmode)
   24765              :         return N_("operation not permitted on type %<__bf16%> "
   24766              :                   "without option %<-msse2%>");
   24767              : 
   24768      1006455 :       if (type1_mode == HFmode || type2_mode == HFmode)
   24769            0 :         return N_("operation not permitted on type %<_Float16%> "
   24770              :                   "without option %<-msse2%>");
   24771              :     }
   24772              : 
   24773              :   /* Operation allowed.  */
   24774              :   return NULL;
   24775              : }
   24776              : 
   24777              : 
   24778              : /* Target hook for scalar_mode_supported_p.  */
   24779              : static bool
   24780      4481565 : ix86_scalar_mode_supported_p (scalar_mode mode)
   24781              : {
   24782      4481565 :   if (DECIMAL_FLOAT_MODE_P (mode))
   24783       625545 :     return default_decimal_float_supported_p ();
   24784      3856020 :   else if (mode == TFmode)
   24785              :     return true;
   24786      3536841 :   else if (mode == HFmode || mode == BFmode)
   24787              :     return true;
   24788              :   else
   24789      2900466 :     return default_scalar_mode_supported_p (mode);
   24790              : }
   24791              : 
   24792              : /* Implement TARGET_LIBGCC_FLOATING_POINT_MODE_SUPPORTED_P - return TRUE
   24793              :    if MODE is HFmode, and punt to the generic implementation otherwise.  */
   24794              : 
   24795              : static bool
   24796      2191694 : ix86_libgcc_floating_mode_supported_p (scalar_float_mode mode)
   24797              : {
   24798              :   /* NB: Always return TRUE for HFmode so that the _Float16 type will
   24799              :      be defined by the C front-end for AVX512FP16 intrinsics.  We will
   24800              :      issue an error in ix86_expand_move for HFmode if AVX512FP16 isn't
   24801              :      enabled.  */
   24802      1873982 :   return ((mode == HFmode || mode == BFmode)
   24803      3747964 :           ? true
   24804      1556270 :           : default_libgcc_floating_mode_supported_p (mode));
   24805              : }
   24806              : 
   24807              : /* Implements target hook vector_mode_supported_p.  */
   24808              : static bool
   24809   1317468053 : ix86_vector_mode_supported_p (machine_mode mode)
   24810              : {
   24811              :   /* For ia32, scalar TImode isn't supported and so V1TImode shouldn't be
   24812              :      either.  */
   24813   1453057104 :   if (!TARGET_64BIT && GET_MODE_INNER (mode) == TImode)
   24814              :     return false;
   24815   1317467603 :   if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
   24816              :     return true;
   24817   1109751324 :   if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
   24818              :     return true;
   24819    495951132 :   if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
   24820              :     return true;
   24821    356907897 :   if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
   24822              :     return true;
   24823    222983928 :   if ((TARGET_MMX || TARGET_MMX_WITH_SSE)
   24824    222927241 :       && VALID_MMX_REG_MODE (mode))
   24825              :     return true;
   24826     31660867 :   if ((TARGET_3DNOW || TARGET_MMX_WITH_SSE)
   24827     31029603 :       && VALID_MMX_REG_MODE_3DNOW (mode))
   24828              :     return true;
   24829     22137744 :   if (mode == V2QImode)
   24830        22622 :     return true;
   24831              :   return false;
   24832              : }
   24833              : 
   24834              : /* Target hook for c_mode_for_suffix.  */
   24835              : static machine_mode
   24836       193204 : ix86_c_mode_for_suffix (char suffix)
   24837              : {
   24838       193204 :   if (suffix == 'q')
   24839              :     return TFmode;
   24840           37 :   if (suffix == 'w')
   24841              :     return XFmode;
   24842              : 
   24843            0 :   return VOIDmode;
   24844              : }
   24845              : 
/* Helper function to map common constraints to non-EGPR ones.
   All related constraints have h prefix, and h plus Upper letter
   means the constraint is strictly EGPR enabled, while h plus
   lower letter indicates the constraint is strictly gpr16 only.

   Specially for "g" constraint, split it to rmi as there is
   no corresponding general constraint define for backend.

   Here is the full list to map constraints that may involve
   gpr to h prefixed.

   "g" -> "jrjmi"
   "r" -> "jr"
   "m" -> "jm"
   "<" -> "j<"
   ">" -> "j>"
   "o" -> "jo"
   "V" -> "jV"
   "p" -> "jp"
   "Bm" -> "ja"
*/

static void map_egpr_constraints (vec<const char *> &constraints)
{
  for (size_t i = 0; i < constraints.length(); i++)
    {
      const char *cur = constraints[i];

      /* Flag-output constraints carry no GPR operand; leave untouched.  */
      if (startswith (cur, "=@cc"))
        continue;

      int len = strlen (cur);
      auto_vec<char> buf;

      /* Rewrite the constraint string character by character into BUF.  */
      for (int j = 0; j < len; j++)
        {
          switch (cur[j])
            {
            case 'g':
              /* "g" has no single j-prefixed equivalent; expand to the
                 component constraints "jr", "jm" and "i".  */
              buf.safe_push ('j');
              buf.safe_push ('r');
              buf.safe_push ('j');
              buf.safe_push ('m');
              buf.safe_push ('i');
              break;
            case 'r':
            case 'm':
            case '<':
            case '>':
            case 'o':
            case 'V':
            case 'p':
              /* Single-character constraints that may use a GPR get the
                 'j' prefix to restrict them to the low 16 registers.  */
              buf.safe_push ('j');
              buf.safe_push (cur[j]);
              break;
            case 'B':
              if (cur[j + 1] == 'm')
                {
                  /* "Bm" maps to "ja".  */
                  buf.safe_push ('j');
                  buf.safe_push ('a');
                  j++;
                }
              else
                {
                  /* Other two-character "B?" constraints pass through.  */
                  buf.safe_push (cur[j]);
                  buf.safe_push (cur[j + 1]);
                  j++;
                }
              break;
            case 'T':
            case 'Y':
            case 'W':
            case 'j':
              /* Two-character constraint prefixes: copy both characters
                 verbatim and skip the second one.  */
              buf.safe_push (cur[j]);
              buf.safe_push (cur[j + 1]);
              j++;
              break;
            case '{':
              /* Copy an explicit register name "{...}" through to and
                 including the closing brace.  */
              do
                {
                  buf.safe_push (cur[j]);
                } while (cur[j++] != '}');
              break;
            default:
              buf.safe_push (cur[j]);
              break;
            }
        }
      buf.safe_push ('\0');
      /* Replace the constraint with a heap copy of the rewritten string.  */
      constraints[i] = xstrdup (buf.address ());
    }
}
   24938              : 
/* Worker function for TARGET_MD_ASM_ADJUST.

   We implement asm flag outputs, and maintain source compatibility
   with the old cc0-based compiler.

   Each output constraint of the form "=@cc<cond>" is replaced by a
   direct use of the flags register, and instructions are emitted after
   the asm to extract the requested condition into the user's output
   variable.  Returns the emitted extraction sequence, or NULL (after
   adding a flags clobber) when no flag outputs were present.  */

static rtx_insn *
ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
                    vec<machine_mode> & /*input_modes*/,
                    vec<const char *> &constraints, vec<rtx> &/*uses*/,
                    vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
                    location_t loc)
{
  bool saw_asm_flag = false;

  start_sequence ();

  /* Under APX with gpr32 disallowed in inline asm, restrict GPR-capable
     constraints to the lower 16 registers.  */
  if (TARGET_APX_EGPR && !ix86_apx_inline_asm_use_gpr32)
    map_egpr_constraints (constraints);

  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
    {
      const char *con = constraints[i];
      if (!startswith (con, "=@cc"))
        continue;
      con += 4;
      if (strchr (con, ',') != NULL)
        {
          error_at (loc, "alternatives not allowed in %<asm%> flag output");
          continue;
        }

      /* A leading 'n' inverts the requested condition.  */
      bool invert = false;
      if (con[0] == 'n')
        invert = true, con++;

      machine_mode mode = CCmode;
      rtx_code code = UNKNOWN;

      /* Map the condition-code suffix to a CC mode and comparison.  */
      switch (con[0])
        {
        case 'a':
          if (con[1] == 0)
            mode = CCAmode, code = EQ;
          else if (con[1] == 'e' && con[2] == 0)
            mode = CCCmode, code = NE;
          break;
        case 'b':
          if (con[1] == 0)
            mode = CCCmode, code = EQ;
          else if (con[1] == 'e' && con[2] == 0)
            mode = CCAmode, code = NE;
          break;
        case 'c':
          if (con[1] == 0)
            mode = CCCmode, code = EQ;
          break;
        case 'e':
          if (con[1] == 0)
            mode = CCZmode, code = EQ;
          break;
        case 'g':
          if (con[1] == 0)
            mode = CCGCmode, code = GT;
          else if (con[1] == 'e' && con[2] == 0)
            mode = CCGCmode, code = GE;
          break;
        case 'l':
          if (con[1] == 0)
            mode = CCGCmode, code = LT;
          else if (con[1] == 'e' && con[2] == 0)
            mode = CCGCmode, code = LE;
          break;
        case 'o':
          if (con[1] == 0)
            mode = CCOmode, code = EQ;
          break;
        case 'p':
          if (con[1] == 0)
            mode = CCPmode, code = EQ;
          break;
        case 's':
          if (con[1] == 0)
            mode = CCSmode, code = EQ;
          break;
        case 'z':
          if (con[1] == 0)
            mode = CCZmode, code = EQ;
          break;
        }
      /* An unrecognized suffix leaves CODE as UNKNOWN.  */
      if (code == UNKNOWN)
        {
          error_at (loc, "unknown %<asm%> flag output %qs", constraints[i]);
          continue;
        }
      if (invert)
        code = reverse_condition (code);

      rtx dest = outputs[i];
      if (!saw_asm_flag)
        {
          /* This is the first asm flag output.  Here we put the flags
             register in as the real output and adjust the condition to
             allow it.  */
          constraints[i] = "=Bf";
          outputs[i] = gen_rtx_REG (CCmode, FLAGS_REG);
          saw_asm_flag = true;
        }
      else
        {
          /* We don't need the flags register as output twice.  */
          constraints[i] = "=X";
          outputs[i] = gen_rtx_SCRATCH (SImode);
        }

      /* Build the comparison (flags <code> 0) in QImode.  */
      rtx x = gen_rtx_REG (mode, FLAGS_REG);
      x = gen_rtx_fmt_ee (code, QImode, x, const0_rtx);

      machine_mode dest_mode = GET_MODE (dest);
      if (!SCALAR_INT_MODE_P (dest_mode))
        {
          error_at (loc, "invalid type for %<asm%> flag output");
          continue;
        }

      if (dest_mode == QImode)
        emit_insn (gen_rtx_SET (dest, x));
      else
        {
          /* Extract into a QImode temporary and zero-extend to the
             destination's wider integer mode.  */
          rtx reg = gen_reg_rtx (QImode);
          emit_insn (gen_rtx_SET (reg, x));

          reg = convert_to_mode (dest_mode, reg, 1);
          emit_move_insn (dest, reg);
        }
    }

  rtx_insn *seq = end_sequence ();

  if (saw_asm_flag)
    return seq;
  else
    {
      /* If we had no asm flag outputs, clobber the flags.  */
      clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REG));
      SET_HARD_REG_BIT (clobbered_regs, FLAGS_REG);
      return NULL;
    }
}
   25087              : 
   25088              : /* Implements target vector targetm.asm.encode_section_info.  */
   25089              : 
   25090              : static void ATTRIBUTE_UNUSED
   25091      9846721 : ix86_encode_section_info (tree decl, rtx rtl, int first)
   25092              : {
   25093      9846721 :   default_encode_section_info (decl, rtl, first);
   25094              : 
   25095      9846721 :   if (ix86_in_large_data_p (decl))
   25096           32 :     SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
   25097      9846721 : }
   25098              : 
   25099              : /* Worker function for REVERSE_CONDITION.  */
   25100              : 
   25101              : enum rtx_code
   25102     31890921 : ix86_reverse_condition (enum rtx_code code, machine_mode mode)
   25103              : {
   25104     31890921 :   return (mode == CCFPmode
   25105     31890921 :           ? reverse_condition_maybe_unordered (code)
   25106     27538384 :           : reverse_condition (code));
   25107              : }
   25108              : 
/* Output code to perform an x87 FP register move, from OPERANDS[1]
   to OPERANDS[0].  Returns the assembler template; a popping store
   (fstp/ffreep) is used when the source register dies in INSN.  */

const char *
output_387_reg_move (rtx_insn *insn, rtx *operands)
{
  if (REG_P (operands[0]))
    {
      if (REG_P (operands[1])
          && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        {
          /* Source register dies: pop it off the x87 stack.  */
          if (REGNO (operands[0]) == FIRST_STACK_REG)
            return output_387_ffreep (operands, 0);
          return "fstp\t%y0";
        }
      if (STACK_TOP_P (operands[0]))
        return "fld%Z1\t%y1";
      return "fst\t%y0";
    }
  else if (MEM_P (operands[0]))
    {
      gcc_assert (REG_P (operands[1]));
      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
        return "fstp%Z0\t%y0";
      else
        {
          /* There is no non-popping store to memory for XFmode.
             So if we need one, follow the store with a load.  */
          if (GET_MODE (operands[0]) == XFmode)
            return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
          else
            return "fst%Z0\t%y0";
        }
    }
  else
    gcc_unreachable();
}
#ifdef TARGET_SOLARIS
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  Emits the
   section directive for NAME with FLAGS, handling Solaris-specific
   assembler syntax before deferring to the generic ELF handler.  */

static void
i386_solaris_elf_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
               flags & SECTION_WRITE ? "aw" : "a");
      return;
    }

#if !HAVE_GNU_AS
  /* The native Solaris assembler needs its own COMDAT syntax.  */
  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
    {
      solaris_elf_asm_comdat_section (name, flags, decl);
      return;
    }

  /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
     SPARC assembler.  One cannot mix single-letter flags and #exclude, so
     only emit the latter here.  */
  if (flags & SECTION_EXCLUDE)
    {
      fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
      return;
    }
#endif

  default_elf_asm_named_section (name, flags, decl);
}
#endif /* TARGET_SOLARIS */
   25184              : 
   25185              : /* Return the mangling of TYPE if it is an extended fundamental type.  */
   25186              : 
   25187              : static const char *
   25188   1142360869 : ix86_mangle_type (const_tree type)
   25189              : {
   25190   1142360869 :   type = TYPE_MAIN_VARIANT (type);
   25191              : 
   25192   1142360869 :   if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
   25193              :       && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
   25194              :     return NULL;
   25195              : 
   25196    614132071 :   if (type == float128_type_node || type == float64x_type_node)
   25197              :     return NULL;
   25198              : 
   25199    613468103 :   switch (TYPE_MODE (type))
   25200              :     {
   25201              :     case E_BFmode:
   25202              :       return "DF16b";
   25203       276749 :     case E_HFmode:
   25204              :       /* _Float16 is "DF16_".
   25205              :          Align with clang's decision in https://reviews.llvm.org/D33719. */
   25206       276749 :       return "DF16_";
   25207      1158433 :     case E_TFmode:
   25208              :       /* __float128 is "g".  */
   25209      1158433 :       return "g";
   25210      8296450 :     case E_XFmode:
   25211              :       /* "long double" or __float80 is "e".  */
   25212      8296450 :       return "e";
   25213              :     default:
   25214              :       return NULL;
   25215              :     }
   25216              : }
   25217              : 
   25218              : /* Create C++ tinfo symbols for only conditionally available fundamental
   25219              :    types.  */
   25220              : 
   25221              : static void
   25222            5 : ix86_emit_support_tinfos (emit_support_tinfos_callback callback)
   25223              : {
   25224            5 :   extern tree ix86_float16_type_node;
   25225            5 :   extern tree ix86_bf16_type_node;
   25226              : 
   25227            5 :   if (!TARGET_SSE2)
   25228              :     {
   25229            0 :       if (!float16_type_node)
   25230            0 :         float16_type_node = ix86_float16_type_node;
   25231            0 :       if (!bfloat16_type_node)
   25232            0 :         bfloat16_type_node = ix86_bf16_type_node;
   25233            0 :       callback (float16_type_node);
   25234            0 :       callback (bfloat16_type_node);
   25235            0 :       float16_type_node = NULL_TREE;
   25236            0 :       bfloat16_type_node = NULL_TREE;
   25237              :     }
   25238            5 : }
   25239              : 
/* Cached VAR_DECL for the -mstack-protector-guard-symbol= TLS symbol.
   Shared across all functions in the TU; GTY(()) so it survives GC.  */
static GTY(()) tree ix86_tls_stack_chk_guard_decl;

/* Implement TARGET_STACK_PROTECT_GUARD.  Return the tree expression
   whose value is compared against the canary on function exit.  With
   the TLS guard enabled this is a volatile access through the guard
   segment register; otherwise fall back to the generic
   __stack_chk_guard variable.  */

static tree
ix86_stack_protect_guard (void)
{
  if (TARGET_SSP_TLS_GUARD)
    {
      tree type_node = lang_hooks.types.type_for_mode (ptr_mode, 1);
      /* Encode the guard segment register as an address-space
	 qualifier so dereferences below go through that segment.  */
      int qual = ENCODE_QUAL_ADDR_SPACE (ix86_stack_protector_guard_reg);
      tree type = build_qualified_type (type_node, qual);
      tree t;

      if (OPTION_SET_P (ix86_stack_protector_guard_symbol_str))
	{
	  /* User named an explicit guard symbol: build (once) an
	     external, volatile VAR_DECL for it and cache it.  */
	  t = ix86_tls_stack_chk_guard_decl;

	  if (t == NULL)
	    {
	      rtx x;

	      t = build_decl
		(UNKNOWN_LOCATION, VAR_DECL,
		 get_identifier (ix86_stack_protector_guard_symbol_str),
		 type);
	      TREE_STATIC (t) = 1;
	      TREE_PUBLIC (t) = 1;
	      DECL_EXTERNAL (t) = 1;
	      TREE_USED (t) = 1;
	      TREE_THIS_VOLATILE (t) = 1;
	      DECL_ARTIFICIAL (t) = 1;
	      DECL_IGNORED_P (t) = 1;

	      /* Do not share RTL as the declaration is visible outside of
		 current function.  */
	      x = DECL_RTL (t);
	      RTX_FLAG (x, used) = 1;

	      ix86_tls_stack_chk_guard_decl = t;
	    }
	}
      else
	{
	  /* Default: a volatile load from the configured offset in the
	     guard segment, built as *(type as-ptr)offset.  */
	  tree asptrtype = build_pointer_type (type);

	  t = build_int_cst (asptrtype, ix86_stack_protector_guard_offset);
	  t = build2 (MEM_REF, asptrtype, t,
		      build_int_cst (asptrtype, 0));
	  TREE_THIS_VOLATILE (t) = 1;
	}

      return t;
    }

  return default_stack_protect_guard ();
}
   25295              : 
   25296              : static bool
   25297          743 : ix86_stack_protect_runtime_enabled_p (void)
   25298              : {
   25299              :   /* Naked functions should not enable stack protector.  */
   25300          743 :   return !ix86_function_naked (current_function_decl);
   25301              : }
   25302              : 
   25303              : /* For 32-bit code we can save PIC register setup by using
   25304              :    __stack_chk_fail_local hidden function instead of calling
   25305              :    __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
   25306              :    register, so it is better to call __stack_chk_fail directly.  */
   25307              : 
   25308              : static tree ATTRIBUTE_UNUSED
   25309          264 : ix86_stack_protect_fail (void)
   25310              : {
   25311          264 :   return TARGET_64BIT
   25312          264 :          ? default_external_stack_protect_fail ()
   25313            1 :          : default_hidden_stack_protect_fail ();
   25314              : }
   25315              : 
   25316              : /* Select a format to encode pointers in exception handling data.  CODE
   25317              :    is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   25318              :    true if the symbol may be affected by dynamic relocations.
   25319              : 
   25320              :    ??? All x86 object file formats are capable of representing this.
   25321              :    After all, the relocation needed is the same as for the call insn.
   25322              :    Whether or not a particular assembler allows us to enter such, I
   25323              :    guess we'll have to see.  */
   25324              : 
   25325              : int
   25326       780597 : asm_preferred_eh_data_format (int code, int global)
   25327              : {
   25328              :   /* PE-COFF is effectively always -fPIC because of the .reloc section.  */
   25329       780597 :   if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access)
   25330              :     {
   25331        38401 :       int type = DW_EH_PE_sdata8;
   25332        38401 :       if (ptr_mode == SImode
   25333        24553 :           || ix86_cmodel == CM_SMALL_PIC
   25334        38485 :           || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
   25335              :         type = DW_EH_PE_sdata4;
   25336        53789 :       return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
   25337              :     }
   25338              : 
   25339       742196 :   if (ix86_cmodel == CM_SMALL
   25340        18676 :       || (ix86_cmodel == CM_MEDIUM && code))
   25341       723531 :     return DW_EH_PE_udata4;
   25342              : 
   25343              :   return DW_EH_PE_absptr;
   25344              : }
   25345              : 
/* Worker for ix86_builtin_vectorization_cost and the fallback calls
   from ix86_vector_costs::add_stmt_cost.  Return the cost of a
   vectorizer statement of kind TYPE_OF_COST operating in MODE, on the
   COSTS_N_INSNS scale.  The raw load/store table entries are relative
   to a register move (cost 2), hence the divisions by 2 below.  */
static int
ix86_default_vector_cost (enum vect_cost_for_stmt type_of_cost,
                          machine_mode mode)
{
  bool fp = FLOAT_MODE_P (mode);
  int index;
  switch (type_of_cost)
    {
      case scalar_stmt:
        return fp ? ix86_cost->addss : COSTS_N_INSNS (1);

      case scalar_load:
        /* load/store costs are relative to register move which is 2. Recompute
           it to COSTS_N_INSNS so everything have same base.  */
        return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
                              : ix86_cost->int_load [2]) / 2;

      case scalar_store:
        return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
                              : ix86_cost->int_store [2]) / 2;

      case vector_stmt:
        return ix86_vec_cost (mode,
                              fp ? ix86_cost->addss : ix86_cost->sse_op);

      case vector_load:
        index = sse_store_index (mode);
        /* See PR82713 - we may end up being called on non-vector type.  */
        if (index < 0)
          index = 2;
        return COSTS_N_INSNS (ix86_cost->sse_load[index]) / 2;

      case vector_store:
        index = sse_store_index (mode);
        /* See PR82713 - we may end up being called on non-vector type.  */
        if (index < 0)
          index = 2;
        return COSTS_N_INSNS (ix86_cost->sse_store[index]) / 2;

      /* Both directions of GPR<->vector transfer are charged as a
         generic SSE operation.  */
      case vec_to_scalar:
      case scalar_to_vec:
        return ix86_vec_cost (mode, ix86_cost->sse_op);

      /* We should have separate costs for unaligned loads and gather/scatter.
         Do that incrementally.  */
      case unaligned_load:
        index = sse_store_index (mode);
        /* See PR82713 - we may end up being called on non-vector type.  */
        if (index < 0)
          index = 2;
        return COSTS_N_INSNS (ix86_cost->sse_unaligned_load[index]) / 2;

      case unaligned_store:
        index = sse_store_index (mode);
        /* See PR82713 - we may end up being called on non-vector type.  */
        if (index < 0)
          index = 2;
        return COSTS_N_INSNS (ix86_cost->sse_unaligned_store[index]) / 2;

      /* Gather/scatter: a fixed setup cost plus a per-element cost
         scaled by the element count of MODE.  */
      case vector_gather_load:
        return ix86_vec_cost (mode,
                              COSTS_N_INSNS
                                 (ix86_cost->gather_static
                                  + ix86_cost->gather_per_elt
                                    * GET_MODE_NUNITS (mode)) / 2);

      case vector_scatter_store:
        return ix86_vec_cost (mode,
                              COSTS_N_INSNS
                                 (ix86_cost->scatter_static
                                  + ix86_cost->scatter_per_elt
                                    * GET_MODE_NUNITS (mode)) / 2);

      case cond_branch_taken:
        return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
        return ix86_cost->cond_not_taken_branch_cost;

      case vec_perm:
        return ix86_vec_cost (mode, ix86_cost->sse_op);

      case vec_promote_demote:
        if (fp)
          return vec_fp_conversion_cost (ix86_tune_cost, mode);
        return ix86_vec_cost (mode, ix86_cost->sse_op);

      case vec_construct:
        {
          int n = GET_MODE_NUNITS (mode);
          /* N - 1 element inserts into an SSE vector, the possible
             GPR -> XMM move is accounted for in add_stmt_cost.  */
          if (GET_MODE_BITSIZE (mode) <= 128)
            return (n - 1) * ix86_cost->sse_op;
          /* One vinserti128 for combining two SSE vectors for AVX256.  */
          else if (GET_MODE_BITSIZE (mode) == 256)
            return ((n - 2) * ix86_cost->sse_op
                    + ix86_vec_cost (mode, ix86_cost->sse_op));
          /* One vinserti64x4 and two vinserti128 for combining SSE
             and AVX256 vectors to AVX512.  */
          else if (GET_MODE_BITSIZE (mode) == 512)
            {
              machine_mode half_mode
                = mode_for_vector (GET_MODE_INNER (mode),
                                   GET_MODE_NUNITS (mode) / 2).require ();
              return ((n - 4) * ix86_cost->sse_op
                      + 2 * ix86_vec_cost (half_mode, ix86_cost->sse_op)
                      + ix86_vec_cost (mode, ix86_cost->sse_op));
            }
          gcc_unreachable ();
        }

      default:
        gcc_unreachable ();
    }
}
   25464              : 
   25465              : /* Implement targetm.vectorize.builtin_vectorization_cost.  */
   25466              : static int
   25467      9353319 : ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
   25468              :                                  tree vectype, int)
   25469              : {
   25470      9353319 :   machine_mode mode = TImode;
   25471      9353319 :   if (vectype != NULL)
   25472      7720621 :     mode = TYPE_MODE (vectype);
   25473      9353319 :   return ix86_default_vector_cost (type_of_cost, mode);
   25474              : }
   25475              : 
   25476              : 
   25477              : /* This function returns the calling abi specific va_list type node.
   25478              :    It returns  the FNDECL specific va_list type.  */
   25479              : 
   25480              : static tree
   25481        47580 : ix86_fn_abi_va_list (tree fndecl)
   25482              : {
   25483        47580 :   if (!TARGET_64BIT)
   25484          726 :     return va_list_type_node;
   25485        46854 :   gcc_assert (fndecl != NULL_TREE);
   25486              : 
   25487        46854 :   if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
   25488        12868 :     return ms_va_list_type_node;
   25489              :   else
   25490        33986 :     return sysv_va_list_type_node;
   25491              : }
   25492              : 
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  if (TARGET_64BIT)
    {
      /* The MS-ABI va_list type is marked with an attribute.  */
      if (lookup_attribute ("ms_abi va_list", TYPE_ATTRIBUTES (type)))
        return ms_va_list_type_node;

      /* The SysV va_list is a one-element array of a record; accept
         either the array form or the decayed pointer form and look at
         the element/pointee type for the marker attribute.  */
      if ((TREE_CODE (type) == ARRAY_TYPE
           && integer_zerop (array_type_nelts_minus_one (type)))
          || POINTER_TYPE_P (type))
        {
          tree elem_type = TREE_TYPE (type);
          if (TREE_CODE (elem_type) == RECORD_TYPE
              && lookup_attribute ("sysv_abi va_list",
                                   TYPE_ATTRIBUTES (elem_type)))
            return sysv_va_list_type_node;
        }

      return NULL_TREE;
    }

  return std_canonical_va_list_type (type);
}
   25520              : 
   25521              : /* Iterate through the target-specific builtin types for va_list.
   25522              :    IDX denotes the iterator, *PTREE is set to the result type of
   25523              :    the va_list builtin, and *PNAME to its internal type.
   25524              :    Returns zero if there is no element for this index, otherwise
   25525              :    IDX should be increased upon the next call.
   25526              :    Note, do not iterate a base builtin's name like __builtin_va_list.
   25527              :    Used from c_common_nodes_and_builtins.  */
   25528              : 
   25529              : static int
   25530       613743 : ix86_enum_va_list (int idx, const char **pname, tree *ptree)
   25531              : {
   25532       613743 :   if (TARGET_64BIT)
   25533              :     {
   25534       608367 :       switch (idx)
   25535              :         {
   25536              :         default:
   25537              :           break;
   25538              : 
   25539       202789 :         case 0:
   25540       202789 :           *ptree = ms_va_list_type_node;
   25541       202789 :           *pname = "__builtin_ms_va_list";
   25542       202789 :           return 1;
   25543              : 
   25544       202789 :         case 1:
   25545       202789 :           *ptree = sysv_va_list_type_node;
   25546       202789 :           *pname = "__builtin_sysv_va_list";
   25547       202789 :           return 1;
   25548              :         }
   25549              :     }
   25550              : 
   25551              :   return 0;
   25552              : }
   25553              : 
/* Scheduler target hooks; each maps to the corresponding ix86_*
   implementation in this file.  */
#undef TARGET_SCHED_DISPATCH
#define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
#define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_atom_sched_reorder
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
  ix86_dependencies_evaluation_hook
   25567              : 
   25568              : 
/* Implementation of reassociation_width target hook used by
   reassoc phase to identify parallelism level in reassociated
   tree.  Statements tree_code is passed in OP.  Arguments type
   is passed in MODE.  Returns the number of independent chains
   the reassociation pass may build (>= 1).  */

static int
ix86_reassociation_width (unsigned int op, machine_mode mode)
{
  int width = 1;
  /* Vector part.  */
  if (VECTOR_MODE_P (mode))
    {
      int div = 1;
      if (INTEGRAL_MODE_P (mode))
        width = ix86_cost->reassoc_vec_int;
      else if (FLOAT_MODE_P (mode))
        width = ix86_cost->reassoc_vec_fp;

      /* Cost table says no parallelism for this mode class.  */
      if (width == 1)
        return 1;

      /* Znver1-4 Integer vector instructions execute in FP unit
         and can execute 3 additions and one multiplication per cycle.  */
      if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
           || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4)
          && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
        return 1;
      /* Znver5 can do 2 integer multiplications per cycle with latency
         of 3.  */
      if ((ix86_tune == PROCESSOR_ZNVER5 || ix86_tune == PROCESSOR_ZNVER6)
          && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
        width = 6;

      /* Account for targets that splits wide vectors into multiple parts.
         DIV becomes the number of native-width pieces per operation;
         the width is scaled down accordingly (rounding up).  */
      if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256)
        div = GET_MODE_BITSIZE (mode) / 256;
      else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128)
        div = GET_MODE_BITSIZE (mode) / 128;
      else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64)
        div = GET_MODE_BITSIZE (mode) / 64;
      width = (width + div - 1) / div;
    }
  /* Scalar part.  */
  else if (INTEGRAL_MODE_P (mode))
    width = ix86_cost->reassoc_int;
  else if (FLOAT_MODE_P (mode))
    width = ix86_cost->reassoc_fp;

  /* Avoid using too many registers in 32bit mode.  */
  if (!TARGET_64BIT && width > 2)
    width = 2;
  return width;
}
   25622              : 
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
   place emms and femms instructions.  */

/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE.  Map scalar MODE to
   the widest enabled vector mode, honoring the prefer-128/prefer-256
   tuning flags; return word_mode when no suitable vector ISA exists.  */
static machine_mode
ix86_preferred_simd_mode (scalar_mode mode)
{
  if (!TARGET_SSE)
    return word_mode;

  switch (mode)
    {
    case E_QImode:
      if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
        return V64QImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V32QImode;
      else
        return V16QImode;

    case E_HImode:
      if (TARGET_AVX512BW && !TARGET_PREFER_AVX256)
        return V32HImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V16HImode;
      else
        return V8HImode;

    case E_SImode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
        return V16SImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V8SImode;
      else
        return V4SImode;

    case E_DImode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
        return V8DImode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V4DImode;
      else
        return V2DImode;

    case E_HFmode:
      /* _Float16 vectors need AVX512FP16; without it, scalarize.  */
      if (TARGET_AVX512FP16)
        {
          if (TARGET_AVX512VL)
            {
              if (TARGET_PREFER_AVX128)
                return V8HFmode;
              else if (TARGET_PREFER_AVX256)
                return V16HFmode;
            }
          return V32HFmode;
        }
      return word_mode;

    case E_BFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
        return V32BFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V16BFmode;
      else
        return V8BFmode;

    case E_SFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
        return V16SFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V8SFmode;
      else
        return V4SFmode;

    case E_DFmode:
      if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
        return V8DFmode;
      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
        return V4DFmode;
      else if (TARGET_SSE2)
        return V2DFmode;
      /* FALLTHRU */

    default:
      return word_mode;
    }
}
   25709              : 
/* If AVX is enabled then try vectorizing with both 256bit and 128bit
   vectors.  If AVX512F is enabled then try vectorizing with 512bit,
   256bit and 128bit vectors.  Note that the push order in the ALL
   branches deliberately differs from the non-ALL branches (the widest
   mode goes last instead of first).  */

static unsigned int
ix86_autovectorize_vector_modes (vector_modes *modes, bool all)
{
  if (TARGET_AVX512F && !TARGET_PREFER_AVX256)
    {
      modes->safe_push (V64QImode);
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
    }
  else if (TARGET_AVX512F && all)
    {
      /* Prefer-256 tuning, but still offer 512-bit when ALL is set.  */
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
      modes->safe_push (V64QImode);
    }
  else if (TARGET_AVX && !TARGET_PREFER_AVX128)
    {
      modes->safe_push (V32QImode);
      modes->safe_push (V16QImode);
    }
  else if (TARGET_AVX && all)
    {
      /* Prefer-128 tuning, but still offer 256-bit when ALL is set.  */
      modes->safe_push (V16QImode);
      modes->safe_push (V32QImode);
    }
  else if (TARGET_SSE2)
    modes->safe_push (V16QImode);

  /* Sub-128-bit candidates.  */
  if (TARGET_MMX_WITH_SSE)
    modes->safe_push (V8QImode);

  if (TARGET_SSE2)
    modes->safe_push (V4QImode);

  return ix86_vect_compare_costs ? VECT_COMPARE_COSTS : 0;
}
   25750              : 
/* Implementation of targetm.vectorize.get_mask_mode.  Return the mode
   used for a comparison mask on DATA_MODE: a scalar integer mode when
   AVX512-style kmask comparisons apply, otherwise an integer vector
   mode of the same shape as DATA_MODE.  */

static opt_machine_mode
ix86_get_mask_mode (machine_mode data_mode)
{
  unsigned vector_size = GET_MODE_SIZE (data_mode);
  unsigned nunits = GET_MODE_NUNITS (data_mode);
  unsigned elem_size = vector_size / nunits;

  /* Scalar mask case.  */
  if ((TARGET_AVX512F && vector_size == 64)
      || (TARGET_AVX512VL && (vector_size == 32 || vector_size == 16))
      /* AVX512FP16 only supports vector comparison
         to kmask for _Float16.  */
      || (TARGET_AVX512VL && TARGET_AVX512FP16
          && GET_MODE_INNER (data_mode) == E_HFmode)
      || (TARGET_AVX10_2 && GET_MODE_INNER (data_mode) == E_BFmode))
    {
      /* 1/2-byte elements additionally require AVX512BW for the
         corresponding kmask-producing compares.  */
      if (elem_size == 4
          || elem_size == 8
          || (TARGET_AVX512BW && (elem_size == 1 || elem_size == 2)))
        return smallest_int_mode_for_size (nunits).require ();
    }

  /* Vector mask: one integer element per data element.  */
  scalar_int_mode elem_mode
    = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT).require ();

  gcc_assert (elem_size * nunits == vector_size);

  return mode_for_vector (elem_mode, nunits);
}
   25782              : 
   25783              : 
   25784              : 
/* Return class of registers which could be used for pseudo of MODE
   and of class RCLASS for spilling instead of memory.  Return NO_REGS
   if it is not possible or non-profitable.  */

/* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */

static reg_class_t
ix86_spill_class (reg_class_t rclass, machine_mode mode)
{
  /* The "0 &&" keeps this deliberately disabled (see PRs above) while
     preserving the intended condition for possible re-enablement.  */
  if (0 && TARGET_GENERAL_REGS_SSE_SPILL
      && TARGET_SSE2
      && TARGET_INTER_UNIT_MOVES_TO_VEC
      && TARGET_INTER_UNIT_MOVES_FROM_VEC
      && (mode == SImode || (TARGET_64BIT && mode == DImode))
      && INTEGER_CLASS_P (rclass))
    return ALL_SSE_REGS;
  return NO_REGS;
}
   25803              : 
   25804              : /* Implement TARGET_MAX_NOCE_IFCVT_SEQ_COST.  Like the default implementation,
   25805              :    but returns a lower bound.  */
   25806              : 
   25807              : static unsigned int
   25808      1878067 : ix86_max_noce_ifcvt_seq_cost (edge e)
   25809              : {
   25810      1878067 :   bool predictable_p = predictable_edge_p (e);
   25811      1878067 :   if (predictable_p)
   25812              :     {
   25813       145215 :       if (OPTION_SET_P (param_max_rtl_if_conversion_predictable_cost))
   25814            8 :         return param_max_rtl_if_conversion_predictable_cost;
   25815              :     }
   25816              :   else
   25817              :     {
   25818      1732852 :       if (OPTION_SET_P (param_max_rtl_if_conversion_unpredictable_cost))
   25819           73 :         return param_max_rtl_if_conversion_unpredictable_cost;
   25820              :     }
   25821              : 
   25822              :   /* For modern machines with deeper pipeline, the penalty for branch
   25823              :      misprediction could be higher than before to reset the pipeline
   25824              :      slots. Add parameter br_mispredict_scale as a factor to describe
   25825              :      the impact of reseting the pipeline.  */
   25826              : 
   25827      1877986 :   return BRANCH_COST (true, predictable_p)
   25828      1877986 :          * ix86_tune_cost->br_mispredict_scale;
   25829              : }
   25830              : 
   25831              : /* Return true if SEQ is a good candidate as a replacement for the
   25832              :    if-convertible sequence described in IF_INFO.  */
   25833              : 
   25834              : static bool
   25835       204426 : ix86_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *if_info)
   25836              : {
   25837       204426 :   if (TARGET_ONE_IF_CONV_INSN && if_info->speed_p)
   25838              :     {
   25839              :       int cmov_cnt = 0;
   25840              :       /* Punt if SEQ contains more than one CMOV or FCMOV instruction.
   25841              :          Maybe we should allow even more conditional moves as long as they
   25842              :          are used far enough not to stall the CPU, or also consider
   25843              :          IF_INFO->TEST_BB succ edge probabilities.  */
   25844          247 :       for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
   25845              :         {
   25846          205 :           rtx set = single_set (insn);
   25847          205 :           if (!set)
   25848            0 :             continue;
   25849          205 :           if (GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
   25850          163 :             continue;
   25851           42 :           rtx src = SET_SRC (set);
   25852           42 :           machine_mode mode = GET_MODE (src);
   25853           42 :           if (GET_MODE_CLASS (mode) != MODE_INT
   25854            0 :               && GET_MODE_CLASS (mode) != MODE_FLOAT)
   25855            0 :             continue;
   25856           42 :           if ((!REG_P (XEXP (src, 1)) && !MEM_P (XEXP (src, 1)))
   25857           41 :               || (!REG_P (XEXP (src, 2)) && !MEM_P (XEXP (src, 2))))
   25858            1 :             continue;
   25859              :           /* insn is CMOV or FCMOV.  */
   25860           41 :           if (++cmov_cnt > 1)
   25861              :             return false;
   25862              :         }
   25863              :     }
   25864              : 
   25865              :   /* W/o TARGET_SSE4_1, it takes 3 instructions (pand, pandn and por)
   25866              :      for movdfcc/movsfcc, and could possibly fail cost comparison.
   25867              :      Increase branch cost will hurt performance for other modes, so
   25868              :      specially add some preference for floating point ifcvt.  */
   25869       204418 :   if (!TARGET_SSE4_1 && if_info->x
   25870       155403 :       && GET_MODE_CLASS (GET_MODE (if_info->x)) == MODE_FLOAT
   25871        34100 :       && if_info->speed_p)
   25872              :     {
   25873        27058 :       unsigned cost = seq_cost (seq, true);
   25874              : 
   25875        27058 :       if (cost <= if_info->original_cost)
   25876              :         return true;
   25877              : 
   25878        25860 :       return cost <= (if_info->max_seq_cost + COSTS_N_INSNS (2));
   25879              :     }
   25880              : 
   25881       177360 :   return default_noce_conversion_profitable_p (seq, if_info);
   25882              : }
   25883              : 
/* x86-specific vector costs.  Collects per-statement costs during
   vectorization and records statistics (register pressure estimates,
   reduction counts, 256-bit permutations) used by finish_cost to make
   x86-specific adjustments to the final cost.  */
class ix86_vector_costs : public vector_costs
{
public:
  ix86_vector_costs (vec_info *, bool);

  /* Record the cost of COUNT copies of a vectorizer statement of kind
     KIND, also updating the statistics counters below.  */
  unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
                              stmt_vec_info stmt_info, slp_tree node,
                              tree vectype, int misalign,
                              vect_cost_model_location where) override;
  /* Finalize the accumulated costs with x86-specific adjustments.  */
  void finish_cost (const vector_costs *) override;

private:

  /* Estimate register pressure of the vectorized code.  */
  void ix86_vect_estimate_reg_pressure ();
  /* Number of GENERAL_REGS/SSE_REGS used in the vectorizer, it's used for
     estimation of register pressure.
     ??? Currently it's only used by vec_construct/scalar_to_vec
     where we know it's not loaded from memory.
     Indexed by vect_cost_model_location (prologue/body/epilogue).  */
  unsigned m_num_gpr_needed[3];
  unsigned m_num_sse_needed[3];
  /* Number of 256-bit vector permutation.  */
  unsigned m_num_avx256_vec_perm[3];
  /* Number of reductions for FMA/DOT_PROD_EXPR/SAD_EXPR  */
  unsigned m_num_reduc[X86_REDUC_LAST];
  /* Don't do unroll if m_prefer_unroll is false, default is true.  */
  bool m_prefer_unroll;
};
   25913              : 
   25914      1966356 : ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool costing_for_scalar)
   25915              :   : vector_costs (vinfo, costing_for_scalar),
   25916      1966356 :     m_num_gpr_needed (),
   25917      1966356 :     m_num_sse_needed (),
   25918      1966356 :     m_num_avx256_vec_perm (),
   25919      1966356 :     m_num_reduc (),
   25920      1966356 :     m_prefer_unroll (true)
   25921      1966356 : {}
   25922              : 
   25923              : /* Implement targetm.vectorize.create_costs.  */
   25924              : 
   25925              : static vector_costs *
   25926      1966356 : ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
   25927              : {
   25928      1966356 :   return new ix86_vector_costs (vinfo, costing_for_scalar);
   25929              : }
   25930              : 
   25931              : unsigned
   25932      6656078 : ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
   25933              :                                   stmt_vec_info stmt_info, slp_tree node,
   25934              :                                   tree vectype, int,
   25935              :                                   vect_cost_model_location where)
   25936              : {
   25937      6656078 :   unsigned retval = 0;
   25938      6656078 :   bool scalar_p
   25939              :     = (kind == scalar_stmt || kind == scalar_load || kind == scalar_store);
   25940      6656078 :   int stmt_cost = - 1;
   25941              : 
   25942      6656078 :   bool fp = false;
   25943      6656078 :   machine_mode mode = scalar_p ? SImode : TImode;
   25944              : 
   25945      6656078 :   if (vectype != NULL)
   25946              :     {
   25947      2977691 :       fp = FLOAT_TYPE_P (vectype);
   25948      2977691 :       mode = TYPE_MODE (vectype);
   25949      2977691 :       if (scalar_p)
   25950       242927 :         mode = TYPE_MODE (TREE_TYPE (vectype));
   25951              :     }
   25952              :   /* When we are costing a scalar stmt use the scalar stmt to get at the
   25953              :      type of the operation.  */
   25954      3678387 :   else if (scalar_p && stmt_info)
   25955      3618067 :     if (tree lhs = gimple_get_lhs (stmt_info->stmt))
   25956              :       {
   25957      3445962 :         fp = FLOAT_TYPE_P (TREE_TYPE (lhs));
   25958      3445962 :         mode = TYPE_MODE (TREE_TYPE (lhs));
   25959              :       }
   25960              : 
   25961      6656078 :   if ((kind == vector_stmt || kind == scalar_stmt)
   25962      1615817 :       && stmt_info
   25963      8265929 :       && stmt_info->stmt && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
   25964              :     {
   25965      1250771 :       tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
   25966              :       /*machine_mode inner_mode = mode;
   25967              :       if (VECTOR_MODE_P (mode))
   25968              :         inner_mode = GET_MODE_INNER (mode);*/
   25969              : 
   25970      1250771 :       switch (subcode)
   25971              :         {
   25972       502468 :         case PLUS_EXPR:
   25973       502468 :         case POINTER_PLUS_EXPR:
   25974       502468 :         case MINUS_EXPR:
   25975       502468 :           if (kind == scalar_stmt)
   25976              :             {
   25977       326475 :               if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   25978        69497 :                 stmt_cost = ix86_cost->addss;
   25979       256978 :               else if (X87_FLOAT_MODE_P (mode))
   25980          128 :                 stmt_cost = ix86_cost->fadd;
   25981              :               else
   25982       256850 :                 stmt_cost = ix86_cost->add;
   25983              :             }
   25984              :           else
   25985       175993 :             stmt_cost = ix86_vec_cost (mode, fp ? ix86_cost->addss
   25986              :                                        : ix86_cost->sse_op);
   25987              :           break;
   25988              : 
   25989       179475 :         case MULT_EXPR:
   25990              :           /* For MULT_HIGHPART_EXPR, x86 only supports pmulhw,
   25991              :              take it as MULT_EXPR.  */
   25992       179475 :         case MULT_HIGHPART_EXPR:
   25993       179475 :           stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
   25994       179475 :           break;
   25995              :           /* There's no direct instruction for WIDEN_MULT_EXPR,
   25996              :              take emulation into account.  */
   25997         1018 :         case WIDEN_MULT_EXPR:
   25998         2036 :           stmt_cost = ix86_widen_mult_cost (ix86_cost, mode,
   25999         1018 :                                             TYPE_UNSIGNED (vectype));
   26000         1018 :           break;
   26001              : 
   26002         5990 :         case NEGATE_EXPR:
   26003         5990 :           if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26004         1700 :             stmt_cost = ix86_cost->sse_op;
   26005         4290 :           else if (X87_FLOAT_MODE_P (mode))
   26006            0 :             stmt_cost = ix86_cost->fchs;
   26007         4290 :           else if (VECTOR_MODE_P (mode))
   26008         1836 :             stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26009              :           else
   26010         2454 :             stmt_cost = ix86_cost->add;
   26011              :           break;
   26012        12377 :         case TRUNC_DIV_EXPR:
   26013        12377 :         case CEIL_DIV_EXPR:
   26014        12377 :         case FLOOR_DIV_EXPR:
   26015        12377 :         case ROUND_DIV_EXPR:
   26016        12377 :         case TRUNC_MOD_EXPR:
   26017        12377 :         case CEIL_MOD_EXPR:
   26018        12377 :         case FLOOR_MOD_EXPR:
   26019        12377 :         case RDIV_EXPR:
   26020        12377 :         case ROUND_MOD_EXPR:
   26021        12377 :         case EXACT_DIV_EXPR:
   26022        12377 :           stmt_cost = ix86_division_cost (ix86_cost, mode);
   26023        12377 :           break;
   26024              : 
   26025        54787 :         case RSHIFT_EXPR:
   26026        54787 :         case LSHIFT_EXPR:
   26027        54787 :         case LROTATE_EXPR:
   26028        54787 :         case RROTATE_EXPR:
   26029        54787 :           {
   26030        54787 :             tree op1 = gimple_assign_rhs1 (stmt_info->stmt);
   26031        54787 :             tree op2 = gimple_assign_rhs2 (stmt_info->stmt);
   26032        54787 :             stmt_cost = ix86_shift_rotate_cost
   26033        54787 :                            (ix86_cost,
   26034              :                             (subcode == RSHIFT_EXPR
   26035        31569 :                              && !TYPE_UNSIGNED (TREE_TYPE (op1)))
   26036              :                             ? ASHIFTRT : LSHIFTRT, mode,
   26037        54787 :                             TREE_CODE (op2) == INTEGER_CST,
   26038        54787 :                             cst_and_fits_in_hwi (op2)
   26039        32470 :                             ? int_cst_value (op2) : -1,
   26040              :                             false, false, NULL, NULL);
   26041              :           }
   26042        54787 :           break;
   26043        83216 :         case NOP_EXPR:
   26044              :           /* Only sign-conversions are free.  */
   26045        83216 :           if (tree_nop_conversion_p
   26046        83216 :                 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt)),
   26047        83216 :                  TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))))
   26048              :             stmt_cost = 0;
   26049        83216 :           else if (fp)
   26050         6894 :             stmt_cost = vec_fp_conversion_cost
   26051         6894 :                           (ix86_tune_cost, GET_MODE_BITSIZE (mode));
   26052              :           break;
   26053              : 
   26054        13427 :         case FLOAT_EXPR:
   26055        13427 :             if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26056        10334 :               stmt_cost = ix86_cost->cvtsi2ss;
   26057         3093 :             else if (X87_FLOAT_MODE_P (mode))
   26058              :               /* TODO: We do not have cost tables for x87.  */
   26059           50 :               stmt_cost = ix86_cost->fadd;
   26060              :             else
   26061         3043 :               stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
   26062              :             break;
   26063              : 
   26064         1706 :         case FIX_TRUNC_EXPR:
   26065         1706 :             if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26066            0 :               stmt_cost = ix86_cost->cvtss2si;
   26067         1706 :             else if (X87_FLOAT_MODE_P (mode))
   26068              :               /* TODO: We do not have cost tables for x87.  */
   26069            0 :               stmt_cost = ix86_cost->fadd;
   26070              :             else
   26071         1706 :               stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
   26072              :             break;
   26073              : 
   26074        38525 :         case COND_EXPR:
   26075        38525 :           {
   26076              :             /* SSE2 conditinal move sequence is:
   26077              :                  pcmpgtd %xmm5, %xmm0 (accounted separately)
   26078              :                  pand    %xmm0, %xmm2
   26079              :                  pandn   %xmm1, %xmm0
   26080              :                  por     %xmm2, %xmm0
   26081              :                while SSE4 uses cmp + blend
   26082              :                and AVX512 masked moves.
   26083              : 
   26084              :                The condition is accounted separately since we usually have
   26085              :                  p = a < b
   26086              :                  c = p ? x : y
   26087              :                and we will account first statement as setcc.  Exception is when
   26088              :                p is loaded from memory as bool and then we will not acocunt
   26089              :                the compare, but there is no way to check for this.  */
   26090              : 
   26091        38525 :             int ninsns = TARGET_SSE4_1 ? 1 : 3;
   26092              : 
   26093              :             /* If one of parameters is 0 or -1 the sequence will be simplified:
   26094              :                (if_true & mask) | (if_false & ~mask) -> if_true & mask  */
   26095        19913 :             if (ninsns > 1
   26096        19913 :                 && (zerop (gimple_assign_rhs2 (stmt_info->stmt))
   26097        19587 :                     || zerop (gimple_assign_rhs3 (stmt_info->stmt))
   26098        11513 :                     || integer_minus_onep
   26099        11513 :                         (gimple_assign_rhs2 (stmt_info->stmt))
   26100        11087 :                     || integer_minus_onep
   26101        11087 :                         (gimple_assign_rhs3 (stmt_info->stmt))))
   26102              :               ninsns = 1;
   26103              : 
   26104        38525 :             if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26105         2794 :               stmt_cost = ninsns * ix86_cost->sse_op;
   26106        35731 :             else if (X87_FLOAT_MODE_P (mode))
   26107              :               /* x87 requires conditional branch.  We don't have cost for
   26108              :                  that.  */
   26109              :               ;
   26110        35722 :             else if (VECTOR_MODE_P (mode))
   26111        14725 :               stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op);
   26112              :             else
   26113              :               /* compare (accounted separately) + cmov.  */
   26114        20997 :               stmt_cost = ix86_cost->add;
   26115              :           }
   26116              :           break;
   26117              : 
   26118        22115 :         case MIN_EXPR:
   26119        22115 :         case MAX_EXPR:
   26120        22115 :           if (fp)
   26121              :             {
   26122         1008 :               if (X87_FLOAT_MODE_P (mode)
   26123          384 :                   && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26124              :                 /* x87 requires conditional branch.  We don't have cost for
   26125              :                    that.  */
   26126              :                 ;
   26127              :               else
   26128              :                 /* minss  */
   26129         1008 :                 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26130              :             }
   26131              :           else
   26132              :             {
   26133        21107 :               if (VECTOR_MODE_P (mode))
   26134              :                 {
   26135         4069 :                   stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26136              :                   /* vpmin was introduced in SSE3.
   26137              :                      SSE2 needs pcmpgtd + pand + pandn + pxor.
   26138              :                      If one of parameters is 0 or -1 the sequence is simplified
   26139              :                      to pcmpgtd + pand.  */
   26140         4069 :                   if (!TARGET_SSSE3)
   26141              :                     {
   26142         3100 :                       if (zerop (gimple_assign_rhs2 (stmt_info->stmt))
   26143         4434 :                           || integer_minus_onep
   26144         1334 :                                 (gimple_assign_rhs2 (stmt_info->stmt)))
   26145         1766 :                         stmt_cost *= 2;
   26146              :                       else
   26147         1334 :                         stmt_cost *= 4;
   26148              :                     }
   26149              :                 }
   26150              :               else
   26151              :                 /* cmp + cmov.  */
   26152        17038 :                 stmt_cost = ix86_cost->add * 2;
   26153              :             }
   26154              :           break;
   26155              : 
   26156          940 :         case ABS_EXPR:
   26157          940 :         case ABSU_EXPR:
   26158          940 :           if (fp)
   26159              :             {
   26160          374 :               if (X87_FLOAT_MODE_P (mode)
   26161          150 :                   && !SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26162              :                 /* fabs.  */
   26163            0 :                 stmt_cost = ix86_cost->fabs;
   26164              :               else
   26165              :                 /* andss of sign bit.  */
   26166          374 :                 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26167              :             }
   26168              :           else
   26169              :             {
   26170          566 :               if (VECTOR_MODE_P (mode))
   26171              :                 {
   26172           99 :                   stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26173              :                   /* vabs was introduced in SSE3.
   26174              :                      SSE3 uses psrat + pxor + psub.  */
   26175           99 :                   if (!TARGET_SSSE3)
   26176           75 :                     stmt_cost *= 3;
   26177              :                 }
   26178              :               else
   26179              :                 /* neg + cmov.  */
   26180          467 :                 stmt_cost = ix86_cost->add * 2;
   26181              :             }
   26182              :           break;
   26183              : 
   26184       107774 :         case BIT_IOR_EXPR:
   26185       107774 :         case BIT_XOR_EXPR:
   26186       107774 :         case BIT_AND_EXPR:
   26187       107774 :         case BIT_NOT_EXPR:
   26188       107774 :           gcc_assert (!SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)
   26189              :                       && !X87_FLOAT_MODE_P (mode));
   26190       107774 :           if (VECTOR_MODE_P (mode))
   26191        35338 :             stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26192              :           else
   26193        72436 :             stmt_cost = ix86_cost->add;
   26194              :           break;
   26195              : 
   26196       226953 :         default:
   26197       226953 :           if (truth_value_p (subcode))
   26198              :             {
   26199        73405 :               if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
   26200              :                 /* CMPccS? insructions are cheap, so use sse_op.  While they
   26201              :                    produce a mask which may need to be turned to 0/1 by and,
   26202              :                    expect that this will be optimized away in a common case.  */
   26203            0 :                 stmt_cost = ix86_cost->sse_op;
   26204        73405 :               else if (X87_FLOAT_MODE_P (mode))
   26205              :                 /* fcmp + setcc.  */
   26206            0 :                 stmt_cost = ix86_cost->fadd + ix86_cost->add;
   26207        73405 :               else if (VECTOR_MODE_P (mode))
   26208        14871 :                 stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26209              :               else
   26210              :                 /* setcc.  */
   26211        58534 :                 stmt_cost = ix86_cost->add;
   26212              :               break;
   26213              :             }
   26214              :           break;
   26215              :         }
   26216              :     }
   26217              : 
   26218              :   /* Record number of load/store/gather/scatter in vectorized body.  */
   26219      6656078 :   if (where == vect_body && !m_costing_for_scalar)
   26220              :     {
   26221      1703034 :       int scale = 1;
   26222      1703034 :       if (vectype
   26223      3397499 :           && ((GET_MODE_SIZE (TYPE_MODE (vectype)) == 64
   26224        59575 :               && TARGET_AVX512_SPLIT_REGS)
   26225      3388806 :               || (GET_MODE_SIZE (TYPE_MODE (vectype)) == 32
   26226        93449 :                   && TARGET_AVX256_SPLIT_REGS)))
   26227              :         scale = 2;
   26228              : 
   26229      1703034 :       switch (kind)
   26230              :         {
   26231              :           /* Emulated gather/scatter or any scalarization.  */
   26232       114230 :         case scalar_load:
   26233       114230 :         case scalar_stmt:
   26234       114230 :         case scalar_store:
   26235       114230 :         case vector_gather_load:
   26236       114230 :         case vector_scatter_store:
   26237       114230 :           m_prefer_unroll = false;
   26238       114230 :           break;
   26239              : 
   26240       474357 :         case vector_stmt:
   26241       474357 :         case vec_to_scalar:
   26242              :           /* Count number of reduction FMA and "real" DOT_PROD_EXPR,
   26243              :              unroll in the vectorizer will enable partial sum.  */
   26244       474357 :           if (stmt_info
   26245       474335 :               && vect_is_reduction (stmt_info)
   26246       521950 :               && stmt_info->stmt)
   26247              :             {
   26248              :               /* Handle __builtin_fma.  */
   26249        47593 :               if (gimple_call_combined_fn (stmt_info->stmt) == CFN_FMA)
   26250              :                 {
   26251            6 :                   m_num_reduc[X86_REDUC_FMA] += count * scale;
   26252            6 :                   break;
   26253              :                 }
   26254              : 
   26255        47587 :               if (!is_gimple_assign (stmt_info->stmt))
   26256              :                 break;
   26257              : 
   26258        45209 :               tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
   26259        45209 :               machine_mode inner_mode = GET_MODE_INNER (mode);
   26260        45209 :               tree rhs1, rhs2;
   26261        45209 :               bool native_vnni_p = true;
   26262        45209 :               gimple* def;
   26263        45209 :               machine_mode mode_rhs;
   26264        45209 :               switch (subcode)
   26265              :                 {
   26266        35233 :                 case PLUS_EXPR:
   26267        35233 :                 case MINUS_EXPR:
   26268        35233 :                   if (!fp || !flag_associative_math
   26269        15940 :                       || flag_fp_contract_mode != FP_CONTRACT_FAST)
   26270              :                     break;
   26271              : 
   26272              :                   /* FMA condition for different modes.  */
   26273        15940 :                   if (((inner_mode == DFmode || inner_mode == SFmode)
   26274        15928 :                        && !TARGET_FMA && !TARGET_AVX512VL)
   26275         5776 :                       || (inner_mode == HFmode && !TARGET_AVX512FP16)
   26276         5776 :                       || (inner_mode == BFmode && !TARGET_AVX10_2))
   26277              :                     break;
   26278              : 
   26279              :                   /* MULT_EXPR + PLUS_EXPR/MINUS_EXPR is transformed
   26280              :                      to FMA/FNMA after vectorization.  */
   26281         5776 :                   rhs1 = gimple_assign_rhs1 (stmt_info->stmt);
   26282         5776 :                   rhs2 = gimple_assign_rhs2 (stmt_info->stmt);
   26283         5776 :                   if (subcode == PLUS_EXPR
   26284         4538 :                       && TREE_CODE (rhs1) == SSA_NAME
   26285         4538 :                       && (def = SSA_NAME_DEF_STMT (rhs1), true)
   26286         4538 :                       && is_gimple_assign (def)
   26287         8106 :                       && gimple_assign_rhs_code (def) == MULT_EXPR)
   26288         1402 :                     m_num_reduc[X86_REDUC_FMA] += count * scale;
   26289         4374 :                   else if (TREE_CODE (rhs2) == SSA_NAME
   26290         4374 :                            && (def = SSA_NAME_DEF_STMT (rhs2), true)
   26291         4374 :                            && is_gimple_assign (def)
   26292         8716 :                            && gimple_assign_rhs_code (def) == MULT_EXPR)
   26293         4338 :                     m_num_reduc[X86_REDUC_FMA] += count * scale;
   26294              :                   break;
   26295              : 
   26296              :                   /* Vectorizer lane_reducing_op_p supports DOT_PROX_EXPR,
   26297              :                      WIDEN_SUM_EXPR and SAD_EXPR, x86 backend only supports
   26298              :                      SAD_EXPR (usad{v16qi,v32qi,v64qi}) and DOT_PROD_EXPR.  */
   26299          374 :                 case DOT_PROD_EXPR:
   26300          374 :                   rhs1 = gimple_assign_rhs1 (stmt_info->stmt);
   26301          374 :                   mode_rhs = TYPE_MODE (TREE_TYPE (rhs1));
   26302          374 :                   if (mode_rhs == QImode)
   26303              :                     {
   26304          211 :                       rhs2 = gimple_assign_rhs2 (stmt_info->stmt);
   26305          211 :                       signop signop1_p = TYPE_SIGN (TREE_TYPE (rhs1));
   26306          211 :                       signop signop2_p = TYPE_SIGN (TREE_TYPE (rhs2));
   26307              : 
   26308              :                       /* vpdpbusd.  */
   26309          211 :                       if (signop1_p != signop2_p)
   26310           53 :                         native_vnni_p
   26311           53 :                           = (GET_MODE_SIZE (mode) == 64
   26312           53 :                              ? TARGET_AVX512VNNI
   26313           10 :                              : ((TARGET_AVX512VNNI && TARGET_AVX512VL)
   26314           53 :                                 || TARGET_AVXVNNI));
   26315              :                       else
   26316              :                         /* vpdpbssd.  */
   26317          158 :                         native_vnni_p
   26318          174 :                           = (GET_MODE_SIZE (mode) == 64
   26319          158 :                              ? TARGET_AVX10_2
   26320          142 :                              : (TARGET_AVXVNNIINT8 || TARGET_AVX10_2));
   26321              :                     }
   26322          374 :                   m_num_reduc[X86_REDUC_DOT_PROD] += count * scale;
   26323              : 
   26324              :                   /* Dislike to do unroll and partial sum for
   26325              :                      emulated DOT_PROD_EXPR.  */
   26326          374 :                   if (!native_vnni_p)
   26327          128 :                     m_num_reduc[X86_REDUC_DOT_PROD] += 3 * count;
   26328              :                   break;
   26329              : 
   26330           80 :                 case SAD_EXPR:
   26331           80 :                   m_num_reduc[X86_REDUC_SAD] += count * scale;
   26332           80 :                   break;
   26333              : 
   26334              :                 default:
   26335              :                   break;
   26336              :                 }
   26337              :             }
   26338              : 
   26339              :         default:
   26340              :           break;
   26341              :         }
   26342              :     }
   26343              : 
   26344              : 
   26345      6656078 :   combined_fn cfn;
   26346      6656078 :   if ((kind == vector_stmt || kind == scalar_stmt)
   26347      1615817 :       && stmt_info
   26348      1609851 :       && stmt_info->stmt
   26349      8265929 :       && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
   26350        17524 :     switch (cfn)
   26351              :       {
   26352           63 :       case CFN_FMA:
   26353           63 :         stmt_cost = ix86_vec_cost (mode,
   26354           63 :                                    mode == SFmode ? ix86_cost->fmass
   26355              :                                    : ix86_cost->fmasd);
   26356           63 :         break;
   26357           24 :       case CFN_MULH:
   26358           24 :         stmt_cost = ix86_multiplication_cost (ix86_cost, mode);
   26359           24 :         break;
   26360              :       default:
   26361              :         break;
   26362              :       }
   26363              : 
   26364      6656078 :   if (kind == vec_promote_demote)
   26365              :     {
   26366        45080 :       int outer_size
   26367              :         = tree_to_uhwi
   26368        45080 :             (TYPE_SIZE
   26369        45080 :                 (TREE_TYPE (gimple_assign_lhs (stmt_info->stmt))));
   26370        45080 :       int inner_size
   26371              :         = tree_to_uhwi
   26372        45080 :             (TYPE_SIZE
   26373        45080 :                 (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt))));
   26374        45080 :       bool inner_fp = FLOAT_TYPE_P
   26375              :                         (TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)));
   26376              : 
   26377         3831 :       if (fp && inner_fp)
   26378         3431 :         stmt_cost = vec_fp_conversion_cost
   26379         3431 :                           (ix86_tune_cost, GET_MODE_BITSIZE (mode));
   26380        41649 :       else if (fp && !inner_fp)
   26381         4106 :         stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtpi2ps);
   26382        37543 :       else if (!fp && inner_fp)
   26383          400 :         stmt_cost = ix86_vec_cost (mode, ix86_cost->cvtps2pi);
   26384              :       else
   26385        37143 :         stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
   26386              :       /* VEC_PACK_TRUNC_EXPR and similar demote operations: If outer size is
   26387              :          greater than inner size we will end up doing two conversions and
   26388              :          packing them.  We always pack pairs; if the size difference is greater
   26389              :          it is split into multiple demote operations.  */
   26390        45080 :       if (inner_size > outer_size)
   26391        17470 :         stmt_cost = stmt_cost * 2
   26392        17470 :                     + ix86_vec_cost (mode, ix86_cost->sse_op);
   26393              :     }
   26394              : 
   26395              :   /* If we do elementwise loads into a vector then we are bound by
   26396              :      latency and execution resources for the many scalar loads
   26397              :      (AGU and load ports).  Try to account for this by scaling the
   26398              :      construction cost by the number of elements involved.  */
   26399      6656078 :   if ((kind == vec_construct || kind == vec_to_scalar)
   26400      6656078 :       && ((node
   26401       424883 :            && (((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
   26402       436480 :                  || (SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_STRIDED_SLP
   26403        36111 :                      && SLP_TREE_LANES (node) == 1))
   26404        40984 :                 && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
   26405              :                                         (SLP_TREE_REPRESENTATIVE (node))))
   26406              :                     != INTEGER_CST))
   26407        69296 :                || mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node))))))
   26408              :     {
   26409        30692 :       stmt_cost = ix86_default_vector_cost (kind, mode);
   26410        30692 :       stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
   26411              :     }
   26412      6625386 :   else if ((kind == vec_construct || kind == scalar_to_vec)
   26413       445424 :            && node
   26414       415204 :            && SLP_TREE_DEF_TYPE (node) == vect_external_def)
   26415              :     {
   26416       303058 :       stmt_cost = ix86_default_vector_cost (kind, mode);
   26417       303058 :       unsigned i;
   26418       303058 :       tree op;
   26419      1295105 :       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
   26420       688989 :         if (TREE_CODE (op) == SSA_NAME)
   26421       466416 :           TREE_VISITED (op) = 0;
   26422       992047 :       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
   26423              :         {
   26424       688989 :           if (TREE_CODE (op) != SSA_NAME
   26425       466416 :               || TREE_VISITED (op))
   26426       255956 :             continue;
   26427       433033 :           TREE_VISITED (op) = 1;
   26428       433033 :           gimple *def = SSA_NAME_DEF_STMT (op);
   26429       433033 :           tree tem;
   26430              :           /* Look through a conversion.  */
   26431       433033 :           if (is_gimple_assign (def)
   26432       246627 :               && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
   26433        26995 :               && ((tem = gimple_assign_rhs1 (def)), true)
   26434       460028 :               && TREE_CODE (tem) == SSA_NAME)
   26435        26784 :             def = SSA_NAME_DEF_STMT (tem);
   26436              :           /* When the component is loaded from memory without sign-
   26437              :              or zero-extension we can move it to a vector register and/or
   26438              :              insert it via vpinsr with a memory operand.  */
   26439       433033 :           if (gimple_assign_load_p (def)
   26440       130193 :               && tree_nop_conversion_p (TREE_TYPE (op),
   26441       130193 :                                         TREE_TYPE (gimple_assign_lhs (def)))
   26442       687409 :               && (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op))) > 1
   26443         5162 :                   || TARGET_SSE4_1))
   26444              :             ;
   26445              :           /* When the component is extracted from a vector it is already
   26446              :              in a vector register.  */
   26447       310010 :           else if (is_gimple_assign (def)
   26448       119435 :                    && gimple_assign_rhs_code (def) == BIT_FIELD_REF
   26449       312744 :                    && VECTOR_TYPE_P (TREE_TYPE
   26450              :                                 (TREE_OPERAND (gimple_assign_rhs1 (def), 0))))
   26451              :             ;
   26452              :           else
   26453              :             {
   26454       307691 :               if (fp)
   26455              :                 {
   26456              :                   /* Scalar FP values residing in x87 registers need to be
   26457              :                      spilled and reloaded.  */
   26458        13430 :                   auto mode2 = TYPE_MODE (TREE_TYPE (op));
   26459        13430 :                   if (IS_STACK_MODE (mode2))
   26460              :                     {
   26461          971 :                       int cost
   26462              :                         = (ix86_cost->hard_register.fp_store[mode2 == SFmode
   26463          971 :                                                              ? 0 : 1]
   26464          971 :                            + ix86_cost->sse_load[sse_store_index (mode2)]);
   26465          971 :                       stmt_cost += COSTS_N_INSNS (cost) / 2;
   26466              :                     }
   26467        13430 :                   m_num_sse_needed[where]++;
   26468              :                 }
   26469              :               else
   26470              :                 {
   26471       294261 :                   m_num_gpr_needed[where]++;
   26472              : 
   26473       294261 :                   stmt_cost += COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
   26474              :                 }
   26475              :             }
   26476              :         }
   26477       992047 :       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
   26478       688989 :         if (TREE_CODE (op) == SSA_NAME)
   26479       466416 :           TREE_VISITED (op) = 0;
   26480              :     }
   26481      6656078 :   if (stmt_cost == -1)
   26482      5256269 :     stmt_cost = ix86_default_vector_cost (kind, mode);
   26483              : 
   26484      6656078 :   if (kind == vec_perm && vectype
   26485       177956 :       && GET_MODE_SIZE (TYPE_MODE (vectype)) == 32
   26486              :       /* BIT_FIELD_REF <vect_**, 64, 0> 0 times vec_perm costs 0 in body.  */
   26487      6659551 :       && count != 0)
   26488              :     {
   26489         3473 :       bool real_perm = true;
   26490         3473 :       unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
   26491              : 
   26492         3473 :       if (node
   26493         3470 :           && SLP_TREE_LOAD_PERMUTATION (node).exists ()
   26494              :           /* Loop vectorization will have 4 times vec_perm
   26495              :              with index as {0, 0, 0, 0}.
   26496              :              But it actually generates
   26497              :              vec_perm_expr <vect, vect, 0, 0, 0, 0>
   26498              :              vec_perm_expr <vect, vect, 1, 1, 1, 1>
   26499              :              vec_perm_expr <vect, vect, 2, 2, 2, 2>
   26500              :              Need to be handled separately.  */
   26501         6298 :           && is_a <bb_vec_info> (m_vinfo))
   26502              :         {
   26503           39 :           unsigned half = nunits / 2;
   26504           39 :           unsigned i = 0;
   26505           39 :           bool allsame = true;
   26506           39 :           unsigned first = SLP_TREE_LOAD_PERMUTATION (node)[0];
   26507           39 :           bool cross_lane_p = false;
   26508          198 :           for (i = 0 ; i != SLP_TREE_LANES (node); i++)
   26509              :             {
   26510          197 :               unsigned tmp = SLP_TREE_LOAD_PERMUTATION (node)[i];
   26511              :               /* allsame is just a broadcast.  */
   26512          197 :               if (tmp != first)
   26513           92 :                 allsame = false;
   26514              : 
   26515              :               /* 4 times vec_perm with number of lanes multiple of nunits.  */
   26516          197 :               tmp = tmp & (nunits - 1);
   26517          197 :               unsigned index = i & (nunits - 1);
   26518          197 :               if ((index < half && tmp >= half)
   26519          197 :                   || (index >= half && tmp < half))
   26520           65 :                 cross_lane_p = true;
   26521              : 
   26522          197 :               if (!allsame && cross_lane_p)
   26523              :                 break;
   26524              :             }
   26525              : 
   26526           39 :           if (i == SLP_TREE_LANES (node))
   26527              :             real_perm = false;
   26528              :         }
   26529              : 
   26530              :       if (real_perm)
   26531              :         {
   26532         3472 :           m_num_avx256_vec_perm[where] += count;
   26533         3472 :           if (dump_file && (dump_flags & TDF_DETAILS))
   26534              :             {
   26535          228 :               fprintf (dump_file, "Detected avx256 cross-lane permutation: ");
   26536          228 :               if (stmt_info)
   26537          225 :                 print_gimple_expr (dump_file, stmt_info->stmt, 0, TDF_SLIM);
   26538          228 :               fprintf (dump_file, " \n");
   26539              :             }
   26540              :         }
   26541              :     }
   26542              : 
   26543              :   /* Penalize DFmode vector operations for Bonnell.  */
   26544      6656078 :   if (TARGET_CPU_P (BONNELL) && kind == vector_stmt
   26545      6656140 :       && vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
   26546           12 :     stmt_cost *= 5;  /* FIXME: The value here is arbitrary.  */
   26547              : 
   26548              :   /* Statements in an inner loop relative to the loop being
   26549              :      vectorized are weighted more heavily.  The value here is
   26550              :      arbitrary and could potentially be improved with analysis.  */
   26551      6656078 :   retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
   26552              : 
   26553              :   /* We need to multiply all vector stmt cost by 1.7 (estimated cost)
   26554              :      for Silvermont as it has out of order integer pipeline and can execute
   26555              :      2 scalar instruction per tick, but has in order SIMD pipeline.  */
   26556      6656078 :   if ((TARGET_CPU_P (SILVERMONT) || TARGET_CPU_P (GOLDMONT)
   26557      6656078 :        || TARGET_CPU_P (GOLDMONT_PLUS) || TARGET_CPU_P (INTEL))
   26558         1811 :       && stmt_info && stmt_info->stmt)
   26559              :     {
   26560         1595 :       tree lhs_op = gimple_get_lhs (stmt_info->stmt);
   26561         1595 :       if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
   26562         1198 :         retval = (retval * 17) / 10;
   26563              :     }
   26564              : 
   26565      6656078 :   m_costs[where] += retval;
   26566              : 
   26567      6656078 :   return retval;
   26568              : }
   26569              : 
   26570              : void
   26571      1690129 : ix86_vector_costs::ix86_vect_estimate_reg_pressure ()
   26572              : {
   26573      1690129 :   unsigned gpr_spill_cost = COSTS_N_INSNS (ix86_cost->int_store [2]) / 2;
   26574      1690129 :   unsigned sse_spill_cost = COSTS_N_INSNS (ix86_cost->sse_store[0]) / 2;
   26575              : 
   26576              :   /* Any better way to have target available fp registers, currently use SSE_REGS.  */
   26577      1690129 :   unsigned target_avail_sse = TARGET_64BIT ? (TARGET_AVX512F ? 32 : 16) : 8;
   26578      6760516 :   for (unsigned i = 0; i != 3; i++)
   26579              :     {
   26580      5070387 :       if (m_num_gpr_needed[i] > target_avail_regs)
   26581          691 :         m_costs[i] += gpr_spill_cost * (m_num_gpr_needed[i] - target_avail_regs);
   26582              :       /* Only measure sse registers pressure.  */
   26583      5070387 :       if (TARGET_SSE && (m_num_sse_needed[i] > target_avail_sse))
   26584           92 :         m_costs[i] += sse_spill_cost * (m_num_sse_needed[i] - target_avail_sse);
   26585              :     }
   26586      1690129 : }
   26587              : 
/* Finalize the accumulated vectorization costs: apply loop-level cost
   adjustments (partial-vector rejection, unroll suggestion, register
   pressure, AVX256 permutation penalties) and suggest epilogue modes,
   then delegate to vector_costs::finish_cost to combine with
   SCALAR_COSTS.  */
void
ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
{
  loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
  if (loop_vinfo && !m_costing_for_scalar)
    {
      /* We are currently not asking the vectorizer to compare costs
	 between different vector mode sizes.  When using predication
	 that will end up always choosing the prefered mode size even
	 if there's a smaller mode covering all lanes.  Test for this
	 situation and artificially reject the larger mode attempt.
	 ???  We currently lack masked ops for sub-SSE sized modes,
	 so we could restrict this rejection to AVX and AVX512 modes
	 but error on the safe side for now.  */
      if (LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo)
	  && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
	  && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
	  && (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ())
	      > ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo))))
	m_costs[vect_body] = INT_MAX;

      /* We'd like to avoid using masking if there's an in-order reduction
	 to vectorize because that will also perform in-order adds of
	 masked elements (as neutral value, of course) here, but there
	 is currently no way to indicate to try un-masked with the same
	 mode.  */

      /* Check whether any reduction kind was counted during
	 add_stmt_cost; only then is unrolling considered below.  */
      bool any_reduc_p = false;
      for (int i = 0; i != X86_REDUC_LAST; i++)
	if (m_num_reduc[i])
	  {
	    any_reduc_p = true;
	    break;
	  }

      if (any_reduc_p
	  /* Not much gain for loop with gather and scatter.  */
	  && m_prefer_unroll
	  && !LOOP_VINFO_EPILOGUE_P (loop_vinfo))
	{
	  /* An explicit --param takes precedence over the per-CPU
	     cost-table unroll limit.  */
	  unsigned unroll_factor
	    = OPTION_SET_P (ix86_vect_unroll_limit)
	    ? ix86_vect_unroll_limit
	    : ix86_cost->vect_unroll_limit;

	  if (unroll_factor > 1)
	    {
	      /* Limit the unroll factor so that the total number of
		 partial reduction sums does not exceed the per-kind
		 latency-based threshold from the cost table.  */
	      for (int i = 0 ; i != X86_REDUC_LAST; i++)
		{
		  if (m_num_reduc[i])
		    {
		      unsigned tmp = CEIL (ix86_cost->reduc_lat_mult_thr[i],
					   m_num_reduc[i]);
		      unroll_factor = MIN (unroll_factor, tmp);
		    }
		}

	      /* Round up to a power of two.  */
	      m_suggested_unroll_factor  = 1 << ceil_log2 (unroll_factor);
	    }
	}

    }

  ix86_vect_estimate_reg_pressure ();

  /* Reject vectorization outright on tunings that want to avoid
     256-bit cross-lane permutations when any were counted.  */
  for (int i = 0; i != 3; i++)
    if (m_num_avx256_vec_perm[i]
	&& TARGET_AVX256_AVOID_VEC_PERM)
      m_costs[i] = INT_MAX;

  /* When X86_TUNE_AVX512_TWO_EPILOGUES is enabled arrange for both
     a AVX2 and a SSE epilogue for AVX512 vectorized loops.  */
  if (loop_vinfo
      && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
      && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32
      && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
    m_suggested_epilogue_mode = V16QImode;
  /* When a 128bit SSE vectorized epilogue still has a VF of 16 or larger
     enable a 64bit SSE epilogue.  */
  if (loop_vinfo
      && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
      && GET_MODE_SIZE (loop_vinfo->vector_mode) == 16
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () >= 16)
    m_suggested_epilogue_mode = V8QImode;

  /* When X86_TUNE_AVX512_MASKED_EPILOGUES is enabled try to use
     a masked epilogue if that doesn't seem detrimental.  */
  if (loop_vinfo
      && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)
      && LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () > 2
      /* Avoid a masked epilog if cascaded epilogues eventually get us
	 to one with VF 1 as that means no scalar epilog at all.  */
      && !((GET_MODE_SIZE (loop_vinfo->vector_mode)
	    / LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant () == 16)
	   && ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
      && ix86_tune_features[X86_TUNE_AVX512_MASKED_EPILOGUES]
      && !OPTION_SET_P (param_vect_partial_vector_usage))
    {
      bool avoid = false;
      if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
	  && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
	{
	  unsigned int peel_niter
	    = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
	  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
	    peel_niter += 1;
	  /* When we know the number of scalar iterations of the epilogue,
	     avoid masking when a single vector epilog iteration handles
	     it in full.  */
	  if (pow2p_hwi ((LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter)
			 % LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ()))
	    avoid = true;
	}
      /* For loops nested at least two deep, scan data dependences for
	 zero-distance inner-loop dependences, which make a masked
	 epilogue risky (see the example below).  */
      if (!avoid && loop_outer (loop_outer (LOOP_VINFO_LOOP (loop_vinfo))))
	for (auto ddr : LOOP_VINFO_DDRS (loop_vinfo))
	  {
	    if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	      ;
	    else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	      ;
	    else
	      {
		int loop_depth
		    = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
					  DDR_LOOP_NEST (ddr));
		if (DDR_NUM_DIST_VECTS (ddr) == 1
		    && DDR_DIST_VECTS (ddr)[0][loop_depth] == 0)
		  {
		    /* Avoid the case when there's an outer loop that might
		       traverse a multi-dimensional array with the inner
		       loop just executing the masked epilogue with a
		       read-write where the next outer iteration might
		       read from the masked part of the previous write,
		       'n' filling half a vector.
			 for (j = 0; j < m; ++j)
			   for (i = 0; i < n; ++i)
			     a[j][i] = c * a[j][i];  */
		    avoid = true;
		    break;
		  }
	      }
	  }
      /* Avoid using masking if there's an in-order reduction
	 to vectorize because that will also perform in-order adds of
	 masked elements (as neutral value, of course).  */
      if (!avoid)
	{
	  for (auto inst : LOOP_VINFO_SLP_INSTANCES (loop_vinfo))
	    if (SLP_INSTANCE_KIND (inst) == slp_inst_kind_reduc_group
		&& (vect_reduc_type (loop_vinfo, SLP_INSTANCE_TREE (inst))
		    == FOLD_LEFT_REDUCTION))
	      {
		avoid = true;
		break;
	      }
	}
      /* Nothing spoke against it: suggest a masked epilogue with the
	 same vector mode as the main loop.  */
      if (!avoid)
	{
	  m_suggested_epilogue_mode = loop_vinfo->vector_mode;
	  m_masked_epilogue = 1;
	}
    }

  vector_costs::finish_cost (scalar_costs);
}
   26753              : 
   26754              : /* Validate target specific memory model bits in VAL. */
   26755              : 
   26756              : static unsigned HOST_WIDE_INT
   26757       407670 : ix86_memmodel_check (unsigned HOST_WIDE_INT val)
   26758              : {
   26759       407670 :   enum memmodel model = memmodel_from_int (val);
   26760       407670 :   bool strong;
   26761              : 
   26762       407670 :   if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
   26763              :                                       |MEMMODEL_MASK)
   26764       407666 :       || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
   26765              :     {
   26766            4 :       warning (OPT_Winvalid_memory_model,
   26767              :                "unknown architecture specific memory model");
   26768            4 :       return MEMMODEL_SEQ_CST;
   26769              :     }
   26770       407666 :   strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
   26771       407666 :   if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
   26772              :     {
   26773            0 :       warning (OPT_Winvalid_memory_model,
   26774              :               "%<HLE_ACQUIRE%> not used with %<ACQUIRE%> or stronger "
   26775              :                "memory model");
   26776            0 :       return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
   26777              :     }
   26778       407666 :   if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
   26779              :     {
   26780            0 :       warning (OPT_Winvalid_memory_model,
   26781              :               "%<HLE_RELEASE%> not used with %<RELEASE%> or stronger "
   26782              :                "memory model");
   26783            0 :       return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
   26784              :     }
   26785              :   return val;
   26786              : }
   26787              : 
   26788              : /* Set CLONEI->vecsize_mangle, CLONEI->mask_mode, CLONEI->vecsize_int,
   26789              :    CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
   26790              :    CLONEI->simdlen.  Return 0 if SIMD clones shouldn't be emitted,
   26791              :    or number of vecsize_mangle variants that should be emitted.  */
   26792              : 
   26793              : static int
   26794         7589 : ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
   26795              :                                              struct cgraph_simd_clone *clonei,
   26796              :                                              tree base_type, int num,
   26797              :                                              bool explicit_p)
   26798              : {
   26799         7589 :   int ret = 1;
   26800              : 
   26801         7589 :   if (clonei->simdlen
   26802         7589 :       && (clonei->simdlen < 2
   26803         1321 :           || clonei->simdlen > 1024
   26804         1321 :           || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
   26805              :     {
   26806            0 :       if (explicit_p)
   26807            0 :         warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
   26808              :                     "unsupported simdlen %wd", clonei->simdlen.to_constant ());
   26809            0 :       return 0;
   26810              :     }
   26811              : 
   26812         7589 :   tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
   26813         7589 :   if (TREE_CODE (ret_type) != VOID_TYPE)
   26814         6797 :     switch (TYPE_MODE (ret_type))
   26815              :       {
   26816         6797 :       case E_QImode:
   26817         6797 :       case E_HImode:
   26818         6797 :       case E_SImode:
   26819         6797 :       case E_DImode:
   26820         6797 :       case E_SFmode:
   26821         6797 :       case E_DFmode:
   26822              :       /* case E_SCmode: */
   26823              :       /* case E_DCmode: */
   26824         6797 :         if (!AGGREGATE_TYPE_P (ret_type))
   26825              :           break;
   26826              :         /* FALLTHRU */
   26827            2 :       default:
   26828            2 :         if (explicit_p)
   26829            2 :           warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
   26830              :                       "unsupported return type %qT for simd", ret_type);
   26831            2 :         return 0;
   26832              :       }
   26833              : 
   26834         7587 :   tree t;
   26835         7587 :   int i;
   26836         7587 :   tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
   26837         7587 :   bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
   26838              : 
   26839         7587 :   for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
   26840        20430 :        t && t != void_list_node; t = TREE_CHAIN (t), i++)
   26841              :     {
   26842        16670 :       tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
   26843        12848 :       switch (TYPE_MODE (arg_type))
   26844              :         {
   26845        12829 :         case E_QImode:
   26846        12829 :         case E_HImode:
   26847        12829 :         case E_SImode:
   26848        12829 :         case E_DImode:
   26849        12829 :         case E_SFmode:
   26850        12829 :         case E_DFmode:
   26851              :         /* case E_SCmode: */
   26852              :         /* case E_DCmode: */
   26853        12829 :           if (!AGGREGATE_TYPE_P (arg_type))
   26854              :             break;
   26855              :           /* FALLTHRU */
   26856           41 :         default:
   26857           41 :           if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
   26858              :             break;
   26859            5 :           if (explicit_p)
   26860            5 :             warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
   26861              :                         "unsupported argument type %qT for simd", arg_type);
   26862              :           return 0;
   26863              :         }
   26864              :     }
   26865              : 
   26866         7582 :   if (!TREE_PUBLIC (node->decl) || !explicit_p)
   26867              :     {
   26868              :       /* If the function isn't exported, we can pick up just one ISA
   26869              :          for the clones.  */
   26870          114 :       if (TARGET_AVX512F)
   26871            0 :         clonei->vecsize_mangle = 'e';
   26872          114 :       else if (TARGET_AVX2)
   26873            1 :         clonei->vecsize_mangle = 'd';
   26874          113 :       else if (TARGET_AVX)
   26875           88 :         clonei->vecsize_mangle = 'c';
   26876              :       else
   26877           25 :         clonei->vecsize_mangle = 'b';
   26878              :       ret = 1;
   26879              :     }
   26880              :   else
   26881              :     {
   26882         7468 :       clonei->vecsize_mangle = "bcde"[num];
   26883         7468 :       ret = 4;
   26884              :     }
   26885         7582 :   clonei->mask_mode = VOIDmode;
   26886         7582 :   switch (clonei->vecsize_mangle)
   26887              :     {
   26888         1892 :     case 'b':
   26889         1892 :       clonei->vecsize_int = 128;
   26890         1892 :       clonei->vecsize_float = 128;
   26891         1892 :       break;
   26892         1955 :     case 'c':
   26893         1955 :       clonei->vecsize_int = 128;
   26894         1955 :       clonei->vecsize_float = 256;
   26895         1955 :       break;
   26896         1868 :     case 'd':
   26897         1868 :       clonei->vecsize_int = 256;
   26898         1868 :       clonei->vecsize_float = 256;
   26899         1868 :       break;
   26900         1867 :     case 'e':
   26901         1867 :       clonei->vecsize_int = 512;
   26902         1867 :       clonei->vecsize_float = 512;
   26903         1867 :       if (TYPE_MODE (base_type) == QImode)
   26904           19 :         clonei->mask_mode = DImode;
   26905              :       else
   26906         1848 :         clonei->mask_mode = SImode;
   26907              :       break;
   26908              :     }
   26909         7582 :   if (clonei->simdlen == 0)
   26910              :     {
   26911         6261 :       if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
   26912         3297 :         clonei->simdlen = clonei->vecsize_int;
   26913              :       else
   26914         2964 :         clonei->simdlen = clonei->vecsize_float;
   26915         6261 :       clonei->simdlen = clonei->simdlen
   26916        12522 :                         / GET_MODE_BITSIZE (TYPE_MODE (base_type));
   26917              :     }
   26918         1321 :   else if (clonei->simdlen > 16)
   26919              :     {
   26920              :       /* For compatibility with ICC, use the same upper bounds
   26921              :          for simdlen.  In particular, for CTYPE below, use the return type,
   26922              :          unless the function returns void, in that case use the characteristic
   26923              :          type.  If it is possible for given SIMDLEN to pass CTYPE value
   26924              :          in registers (8 [XYZ]MM* regs for 32-bit code, 16 [XYZ]MM* regs
   26925              :          for 64-bit code), accept that SIMDLEN, otherwise warn and don't
   26926              :          emit corresponding clone.  */
   26927           12 :       tree ctype = ret_type;
   26928           12 :       if (VOID_TYPE_P (ret_type))
   26929            0 :         ctype = base_type;
   26930           24 :       int cnt = GET_MODE_BITSIZE (TYPE_MODE (ctype)) * clonei->simdlen;
   26931           12 :       if (SCALAR_INT_MODE_P (TYPE_MODE (ctype)))
   26932            8 :         cnt /= clonei->vecsize_int;
   26933              :       else
   26934            4 :         cnt /= clonei->vecsize_float;
   26935           12 :       if (cnt > (TARGET_64BIT ? 16 : 8))
   26936              :         {
   26937            0 :           if (explicit_p)
   26938            0 :             warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
   26939              :                         "unsupported simdlen %wd",
   26940              :                         clonei->simdlen.to_constant ());
   26941            0 :           return 0;
   26942              :         }
   26943              :       }
   26944              :   return ret;
   26945              : }
   26946              : 
   26947              : /* If SIMD clone NODE can't be used in a vectorized loop
   26948              :    in current function, return -1, otherwise return a badness of using it
   26949              :    (0 if it is most desirable from vecsize_mangle point of view, 1
   26950              :    slightly less desirable, etc.).  */
   26951              : 
   26952              : static int
   26953         1768 : ix86_simd_clone_usable (struct cgraph_node *node, machine_mode)
   26954              : {
   26955         1768 :   switch (node->simdclone->vecsize_mangle)
   26956              :     {
   26957          621 :     case 'b':
   26958          621 :       if (!TARGET_SSE2)
   26959              :         return -1;
   26960          621 :       if (!TARGET_AVX)
   26961              :         return 0;
   26962          520 :       return TARGET_AVX512F ? 3 : TARGET_AVX2 ? 2 : 1;
   26963          627 :     case 'c':
   26964          627 :       if (!TARGET_AVX)
   26965              :         return -1;
   26966          582 :       return TARGET_AVX512F ? 2 : TARGET_AVX2 ? 1 : 0;
   26967          332 :     case 'd':
   26968          332 :       if (!TARGET_AVX2)
   26969              :         return -1;
   26970          139 :       return TARGET_AVX512F ? 1 : 0;
   26971          188 :     case 'e':
   26972          188 :       if (!TARGET_AVX512F)
   26973          130 :         return -1;
   26974              :       return 0;
   26975            0 :     default:
   26976            0 :       gcc_unreachable ();
   26977              :     }
   26978              : }
   26979              : 
   26980              : /* This function adjusts the unroll factor based on
   26981              :    the hardware capabilities. For ex, bdver3 has
   26982              :    a loop buffer which makes unrolling of smaller
   26983              :    loops less important. This function decides the
   26984              :    unroll factor using number of memory references
   26985              :    (value 32 is used) as a heuristic. */
   26986              : 
   26987              : static unsigned
   26988       812975 : ix86_loop_unroll_adjust (unsigned nunroll, class loop *loop)
   26989              : {
   26990       812975 :   basic_block *bbs;
   26991       812975 :   rtx_insn *insn;
   26992       812975 :   unsigned i;
   26993       812975 :   unsigned mem_count = 0;
   26994              : 
   26995              :   /* Unroll small size loop when unroll factor is not explicitly
   26996              :      specified.  */
   26997       812975 :   if (ix86_unroll_only_small_loops && !loop->unroll)
   26998              :     {
   26999       769757 :       if (loop->ninsns <= ix86_cost->small_unroll_ninsns)
   27000        70153 :         return MIN (nunroll, ix86_cost->small_unroll_factor);
   27001              :       else
   27002              :         return 1;
   27003              :     }
   27004              : 
   27005        43218 :   if (!TARGET_ADJUST_UNROLL)
   27006              :      return nunroll;
   27007              : 
   27008              :   /* Count the number of memory references within the loop body.
   27009              :      This value determines the unrolling factor for bdver3 and bdver4
   27010              :      architectures. */
   27011            7 :   subrtx_iterator::array_type array;
   27012            7 :   bbs = get_loop_body (loop);
   27013           21 :   for (i = 0; i < loop->num_nodes; i++)
   27014          102 :     FOR_BB_INSNS (bbs[i], insn)
   27015           88 :       if (NONDEBUG_INSN_P (insn))
   27016          464 :         FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
   27017          404 :           if (const_rtx x = *iter)
   27018          404 :             if (MEM_P (x))
   27019              :               {
   27020           25 :                 machine_mode mode = GET_MODE (x);
   27021           50 :                 unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
   27022           25 :                 if (n_words > 4)
   27023            0 :                   mem_count += 2;
   27024              :                 else
   27025           25 :                   mem_count += 1;
   27026              :               }
   27027            7 :   free (bbs);
   27028              : 
   27029            7 :   if (mem_count && mem_count <=32)
   27030            7 :     return MIN (nunroll, 32 / mem_count);
   27031              : 
   27032              :   return nunroll;
   27033            7 : }
   27034              : 
   27035              : 
   27036              : /* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P.  */
   27037              : 
   27038              : static bool
   27039       415274 : ix86_float_exceptions_rounding_supported_p (void)
   27040              : {
   27041              :   /* For x87 floating point with standard excess precision handling,
   27042              :      there is no adddf3 pattern (since x87 floating point only has
   27043              :      XFmode operations) so the default hook implementation gets this
   27044              :      wrong.  */
   27045       415274 :   return TARGET_80387 || (TARGET_SSE && TARGET_SSE_MATH);
   27046              : }
   27047              : 
   27048              : /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */
   27049              : 
static void
ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  /* Nothing to do when no FP unit is available at all.  */
  if (!TARGET_80387 && !(TARGET_SSE && TARGET_SSE_MATH))
    return;
  /* Accumulates the exception flags gathered from the x87 status word
     and/or MXCSR; passed to __atomic_feraiseexcept at the end.  */
  tree exceptions_var = create_tmp_var_raw (integer_type_node);
  if (TARGET_80387)
    {
      /* x87 part: HOLD saves the environment (fnstenv) and clears the
	 exception flags (fnclex); CLEAR is another fnclex; UPDATE reads
	 the status word (fnstsw) into exceptions_var and then restores
	 the saved environment (fldenv).  */
      tree fenv_index_type = build_index_type (size_int (6));
      /* 7 x unsigned = the 28-byte x87 environment buffer.  */
      tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
      tree fenv_var = create_tmp_var_raw (fenv_type);
      TREE_ADDRESSABLE (fenv_var) = 1;
      tree fenv_ptr = build_pointer_type (fenv_type);
      tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
      fenv_addr = fold_convert (ptr_type_node, fenv_addr);
      tree fnstenv = get_ix86_builtin (IX86_BUILTIN_FNSTENV);
      tree fldenv = get_ix86_builtin (IX86_BUILTIN_FLDENV);
      tree fnstsw = get_ix86_builtin (IX86_BUILTIN_FNSTSW);
      tree fnclex = get_ix86_builtin (IX86_BUILTIN_FNCLEX);
      tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
      tree hold_fnclex = build_call_expr (fnclex, 0);
      /* Wrap the fnstenv call as the initializer of fenv_var.  */
      fenv_var = build4 (TARGET_EXPR, fenv_type, fenv_var, hold_fnstenv,
                         NULL_TREE, NULL_TREE);
      *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var,
                      hold_fnclex);
      *clear = build_call_expr (fnclex, 0);
      tree sw_var = create_tmp_var_raw (short_unsigned_type_node);
      tree fnstsw_call = build_call_expr (fnstsw, 0);
      tree sw_mod = build4 (TARGET_EXPR, short_unsigned_type_node, sw_var,
                            fnstsw_call, NULL_TREE, NULL_TREE);
      /* Widen the 16-bit status word into the int accumulator.  */
      tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
      tree update_mod = build4 (TARGET_EXPR, integer_type_node,
                                exceptions_var, exceptions_x87,
                                NULL_TREE, NULL_TREE);
      *update = build2 (COMPOUND_EXPR, integer_type_node,
                        sw_mod, update_mod);
      tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
      *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
    }
  if (TARGET_SSE && TARGET_SSE_MATH)
    {
      /* SSE part: HOLD saves MXCSR (stmxcsr) and loads a modified copy
	 with all exception mask bits set (| 0x1f80) and the sticky flag
	 bits cleared (& ~0x3f); CLEAR reloads that modified value;
	 UPDATE re-reads MXCSR, ORs its flags into exceptions_var, and
	 restores the original MXCSR.  */
      tree mxcsr_orig_var = create_tmp_var_raw (unsigned_type_node);
      tree mxcsr_mod_var = create_tmp_var_raw (unsigned_type_node);
      tree stmxcsr = get_ix86_builtin (IX86_BUILTIN_STMXCSR);
      tree ldmxcsr = get_ix86_builtin (IX86_BUILTIN_LDMXCSR);
      tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
      tree hold_assign_orig = build4 (TARGET_EXPR, unsigned_type_node,
                                      mxcsr_orig_var, stmxcsr_hold_call,
                                      NULL_TREE, NULL_TREE);
      tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
                                  mxcsr_orig_var,
                                  build_int_cst (unsigned_type_node, 0x1f80));
      hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
                             build_int_cst (unsigned_type_node, 0xffffffc0));
      tree hold_assign_mod = build4 (TARGET_EXPR, unsigned_type_node,
                                     mxcsr_mod_var, hold_mod_val,
                                     NULL_TREE, NULL_TREE);
      tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
      tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
                              hold_assign_orig, hold_assign_mod);
      hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
                         ldmxcsr_hold_call);
      /* Chain onto any x87 sequences built above.  */
      if (*hold)
        *hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
      else
        *hold = hold_all;
      tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
      if (*clear)
        *clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
                         ldmxcsr_clear_call);
      else
        *clear = ldmxcsr_clear_call;
      tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
      tree exceptions_sse = fold_convert (integer_type_node,
                                          stxmcsr_update_call);
      if (*update)
        {
          /* Merge the SSE flags with the x87 flags already gathered.  */
          tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
                                        exceptions_var, exceptions_sse);
          tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
                                           exceptions_var, exceptions_mod);
          *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
                            exceptions_assign);
        }
      else
        *update = build4 (TARGET_EXPR, integer_type_node, exceptions_var,
                          exceptions_sse, NULL_TREE, NULL_TREE);
      tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
      *update = build2 (COMPOUND_EXPR, void_type_node, *update,
                        ldmxcsr_update_call);
    }
  /* Finally, raise the accumulated exceptions.  */
  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
                                                    1, exceptions_var);
  *update = build2 (COMPOUND_EXPR, void_type_node, *update,
                    atomic_feraiseexcept_call);
}
   27148              : 
   27149              : #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
   27150              : /* For i386, common symbol is local only for non-PIE binaries.  For
   27151              :    x86-64, common symbol is local only for non-PIE binaries or linker
   27152              :    supports copy reloc in PIE binaries.   */
   27153              : 
   27154              : static bool
   27155    768052975 : ix86_binds_local_p (const_tree exp)
   27156              : {
   27157    768052975 :   bool direct_extern_access
   27158    768052975 :     = (ix86_direct_extern_access
   27159   1532612905 :        && !(VAR_OR_FUNCTION_DECL_P (exp)
   27160    764559930 :             && lookup_attribute ("nodirect_extern_access",
   27161    764559930 :                                  DECL_ATTRIBUTES (exp))));
   27162    768052975 :   if (!direct_extern_access)
   27163         1225 :     ix86_has_no_direct_extern_access = true;
   27164    768052975 :   return default_binds_local_p_3 (exp, flag_shlib != 0, true,
   27165              :                                   direct_extern_access,
   27166              :                                   (direct_extern_access
   27167    768051750 :                                    && (!flag_pic
   27168    132127531 :                                        || (TARGET_64BIT
   27169    768052975 :                                            && HAVE_LD_PIE_COPYRELOC != 0))));
   27170              : }
   27171              : 
   27172              : /* If flag_pic or ix86_direct_extern_access is false, then neither
   27173              :    local nor global relocs should be placed in readonly memory.  */
   27174              : 
   27175              : static int
   27176      5129143 : ix86_reloc_rw_mask (void)
   27177              : {
   27178      5129143 :   return (flag_pic || !ix86_direct_extern_access) ? 3 : 0;
   27179              : }
   27180              : #endif
   27181              : 
   27182              : /* Return true iff ADDR can be used as a symbolic base address.  */
   27183              : 
   27184              : static bool
   27185         3162 : symbolic_base_address_p (rtx addr)
   27186              : {
   27187            0 :   if (SYMBOL_REF_P (addr))
   27188              :     return true;
   27189              : 
   27190         3138 :   if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_GOTOFF)
   27191            0 :     return true;
   27192              : 
   27193              :   return false;
   27194              : }
   27195              : 
   27196              : /* Return true iff ADDR can be used as a base address.  */
   27197              : 
   27198              : static bool
   27199         4734 : base_address_p (rtx addr)
   27200              : {
   27201            0 :   if (REG_P (addr))
   27202              :     return true;
   27203              : 
   27204         2944 :   if (symbolic_base_address_p (addr))
   27205            0 :     return true;
   27206              : 
   27207              :   return false;
   27208              : }
   27209              : 
   27210              : /* If MEM is in the form of [(base+symbase)+offset], extract the three
   27211              :    parts of address and set to BASE, SYMBASE and OFFSET, otherwise
   27212              :    return false.  */
   27213              : 
   27214              : static bool
   27215         3043 : extract_base_offset_in_addr (rtx mem, rtx *base, rtx *symbase, rtx *offset)
   27216              : {
   27217         3043 :   rtx addr;
   27218              : 
   27219         3043 :   gcc_assert (MEM_P (mem));
   27220              : 
   27221         3043 :   addr = XEXP (mem, 0);
   27222              : 
   27223         3043 :   if (GET_CODE (addr) == CONST)
   27224           10 :     addr = XEXP (addr, 0);
   27225              : 
   27226         3043 :   if (base_address_p (addr))
   27227              :     {
   27228         1352 :       *base = addr;
   27229         1352 :       *symbase = const0_rtx;
   27230         1352 :       *offset = const0_rtx;
   27231         1352 :       return true;
   27232              :     }
   27233              : 
   27234         1691 :   if (GET_CODE (addr) == PLUS
   27235         1691 :       && base_address_p (XEXP (addr, 0)))
   27236              :     {
   27237          462 :       rtx addend = XEXP (addr, 1);
   27238              : 
   27239          462 :       if (GET_CODE (addend) == CONST)
   27240            0 :         addend = XEXP (addend, 0);
   27241              : 
   27242          462 :       if (CONST_INT_P (addend))
   27243              :         {
   27244          244 :           *base = XEXP (addr, 0);
   27245          244 :           *symbase = const0_rtx;
   27246          244 :           *offset = addend;
   27247          244 :           return true;
   27248              :         }
   27249              : 
   27250              :       /* Also accept REG + symbolic ref, with or without a CONST_INT
   27251              :          offset.  */
   27252          218 :       if (REG_P (XEXP (addr, 0)))
   27253              :         {
   27254          218 :           if (symbolic_base_address_p (addend))
   27255              :             {
   27256            0 :               *base = XEXP (addr, 0);
   27257            0 :               *symbase = addend;
   27258            0 :               *offset = const0_rtx;
   27259            0 :               return true;
   27260              :             }
   27261              : 
   27262          218 :           if (GET_CODE (addend) == PLUS
   27263            0 :               && symbolic_base_address_p (XEXP (addend, 0))
   27264          218 :               && CONST_INT_P (XEXP (addend, 1)))
   27265              :             {
   27266            0 :               *base = XEXP (addr, 0);
   27267            0 :               *symbase = XEXP (addend, 0);
   27268            0 :               *offset = XEXP (addend, 1);
   27269            0 :               return true;
   27270              :             }
   27271              :         }
   27272              :     }
   27273              : 
   27274              :   return false;
   27275              : }
   27276              : 
   27277              : /* Given OPERANDS of consecutive load/store, check if we can merge
   27278              :    them into move multiple.  LOAD is true if they are load instructions.
   27279              :    MODE is the mode of memory operands.  */
   27280              : 
   27281              : bool
   27282         1697 : ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
   27283              :                                     machine_mode mode)
   27284              : {
   27285         1697 :   HOST_WIDE_INT offval_1, offval_2, msize;
   27286         1697 :   rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2,
   27287              :     symbase_1, symbase_2, offset_1, offset_2;
   27288              : 
   27289         1697 :   if (load)
   27290              :     {
   27291         1401 :       mem_1 = operands[1];
   27292         1401 :       mem_2 = operands[3];
   27293         1401 :       reg_1 = operands[0];
   27294         1401 :       reg_2 = operands[2];
   27295              :     }
   27296              :   else
   27297              :     {
   27298          296 :       mem_1 = operands[0];
   27299          296 :       mem_2 = operands[2];
   27300          296 :       reg_1 = operands[1];
   27301          296 :       reg_2 = operands[3];
   27302              :     }
   27303              : 
   27304         1697 :   gcc_assert (REG_P (reg_1) && REG_P (reg_2));
   27305              : 
   27306         1697 :   if (REGNO (reg_1) != REGNO (reg_2))
   27307              :     return false;
   27308              : 
   27309              :   /* Check if the addresses are in the form of [base+offset].  */
   27310         1697 :   if (!extract_base_offset_in_addr (mem_1, &base_1, &symbase_1, &offset_1))
   27311              :     return false;
   27312         1346 :   if (!extract_base_offset_in_addr (mem_2, &base_2, &symbase_2, &offset_2))
   27313              :     return false;
   27314              : 
   27315              :   /* Check if the bases are the same.  */
   27316          250 :   if (!rtx_equal_p (base_1, base_2) || !rtx_equal_p (symbase_1, symbase_2))
   27317          119 :     return false;
   27318              : 
   27319          131 :   offval_1 = INTVAL (offset_1);
   27320          131 :   offval_2 = INTVAL (offset_2);
   27321          131 :   msize = GET_MODE_SIZE (mode);
   27322              :   /* Check if mem_1 is adjacent to mem_2 and mem_1 has lower address.  */
   27323          131 :   if (offval_1 + msize != offval_2)
   27324              :     return false;
   27325              : 
   27326              :   return true;
   27327              : }
   27328              : 
   27329              : /* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */
   27330              : 
   27331              : static bool
   27332       342226 : ix86_optab_supported_p (int op, machine_mode mode1, machine_mode,
   27333              :                         optimization_type opt_type)
   27334              : {
   27335       342226 :   switch (op)
   27336              :     {
   27337          216 :     case asin_optab:
   27338          216 :     case acos_optab:
   27339          216 :     case log1p_optab:
   27340          216 :     case exp_optab:
   27341          216 :     case exp10_optab:
   27342          216 :     case exp2_optab:
   27343          216 :     case expm1_optab:
   27344          216 :     case ldexp_optab:
   27345          216 :     case scalb_optab:
   27346          216 :     case round_optab:
   27347          216 :     case lround_optab:
   27348          216 :       return opt_type == OPTIMIZE_FOR_SPEED;
   27349              : 
   27350          263 :     case rint_optab:
   27351          263 :       if (SSE_FLOAT_MODE_P (mode1)
   27352          144 :           && TARGET_SSE_MATH
   27353          128 :           && !flag_trapping_math
   27354           21 :           && !TARGET_SSE4_1
   27355              :           && mode1 != HFmode)
   27356           21 :         return opt_type == OPTIMIZE_FOR_SPEED;
   27357              :       return true;
   27358              : 
   27359         1892 :     case floor_optab:
   27360         1892 :     case ceil_optab:
   27361         1892 :     case btrunc_optab:
   27362         1892 :       if ((SSE_FLOAT_MODE_P (mode1)
   27363         1594 :            && TARGET_SSE_MATH
   27364         1515 :            && TARGET_SSE4_1)
   27365         1825 :           || mode1 == HFmode)
   27366              :         return true;
   27367         1756 :       return opt_type == OPTIMIZE_FOR_SPEED;
   27368              : 
   27369           84 :     case rsqrt_optab:
   27370           84 :       return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p (mode1);
   27371              : 
   27372              :     default:
   27373              :       return true;
   27374              :     }
   27375              : }
   27376              : 
   27377              : /* Address space support.
   27378              : 
   27379              :    This is not "far pointers" in the 16-bit sense, but an easy way
   27380              :    to use %fs and %gs segment prefixes.  Therefore:
   27381              : 
   27382              :     (a) All address spaces have the same modes,
   27383              :     (b) All address spaces have the same addresss forms,
   27384              :     (c) While %fs and %gs are technically subsets of the generic
   27385              :         address space, they are probably not subsets of each other.
   27386              :     (d) Since we have no access to the segment base register values
   27387              :         without resorting to a system call, we cannot convert a
   27388              :         non-default address space to a default address space.
   27389              :         Therefore we do not claim %fs or %gs are subsets of generic.
   27390              : 
   27391              :    Therefore we can (mostly) use the default hooks.  */
   27392              : 
   27393              : /* All use of segmentation is assumed to make address 0 valid.  */
   27394              : 
   27395              : static bool
   27396     67636174 : ix86_addr_space_zero_address_valid (addr_space_t as)
   27397              : {
   27398     67636174 :   return as != ADDR_SPACE_GENERIC;
   27399              : }
   27400              : 
   27401              : static void
   27402       774345 : ix86_init_libfuncs (void)
   27403              : {
   27404       774345 :   if (TARGET_64BIT)
   27405              :     {
   27406       759391 :       set_optab_libfunc (sdivmod_optab, TImode, "__divmodti4");
   27407       759391 :       set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
   27408              :     }
   27409              :   else
   27410              :     {
   27411        14954 :       set_optab_libfunc (sdivmod_optab, DImode, "__divmoddi4");
   27412        14954 :       set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
   27413              :     }
   27414              : 
   27415              : #if TARGET_MACHO
   27416              :   darwin_rename_builtins ();
   27417              : #endif
   27418       774345 : }
   27419              : 
/* Set the value of FLT_EVAL_METHOD in float.h.  When using only the
   FPU, assume that the fpcw is set to extended precision; when using
   only SSE, rounding is correct; when using both SSE and the FPU,
   the rounding precision is indeterminate, since either may be chosen
   apparently at random.  */

static enum flt_eval_method
ix86_get_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
      case EXCESS_PRECISION_TYPE_FAST:
        /* The fastest type to promote to will always be the native type,
           whether that occurs with implicit excess precision or
           otherwise.  */
        return TARGET_AVX512FP16
               ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
               : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
      case EXCESS_PRECISION_TYPE_STANDARD:
      case EXCESS_PRECISION_TYPE_IMPLICIT:
        /* Otherwise, the excess precision we want when we are
           in a standards compliant mode, and the implicit precision we
           provide would be identical were it not for the unpredictable
           cases.  */
        if (TARGET_AVX512FP16 && TARGET_SSE_MATH)
          return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
        else if (!TARGET_80387)
          /* No x87 at all: arithmetic happens at declared precision.  */
          return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
        else if (!TARGET_MIX_SSE_I387)
          {
            if (!(TARGET_SSE && TARGET_SSE_MATH))
              /* Pure x87 math: intermediates are 80-bit extended.  */
              return FLT_EVAL_METHOD_PROMOTE_TO_LONG_DOUBLE;
            else if (TARGET_SSE2)
              /* Pure SSE2 math: rounding matches the declared type.  */
              return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
          }

        /* If we are in standards compliant mode, but we know we will
           calculate in unpredictable precision, return
           FLT_EVAL_METHOD_FLOAT.  There is no reason to introduce explicit
           excess precision if the target can't guarantee it will honor
           it.  */
        return (type == EXCESS_PRECISION_TYPE_STANDARD
                ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT
                : FLT_EVAL_METHOD_UNPREDICTABLE);
      case EXCESS_PRECISION_TYPE_FLOAT16:
        /* -fexcess-precision=16 requires SSE math; with x87 in the mix
           the intermediates would be wider than _Float16.  */
        if (TARGET_80387
            && !(TARGET_SSE_MATH && TARGET_SSE))
          error ("%<-fexcess-precision=16%> is not compatible with %<-mfpmath=387%>");
        return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
      default:
        gcc_unreachable ();
    }

  /* Not reached; pacify compilers that do not see gcc_unreachable.  */
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
   27475              : 
   27476              : /* Return true if _BitInt(N) is supported and fill its details into *INFO.  */
   27477              : bool
   27478       347268 : ix86_bitint_type_info (int n, struct bitint_info *info)
   27479              : {
   27480       347268 :   if (n <= 8)
   27481         5332 :     info->limb_mode = QImode;
   27482       341936 :   else if (n <= 16)
   27483         1893 :     info->limb_mode = HImode;
   27484       340043 :   else if (n <= 32 || (!TARGET_64BIT && n > 64))
   27485        45556 :     info->limb_mode = SImode;
   27486              :   else
   27487       294487 :     info->limb_mode = DImode;
   27488       347268 :   info->abi_limb_mode = info->limb_mode;
   27489       347268 :   info->big_endian = false;
   27490       347268 :   info->extended = false;
   27491       347268 :   return true;
   27492              : }
   27493              : 
   27494              : /* Implement TARGET_C_MODE_FOR_FLOATING_TYPE.  Return DFmode, TFmode
   27495              :    or XFmode for TI_LONG_DOUBLE_TYPE which is for long double type,
   27496              :    based on long double bits, go with the default one for the others.  */
   27497              : 
   27498              : static machine_mode
   27499      3618778 : ix86_c_mode_for_floating_type (enum tree_index ti)
   27500              : {
   27501      3618778 :   if (ti == TI_LONG_DOUBLE_TYPE)
   27502       604023 :     return (TARGET_LONG_DOUBLE_64 ? DFmode
   27503       603991 :                                   : (TARGET_LONG_DOUBLE_128 ? TFmode : XFmode));
   27504      3014755 :   return default_mode_for_floating_type (ti);
   27505              : }
   27506              : 
   27507              : /* Returns modified FUNCTION_TYPE for cdtor callabi.  */
   27508              : tree
   27509        13675 : ix86_cxx_adjust_cdtor_callabi_fntype (tree fntype)
   27510              : {
   27511        13675 :   if (TARGET_64BIT
   27512           69 :       || TARGET_RTD
   27513        13744 :       || ix86_function_type_abi (fntype) != MS_ABI)
   27514        13675 :     return fntype;
   27515              :   /* For 32-bit MS ABI add thiscall attribute.  */
   27516            0 :   tree attribs = tree_cons (get_identifier ("thiscall"), NULL_TREE,
   27517            0 :                             TYPE_ATTRIBUTES (fntype));
   27518            0 :   return build_type_attribute_variant (fntype, attribs);
   27519              : }
   27520              : 
   27521              : /* Implement PUSH_ROUNDING.  On 386, we have pushw instruction that
   27522              :    decrements by exactly 2 no matter what the position was, there is no pushb.
   27523              : 
   27524              :    But as CIE data alignment factor on this arch is -4 for 32bit targets
   27525              :    and -8 for 64bit targets, we need to make sure all stack pointer adjustments
   27526              :    are in multiple of 4 for 32bit targets and 8 for 64bit targets.  */
   27527              : 
   27528              : poly_int64
   27529    271673257 : ix86_push_rounding (poly_int64 bytes)
   27530              : {
   27531    351044654 :   return ROUND_UP (bytes, UNITS_PER_WORD);
   27532              : }
   27533              : 
/* Linear Address Masking (LAM) tag layout: use 8 bits of metadata
   starting from bit 48 for LAM_U48, 6 bits of metadata starting from
   bit 57 for LAM_U57.  Both evaluate to 0 when no LAM mode is
   selected.  */
#define IX86_HWASAN_SHIFT (ix86_lam_type == lam_u48             \
                           ? 48                                 \
                           : (ix86_lam_type == lam_u57 ? 57 : 0))
#define IX86_HWASAN_TAG_SIZE (ix86_lam_type == lam_u48          \
                              ? 8                               \
                              : (ix86_lam_type == lam_u57 ? 6 : 0))
   27542              : 
   27543              : /* Implement TARGET_MEMTAG_CAN_TAG_ADDRESSES.  */
   27544              : bool
   27545      6195836 : ix86_memtag_can_tag_addresses ()
   27546              : {
   27547      6195836 :   return ix86_lam_type != lam_none && TARGET_LP64;
   27548              : }
   27549              : 
   27550              : /* Implement TARGET_MEMTAG_TAG_BITSIZE.  */
   27551              : unsigned char
   27552          450 : ix86_memtag_tag_bitsize ()
   27553              : {
   27554          450 :   return IX86_HWASAN_TAG_SIZE;
   27555              : }
   27556              : 
/* Implement TARGET_MEMTAG_SET_TAG.  Return UNTAGGED with TAG placed
   in its LAM metadata bits, preferably in TARGET.  */
rtx
ix86_memtag_set_tag (rtx untagged, rtx tag, rtx target)
{
  /* default_memtag_insert_random_tag may
     generate tag with value more than 6 bits.  */
  if (ix86_lam_type == lam_u57)
    {
      /* Clamp the tag to the 6 bits LAM_U57 provides.  */
      unsigned HOST_WIDE_INT and_imm
        = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;

      emit_insn (gen_andqi3 (tag, tag, GEN_INT (and_imm)));
    }
  /* Shift the tag up into the metadata bit positions...  */
  tag = expand_simple_binop (Pmode, ASHIFT, tag,
                             GEN_INT (IX86_HWASAN_SHIFT), NULL_RTX,
                             /* unsignedp = */1, OPTAB_WIDEN);
  /* ...and OR it into the pointer (assumed to have clear tag bits).  */
  rtx ret = expand_simple_binop (Pmode, IOR, untagged, tag, target,
                                 /* unsignedp = */1, OPTAB_DIRECT);
  return ret;
}
   27577              : 
/* Implement TARGET_MEMTAG_EXTRACT_TAG.  Return the tag held in the
   LAM metadata bits of TAGGED_POINTER as a QImode value; TARGET may
   be used for the intermediate shift result.  */
rtx
ix86_memtag_extract_tag (rtx tagged_pointer, rtx target)
{
  /* Move the metadata bits down to the low end of the word.  */
  rtx tag = expand_simple_binop (Pmode, LSHIFTRT, tagged_pointer,
                                 GEN_INT (IX86_HWASAN_SHIFT), target,
                                 /* unsignedp = */0,
                                 OPTAB_DIRECT);
  rtx ret = gen_reg_rtx (QImode);
  /* Mask off bit63 when LAM_U57.  */
  if (ix86_lam_type == lam_u57)
    {
      unsigned HOST_WIDE_INT and_imm
        = (HOST_WIDE_INT_1U << IX86_HWASAN_TAG_SIZE) - 1;
      emit_insn (gen_andqi3 (ret, gen_lowpart (QImode, tag),
                             gen_int_mode (and_imm, QImode)));
    }
  else
    /* For LAM_U48 the full low byte after the shift is the tag.  */
    emit_move_insn (ret, gen_lowpart (QImode, tag));
  return ret;
}
   27599              : 
/* The default implementation of TARGET_MEMTAG_UNTAGGED_POINTER.
   Return TAGGED_POINTER with its tag bits cleared, preferably in
   TARGET.  */
rtx
ix86_memtag_untagged_pointer (rtx tagged_pointer, rtx target)
{
  /* Leave bit63 alone.  The mask keeps bits [0, IX86_HWASAN_SHIFT)
     and bit 63, clearing only the tag bits in between.  */
  rtx tag_mask = gen_int_mode (((HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT)
                                + (HOST_WIDE_INT_1U << 63) - 1),
                               Pmode);
  rtx untagged_base = expand_simple_binop (Pmode, AND, tagged_pointer,
                                           tag_mask, target, true,
                                           OPTAB_DIRECT);
  gcc_assert (untagged_base);
  return untagged_base;
}
   27614              : 
/* Implement TARGET_MEMTAG_ADD_TAG.  Return BASE + OFFSET with its tag
   incremented by TAG_OFFSET.  */
rtx
ix86_memtag_add_tag (rtx base, poly_int64 offset, unsigned char tag_offset)
{
  rtx base_tag = gen_reg_rtx (QImode);
  rtx base_addr = gen_reg_rtx (Pmode);
  rtx tagged_addr = gen_reg_rtx (Pmode);
  rtx new_tag = gen_reg_rtx (QImode);
  /* NOTE(review): and_imm is derived from IX86_HWASAN_SHIFT (48/57),
     so after gen_int_mode truncates it to QImode the mask is 0xff and
     the AND below looks like a no-op; the real tag-width masking
     happens inside ix86_memtag_set_tag.  Possibly IX86_HWASAN_TAG_SIZE
     was intended — confirm against upstream.  */
  unsigned HOST_WIDE_INT and_imm
    = (HOST_WIDE_INT_1U << IX86_HWASAN_SHIFT) - 1;

  /* When there's "overflow" in tag adding,
     need to mask the most significant bit off.  */
  emit_move_insn (base_tag, ix86_memtag_extract_tag (base, NULL_RTX));
  emit_move_insn (base_addr,
                  ix86_memtag_untagged_pointer (base, NULL_RTX));
  emit_insn (gen_add2_insn (base_tag, gen_int_mode (tag_offset, QImode)));
  emit_move_insn (new_tag, base_tag);
  emit_insn (gen_andqi3 (new_tag, new_tag, gen_int_mode (and_imm, QImode)));
  emit_move_insn (tagged_addr,
                  ix86_memtag_set_tag (base_addr, new_tag, NULL_RTX));
  return plus_constant (Pmode, tagged_addr, offset);
}
   27638              : 
   27639              : /* Implement TARGET_HAVE_CCMP.  */
   27640              : static bool
   27641      8094506 : ix86_have_ccmp ()
   27642              : {
   27643      8094506 :   return (bool) TARGET_APX_CCMP;
   27644              : }
   27645              : 
   27646              : /* Implement TARGET_MODE_CAN_TRANSFER_BITS.  */
   27647              : static bool
   27648      4549977 : ix86_mode_can_transfer_bits (machine_mode mode)
   27649              : {
   27650      4549977 :   if (GET_MODE_CLASS (mode) == MODE_FLOAT
   27651      4503667 :       || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
   27652       111112 :     switch (GET_MODE_INNER (mode))
   27653              :       {
   27654        53370 :       case E_SFmode:
   27655        53370 :       case E_DFmode:
   27656              :         /* These suffer from normalization upon load when not using SSE.  */
   27657        53370 :         return !(ix86_fpmath & FPMATH_387);
   27658              :       default:
   27659              :         return true;
   27660              :       }
   27661              : 
   27662              :   return true;
   27663              : }
   27664              : 
   27665              : /* Implement TARGET_REDZONE_CLOBBER.  */
   27666              : static rtx
   27667            2 : ix86_redzone_clobber ()
   27668              : {
   27669            2 :   cfun->machine->asm_redzone_clobber_seen = true;
   27670            2 :   if (ix86_using_red_zone ())
   27671              :     {
   27672            2 :       rtx base = plus_constant (Pmode, stack_pointer_rtx, -RED_ZONE_SIZE);
   27673            2 :       rtx mem = gen_rtx_MEM (BLKmode, base);
   27674            2 :       set_mem_size (mem, RED_ZONE_SIZE);
   27675            2 :       return mem;
   27676              :     }
   27677              :   return NULL_RTX;
   27678              : }
   27679              : 
   27680              : /* Target-specific selftests.  */
   27681              : 
   27682              : #if CHECKING_P
   27683              : 
   27684              : namespace selftest {
   27685              : 
/* Verify that hard regs are dumped as expected (in compact mode).
   Hard regno 0 prints as "ax" and regno 1 as "dx" on this target.  */

static void
ix86_test_dumping_hard_regs ()
{
  ASSERT_RTL_DUMP_EQ ("(reg:SI ax)", gen_raw_REG (SImode, 0));
  ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
}
   27694              : 
/* Test dumping an insn with repeated references to the same SCRATCH,
   to verify the rtx_reuse code.  */

static void
ix86_test_dumping_memory_blockage ()
{
  /* Reset the insn chain so the emitted insn gets a fresh uid.  */
  set_new_first_and_last_insn (NULL, NULL);

  rtx pat = gen_memory_blockage ();
  rtx_reuse_manager r;
  r.preprocess (pat);

  /* Verify that the repeated references to the SCRATCH show use
     reuse IDS.  The first should be prefixed with a reuse ID,
     and the second should be dumped as a "reuse_rtx" of that ID.
     The expected string assumes Pmode == DImode.  */
  if (Pmode == DImode)
    ASSERT_RTL_DUMP_EQ_WITH_REUSE
      ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0  A8])\n"
       "        (unspec:BLK [\n"
       "                (mem/v:BLK (reuse_rtx 0) [0  A8])\n"
       "            ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
}
   27718              : 
/* Verify loading an RTL dump; specifically a dump of copying
   a param on x86_64 from a hard reg into the frame.
   This test is target-specific since the dump contains target-specific
   hard reg names.  */

static void
ix86_test_loading_dump_fragment_1 ()
{
  rtl_dump_test t (SELFTEST_LOCATION,
                   locate_file ("x86_64/copy-hard-reg-into-frame.rtl"));

  rtx_insn *insn = get_insn_by_uid (1);

  /* The block structure and indentation here is purely for
     readability; it mirrors the structure of the rtx.  */
  tree mem_expr;
  {
    rtx pat = PATTERN (insn);
    ASSERT_EQ (SET, GET_CODE (pat));
    {
      rtx dest = SET_DEST (pat);
      ASSERT_EQ (MEM, GET_CODE (dest));
      /* Verify the "/c" was parsed.  */
      ASSERT_TRUE (RTX_FLAG (dest, call));
      ASSERT_EQ (SImode, GET_MODE (dest));
      {
        rtx addr = XEXP (dest, 0);
        ASSERT_EQ (PLUS, GET_CODE (addr));
        ASSERT_EQ (DImode, GET_MODE (addr));
        {
          rtx lhs = XEXP (addr, 0);
          /* Verify that the "frame" REG was consolidated.  */
          ASSERT_RTX_PTR_EQ (frame_pointer_rtx, lhs);
        }
        {
          rtx rhs = XEXP (addr, 1);
          ASSERT_EQ (CONST_INT, GET_CODE (rhs));
          ASSERT_EQ (-4, INTVAL (rhs));
        }
      }
      /* Verify the "[1 i+0 S4 A32]" was parsed.  */
      ASSERT_EQ (1, MEM_ALIAS_SET (dest));
      /* "i" should have been handled by synthesizing a global int
         variable named "i".  */
      mem_expr = MEM_EXPR (dest);
      ASSERT_NE (mem_expr, NULL);
      ASSERT_EQ (VAR_DECL, TREE_CODE (mem_expr));
      ASSERT_EQ (integer_type_node, TREE_TYPE (mem_expr));
      ASSERT_EQ (IDENTIFIER_NODE, TREE_CODE (DECL_NAME (mem_expr)));
      ASSERT_STREQ ("i", IDENTIFIER_POINTER (DECL_NAME (mem_expr)));
      /* "+0".  */
      ASSERT_TRUE (MEM_OFFSET_KNOWN_P (dest));
      ASSERT_EQ (0, MEM_OFFSET (dest));
      /* "S4".  */
      ASSERT_EQ (4, MEM_SIZE (dest));
      /* "A32".  */
      ASSERT_EQ (32, MEM_ALIGN (dest));
    }
    {
      rtx src = SET_SRC (pat);
      ASSERT_EQ (REG, GET_CODE (src));
      ASSERT_EQ (SImode, GET_MODE (src));
      ASSERT_EQ (5, REGNO (src));
      tree reg_expr = REG_EXPR (src);
      /* "i" here should point to the same var as for the MEM_EXPR.  */
      ASSERT_EQ (reg_expr, mem_expr);
    }
  }
}
   27788              : 
/* Verify that the RTL loader copes with a call_insn dump.
   This test is target-specific since the dump contains a target-specific
   hard reg name.  */

static void
ix86_test_loading_call_insn ()
{
  /* The test dump includes register "xmm0", which requires TARGET_SSE
     to exist.  */
  if (!TARGET_SSE)
    return;

  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/call-insn.rtl"));

  rtx_insn *insn = get_insns ();
  ASSERT_EQ (CALL_INSN, GET_CODE (insn));

  /* "/j".  */
  ASSERT_TRUE (RTX_FLAG (insn, jump));

  rtx pat = PATTERN (insn);
  ASSERT_EQ (CALL, GET_CODE (SET_SRC (pat)));

  /* Verify REG_NOTES.  */
  {
    /* "(expr_list:REG_CALL_DECL".   */
    ASSERT_EQ (EXPR_LIST, GET_CODE (REG_NOTES (insn)));
    rtx_expr_list *note0 = as_a <rtx_expr_list *> (REG_NOTES (insn));
    ASSERT_EQ (REG_CALL_DECL, REG_NOTE_KIND (note0));

    /* "(expr_list:REG_EH_REGION (const_int 0 [0])".  */
    rtx_expr_list *note1 = note0->next ();
    ASSERT_EQ (REG_EH_REGION, REG_NOTE_KIND (note1));

    ASSERT_EQ (NULL, note1->next ());
  }

  /* Verify CALL_INSN_FUNCTION_USAGE.  */
  {
    /* "(expr_list:DF (use (reg:DF 21 xmm0))".  */
    rtx_expr_list *usage
      = as_a <rtx_expr_list *> (CALL_INSN_FUNCTION_USAGE (insn));
    ASSERT_EQ (EXPR_LIST, GET_CODE (usage));
    ASSERT_EQ (DFmode, GET_MODE (usage));
    ASSERT_EQ (USE, GET_CODE (usage->element ()));
    ASSERT_EQ (NULL, usage->next ());
  }
}
   27837              : 
/* Verify that the RTL loader copes a dump from print_rtx_function.
   This test is target-specific since the dump contains target-specific
   hard reg names.  */

static void
ix86_test_loading_full_dump ()
{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/times-two.rtl"));

  ASSERT_STREQ ("times_two", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  /* Spot-check a few insns by uid against the dump's contents.  */
  rtx_insn *insn_1 = get_insn_by_uid (1);
  ASSERT_EQ (NOTE, GET_CODE (insn_1));

  rtx_insn *insn_7 = get_insn_by_uid (7);
  ASSERT_EQ (INSN, GET_CODE (insn_7));
  ASSERT_EQ (PARALLEL, GET_CODE (PATTERN (insn_7)));

  rtx_insn *insn_15 = get_insn_by_uid (15);
  ASSERT_EQ (INSN, GET_CODE (insn_15));
  ASSERT_EQ (USE, GET_CODE (PATTERN (insn_15)));

  /* Verify crtl->return_rtx.  */
  ASSERT_EQ (REG, GET_CODE (crtl->return_rtx));
  ASSERT_EQ (0, REGNO (crtl->return_rtx));
  ASSERT_EQ (SImode, GET_MODE (crtl->return_rtx));
}
   27865              : 
/* Verify that the RTL loader copes with UNSPEC and UNSPEC_VOLATILE insns.
   In particular, verify that it correctly loads the 2nd operand.
   This test is target-specific since these are machine-specific
   operands (and enums).  */

static void
ix86_test_loading_unspec ()
{
  rtl_dump_test t (SELFTEST_LOCATION, locate_file ("x86_64/unspec.rtl"));

  ASSERT_STREQ ("test_unspec", IDENTIFIER_POINTER (DECL_NAME (cfun->decl)));

  ASSERT_TRUE (cfun);

  /* Test of an UNSPEC.  */
   rtx_insn *insn = get_insns ();
  ASSERT_EQ (INSN, GET_CODE (insn));
  rtx set = single_set (insn);
  ASSERT_NE (NULL, set);
  rtx dst = SET_DEST (set);
  ASSERT_EQ (MEM, GET_CODE (dst));
  rtx src = SET_SRC (set);
  ASSERT_EQ (UNSPEC, GET_CODE (src));
  ASSERT_EQ (BLKmode, GET_MODE (src));
  ASSERT_EQ (UNSPEC_MEMORY_BLOCKAGE, XINT (src, 1));

  rtx v0 = XVECEXP (src, 0, 0);

  /* Verify that the two uses of the first SCRATCH have pointer
     equality.  */
  rtx scratch_a = XEXP (dst, 0);
  ASSERT_EQ (SCRATCH, GET_CODE (scratch_a));

  rtx scratch_b = XEXP (v0, 0);
  ASSERT_EQ (SCRATCH, GET_CODE (scratch_b));

  ASSERT_EQ (scratch_a, scratch_b);

  /* Verify that the two mems are thus treated as equal.  */
  ASSERT_TRUE (rtx_equal_p (dst, v0));

  /* Verify that the insn is recognized.  */
  ASSERT_NE(-1, recog_memoized (insn));

  /* Test of an UNSPEC_VOLATILE, which has its own enum values.  */
  insn = NEXT_INSN (insn);
  ASSERT_EQ (INSN, GET_CODE (insn));

  set = single_set (insn);
  ASSERT_NE (NULL, set);

  src = SET_SRC (set);
  ASSERT_EQ (UNSPEC_VOLATILE, GET_CODE (src));
  ASSERT_EQ (UNSPECV_RDTSCP, XINT (src, 1));
}
   27921              : 
/* Run all target-specific selftests.  Called from the selftest
   framework when GCC is built with --enable-checking.  */

static void
ix86_run_selftests (void)
{
  ix86_test_dumping_hard_regs ();
  ix86_test_dumping_memory_blockage ();

  /* Various tests of loading RTL dumps, here because they contain
     ix86-isms (e.g. names of hard regs).  */
  ix86_test_loading_dump_fragment_1 ();
  ix86_test_loading_call_insn ();
  ix86_test_loading_full_dump ();
  ix86_test_loading_unspec ();
}
   27937              : 
   27938              : } // namespace selftest
   27939              : 
   27940              : #endif /* CHECKING_P */
   27941              : 
/* Table of machine-attribute tables for this target; only the
   GNU-style attribute table is registered here.  */
static const scoped_attribute_specs *const ix86_attribute_table[] =
{
  &ix86_gnu_attribute_table
};
   27946              : 
/* Initialize the GCC target structure.  Each #undef/#define pair
   installs an i386-specific implementation of a targetm hook.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

/* Attribute handling.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

/* Diagnostics for target-invalid conversions and operations.  */
#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION ix86_invalid_conversion

#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

/* Built-in function support.  */
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather

#undef TARGET_VECTORIZE_BUILTIN_SCATTER
#define TARGET_VECTORIZE_BUILTIN_SCATTER ix86_vectorize_builtin_scatter

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

/* Assembler output.  */
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
#define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
  ix86_print_patchable_function_entry

/* Subtargets (e.g. Darwin, Cygwin) may override section encoding.  */
#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name

/* Unaligned output is the same as aligned output on this target.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
   28038              : 
   28039              : #undef TARGET_PRINT_OPERAND
   28040              : #define TARGET_PRINT_OPERAND ix86_print_operand
   28041              : #undef TARGET_PRINT_OPERAND_ADDRESS
   28042              : #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
   28043              : #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
   28044              : #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
   28045              : #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
   28046              : #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
   28047              : 
   28048              : #undef TARGET_SCHED_INIT_GLOBAL
   28049              : #define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
   28050              : #undef TARGET_SCHED_ADJUST_COST
   28051              : #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
   28052              : #undef TARGET_SCHED_ISSUE_RATE
   28053              : #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
   28054              : #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
   28055              : #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
   28056              :   ia32_multipass_dfa_lookahead
   28057              : #undef TARGET_SCHED_MACRO_FUSION_P
   28058              : #define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
   28059              : #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
   28060              : #define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
   28061              : 
   28062              : #undef TARGET_FUNCTION_OK_FOR_SIBCALL
   28063              : #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
   28064              : 
   28065              : #undef TARGET_MEMMODEL_CHECK
   28066              : #define TARGET_MEMMODEL_CHECK ix86_memmodel_check
   28067              : 
   28068              : #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
   28069              : #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
   28070              : 
   28071              : #ifdef HAVE_AS_TLS
   28072              : #undef TARGET_HAVE_TLS
   28073              : #define TARGET_HAVE_TLS true
   28074              : #endif
   28075              : #undef TARGET_CANNOT_FORCE_CONST_MEM
   28076              : #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
   28077              : #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
   28078              : #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
   28079              : 
   28080              : #undef TARGET_DELEGITIMIZE_ADDRESS
   28081              : #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
   28082              : 
   28083              : #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
   28084              : #define TARGET_CONST_NOT_OK_FOR_DEBUG_P ix86_const_not_ok_for_debug_p
   28085              : 
   28086              : #undef TARGET_MS_BITFIELD_LAYOUT_P
   28087              : #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
   28088              : 
   28089              : #if TARGET_MACHO
   28090              : #undef TARGET_BINDS_LOCAL_P
   28091              : #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
   28092              : #else
   28093              : #undef TARGET_BINDS_LOCAL_P
   28094              : #define TARGET_BINDS_LOCAL_P ix86_binds_local_p
   28095              : #endif
   28096              : #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
   28097              : #undef TARGET_BINDS_LOCAL_P
   28098              : #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
   28099              : #endif
   28100              : 
   28101              : #undef TARGET_ASM_OUTPUT_MI_THUNK
   28102              : #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
   28103              : #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
   28104              : #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
   28105              : 
   28106              : #undef TARGET_ASM_FILE_START
   28107              : #define TARGET_ASM_FILE_START x86_file_start
   28108              : 
   28109              : #undef TARGET_OPTION_OVERRIDE
   28110              : #define TARGET_OPTION_OVERRIDE ix86_option_override
   28111              : 
   28112              : #undef TARGET_REGISTER_MOVE_COST
   28113              : #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
   28114              : #undef TARGET_MEMORY_MOVE_COST
   28115              : #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
   28116              : #undef TARGET_RTX_COSTS
   28117              : #define TARGET_RTX_COSTS ix86_rtx_costs
   28118              : #undef TARGET_INSN_COST
   28119              : #define TARGET_INSN_COST ix86_insn_cost
   28120              : #undef TARGET_ADDRESS_COST
   28121              : #define TARGET_ADDRESS_COST ix86_address_cost
   28122              : 
   28123              : #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
   28124              : #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
   28125              :   ix86_use_by_pieces_infrastructure_p
   28126              : 
   28127              : #undef TARGET_OVERLAP_OP_BY_PIECES_P
   28128              : #define TARGET_OVERLAP_OP_BY_PIECES_P hook_bool_void_true
   28129              : 
   28130              : #undef TARGET_FLAGS_REGNUM
   28131              : #define TARGET_FLAGS_REGNUM FLAGS_REG
   28132              : #undef TARGET_FIXED_CONDITION_CODE_REGS
   28133              : #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
   28134              : #undef TARGET_CC_MODES_COMPATIBLE
   28135              : #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
   28136              : 
   28137              : #undef TARGET_MACHINE_DEPENDENT_REORG
   28138              : #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
   28139              : 
   28140              : #undef TARGET_BUILD_BUILTIN_VA_LIST
   28141              : #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
   28142              : 
   28143              : #undef TARGET_FOLD_BUILTIN
   28144              : #define TARGET_FOLD_BUILTIN ix86_fold_builtin
   28145              : 
   28146              : #undef TARGET_GIMPLE_FOLD_BUILTIN
   28147              : #define TARGET_GIMPLE_FOLD_BUILTIN ix86_gimple_fold_builtin
   28148              : 
   28149              : #undef TARGET_COMPARE_VERSION_PRIORITY
   28150              : #define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
   28151              : 
   28152              : #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
   28153              : #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
   28154              :   ix86_generate_version_dispatcher_body
   28155              : 
   28156              : #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
   28157              : #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
   28158              :   ix86_get_function_versions_dispatcher
   28159              : 
   28160              : #undef TARGET_ENUM_VA_LIST_P
   28161              : #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
   28162              : 
   28163              : #undef TARGET_FN_ABI_VA_LIST
   28164              : #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
   28165              : 
   28166              : #undef TARGET_CANONICAL_VA_LIST_TYPE
   28167              : #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
   28168              : 
   28169              : #undef TARGET_EXPAND_BUILTIN_VA_START
   28170              : #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
   28171              : 
   28172              : #undef TARGET_MD_ASM_ADJUST
   28173              : #define TARGET_MD_ASM_ADJUST ix86_md_asm_adjust
   28174              : 
   28175              : #undef TARGET_C_EXCESS_PRECISION
   28176              : #define TARGET_C_EXCESS_PRECISION ix86_get_excess_precision
   28177              : #undef TARGET_C_BITINT_TYPE_INFO
   28178              : #define TARGET_C_BITINT_TYPE_INFO ix86_bitint_type_info
   28179              : #undef TARGET_C_MODE_FOR_FLOATING_TYPE
   28180              : #define TARGET_C_MODE_FOR_FLOATING_TYPE ix86_c_mode_for_floating_type
   28181              : #undef TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE
   28182              : #define TARGET_CXX_ADJUST_CDTOR_CALLABI_FNTYPE ix86_cxx_adjust_cdtor_callabi_fntype
   28183              : #undef TARGET_PROMOTE_PROTOTYPES
   28184              : #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
   28185              : #undef TARGET_PUSH_ARGUMENT
   28186              : #define TARGET_PUSH_ARGUMENT ix86_push_argument
   28187              : #undef TARGET_SETUP_INCOMING_VARARGS
   28188              : #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
   28189              : #undef TARGET_MUST_PASS_IN_STACK
   28190              : #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
   28191              : #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
   28192              : #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS ix86_allocate_stack_slots_for_args
   28193              : #undef TARGET_FUNCTION_ARG_ADVANCE
   28194              : #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
   28195              : #undef TARGET_FUNCTION_ARG
   28196              : #define TARGET_FUNCTION_ARG ix86_function_arg
   28197              : #undef TARGET_INIT_PIC_REG
   28198              : #define TARGET_INIT_PIC_REG ix86_init_pic_reg
   28199              : #undef TARGET_USE_PSEUDO_PIC_REG
   28200              : #define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
   28201              : #undef TARGET_FUNCTION_ARG_BOUNDARY
   28202              : #define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
   28203              : #undef TARGET_PASS_BY_REFERENCE
   28204              : #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
   28205              : #undef TARGET_INTERNAL_ARG_POINTER
   28206              : #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
   28207              : #undef TARGET_UPDATE_STACK_BOUNDARY
   28208              : #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
   28209              : #undef TARGET_GET_DRAP_RTX
   28210              : #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
   28211              : #undef TARGET_STRICT_ARGUMENT_NAMING
   28212              : #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
   28213              : #undef TARGET_STATIC_CHAIN
   28214              : #define TARGET_STATIC_CHAIN ix86_static_chain
   28215              : #undef TARGET_TRAMPOLINE_INIT
   28216              : #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
   28217              : #undef TARGET_RETURN_POPS_ARGS
   28218              : #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
   28219              : 
   28220              : #undef TARGET_WARN_FUNC_RETURN
   28221              : #define TARGET_WARN_FUNC_RETURN ix86_warn_func_return
   28222              : 
   28223              : #undef TARGET_LEGITIMATE_COMBINED_INSN
   28224              : #define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
   28225              : 
   28226              : #undef TARGET_ASAN_SHADOW_OFFSET
   28227              : #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
   28228              : 
   28229              : #undef TARGET_GIMPLIFY_VA_ARG_EXPR
   28230              : #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
   28231              : 
   28232              : #undef TARGET_SCALAR_MODE_SUPPORTED_P
   28233              : #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
   28234              : 
   28235              : #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
   28236              : #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
   28237              : ix86_libgcc_floating_mode_supported_p
   28238              : 
   28239              : #undef TARGET_VECTOR_MODE_SUPPORTED_P
   28240              : #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
   28241              : 
   28242              : #undef TARGET_C_MODE_FOR_SUFFIX
   28243              : #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
   28244              : 
   28245              : #ifdef HAVE_AS_TLS
   28246              : #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
   28247              : #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
   28248              : #endif
   28249              : 
   28250              : #ifdef SUBTARGET_INSERT_ATTRIBUTES
   28251              : #undef TARGET_INSERT_ATTRIBUTES
   28252              : #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
   28253              : #endif
   28254              : 
   28255              : #undef TARGET_MANGLE_TYPE
   28256              : #define TARGET_MANGLE_TYPE ix86_mangle_type
   28257              : 
   28258              : #undef TARGET_EMIT_SUPPORT_TINFOS
   28259              : #define TARGET_EMIT_SUPPORT_TINFOS ix86_emit_support_tinfos
   28260              : 
   28261              : #undef TARGET_STACK_PROTECT_GUARD
   28262              : #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
   28263              : 
   28264              : #undef TARGET_STACK_PROTECT_RUNTIME_ENABLED_P
   28265              : #define TARGET_STACK_PROTECT_RUNTIME_ENABLED_P \
   28266              :   ix86_stack_protect_runtime_enabled_p
   28267              : 
   28268              : #if !TARGET_MACHO
   28269              : #undef TARGET_STACK_PROTECT_FAIL
   28270              : #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
   28271              : #endif
   28272              : 
   28273              : #undef TARGET_FUNCTION_VALUE
   28274              : #define TARGET_FUNCTION_VALUE ix86_function_value
   28275              : 
   28276              : #undef TARGET_FUNCTION_VALUE_REGNO_P
   28277              : #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
   28278              : 
   28279              : #undef TARGET_ZERO_CALL_USED_REGS
   28280              : #define TARGET_ZERO_CALL_USED_REGS ix86_zero_call_used_regs
   28281              : 
   28282              : #undef TARGET_PROMOTE_FUNCTION_MODE
   28283              : #define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
   28284              : 
   28285              : #undef  TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
   28286              : #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ix86_override_options_after_change
   28287              : 
   28288              : #undef TARGET_MEMBER_TYPE_FORCES_BLK
   28289              : #define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
   28290              : 
   28291              : #undef TARGET_INSTANTIATE_DECLS
   28292              : #define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
   28293              : 
   28294              : #undef TARGET_SECONDARY_RELOAD
   28295              : #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
   28296              : #undef TARGET_SECONDARY_MEMORY_NEEDED
   28297              : #define TARGET_SECONDARY_MEMORY_NEEDED ix86_secondary_memory_needed
   28298              : #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
   28299              : #define TARGET_SECONDARY_MEMORY_NEEDED_MODE ix86_secondary_memory_needed_mode
   28300              : 
   28301              : #undef TARGET_CLASS_MAX_NREGS
   28302              : #define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
   28303              : 
   28304              : #undef TARGET_PREFERRED_RELOAD_CLASS
   28305              : #define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
   28306              : #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
   28307              : #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
   28308              : /* When this hook returns true for MODE, the compiler allows
   28309              :    registers explicitly used in the rtl to be used as spill registers
   28310              :    but prevents the compiler from extending the lifetime of these
   28311              :    registers.  */
   28312              : #undef TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
   28313              : #define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
   28314              : #undef TARGET_CLASS_LIKELY_SPILLED_P
   28315              : #define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
   28316              : #undef TARGET_CALLEE_SAVE_COST
   28317              : #define TARGET_CALLEE_SAVE_COST ix86_callee_save_cost
   28318              : 
   28319              : #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
   28320              : #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
   28321              :   ix86_builtin_vectorization_cost
   28322              : #undef TARGET_VECTORIZE_VEC_PERM_CONST
   28323              : #define TARGET_VECTORIZE_VEC_PERM_CONST ix86_vectorize_vec_perm_const
   28324              : #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
   28325              : #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
   28326              :   ix86_preferred_simd_mode
   28327              : #undef TARGET_VECTORIZE_SPLIT_REDUCTION
   28328              : #define TARGET_VECTORIZE_SPLIT_REDUCTION \
   28329              :   ix86_split_reduction
   28330              : #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
   28331              : #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
   28332              :   ix86_autovectorize_vector_modes
   28333              : #undef TARGET_VECTORIZE_GET_MASK_MODE
   28334              : #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode
   28335              : #undef TARGET_VECTORIZE_CREATE_COSTS
   28336              : #define TARGET_VECTORIZE_CREATE_COSTS ix86_vectorize_create_costs
   28337              : 
   28338              : #undef TARGET_SET_CURRENT_FUNCTION
   28339              : #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
   28340              : 
   28341              : #undef TARGET_OPTION_VALID_ATTRIBUTE_P
   28342              : #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
   28343              : 
   28344              : #undef TARGET_OPTION_SAVE
   28345              : #define TARGET_OPTION_SAVE ix86_function_specific_save
   28346              : 
   28347              : #undef TARGET_OPTION_RESTORE
   28348              : #define TARGET_OPTION_RESTORE ix86_function_specific_restore
   28349              : 
   28350              : #undef TARGET_OPTION_POST_STREAM_IN
   28351              : #define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
   28352              : 
   28353              : #undef TARGET_OPTION_PRINT
   28354              : #define TARGET_OPTION_PRINT ix86_function_specific_print
   28355              : 
   28356              : #undef TARGET_CAN_INLINE_P
   28357              : #define TARGET_CAN_INLINE_P ix86_can_inline_p
   28358              : 
   28359              : #undef TARGET_LEGITIMATE_ADDRESS_P
   28360              : #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
   28361              : 
   28362              : #undef TARGET_REGISTER_PRIORITY
   28363              : #define TARGET_REGISTER_PRIORITY ix86_register_priority
   28364              : 
   28365              : #undef TARGET_REGISTER_USAGE_LEVELING_P
   28366              : #define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
   28367              : 
   28368              : #undef TARGET_LEGITIMATE_CONSTANT_P
   28369              : #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
   28370              : 
   28371              : #undef TARGET_COMPUTE_FRAME_LAYOUT
   28372              : #define TARGET_COMPUTE_FRAME_LAYOUT ix86_compute_frame_layout
   28373              : 
   28374              : #undef TARGET_FRAME_POINTER_REQUIRED
   28375              : #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
   28376              : 
   28377              : #undef TARGET_CAN_ELIMINATE
   28378              : #define TARGET_CAN_ELIMINATE ix86_can_eliminate
   28379              : 
   28380              : #undef TARGET_EXTRA_LIVE_ON_ENTRY
   28381              : #define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
   28382              : 
   28383              : #undef TARGET_ASM_CODE_END
   28384              : #define TARGET_ASM_CODE_END ix86_code_end
   28385              : 
   28386              : #undef TARGET_CONDITIONAL_REGISTER_USAGE
   28387              : #define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
   28388              : 
   28389              : #undef TARGET_CANONICALIZE_COMPARISON
   28390              : #define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
   28391              : 
   28392              : #undef TARGET_LOOP_UNROLL_ADJUST
   28393              : #define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
   28394              : 
   28395              : /* Disabled due to PRs 70902, 71453, 71555, 71596 and 71657.  */
   28396              : #undef TARGET_SPILL_CLASS
   28397              : #define TARGET_SPILL_CLASS ix86_spill_class
   28398              : 
   28399              : #undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
   28400              : #define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
   28401              :   ix86_simd_clone_compute_vecsize_and_simdlen
   28402              : 
   28403              : #undef TARGET_SIMD_CLONE_ADJUST
   28404              : #define TARGET_SIMD_CLONE_ADJUST ix86_simd_clone_adjust
   28405              : 
   28406              : #undef TARGET_SIMD_CLONE_USABLE
   28407              : #define TARGET_SIMD_CLONE_USABLE ix86_simd_clone_usable
   28408              : 
   28409              : #undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
   28410              : #define TARGET_OMP_DEVICE_KIND_ARCH_ISA ix86_omp_device_kind_arch_isa
   28411              : 
   28412              : #undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
   28413              : #define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
   28414              :   ix86_float_exceptions_rounding_supported_p
   28415              : 
   28416              : #undef TARGET_MODE_EMIT
   28417              : #define TARGET_MODE_EMIT ix86_emit_mode_set
   28418              : 
   28419              : #undef TARGET_MODE_NEEDED
   28420              : #define TARGET_MODE_NEEDED ix86_mode_needed
   28421              : 
   28422              : #undef TARGET_MODE_AFTER
   28423              : #define TARGET_MODE_AFTER ix86_mode_after
   28424              : 
   28425              : #undef TARGET_MODE_ENTRY
   28426              : #define TARGET_MODE_ENTRY ix86_mode_entry
   28427              : 
   28428              : #undef TARGET_MODE_EXIT
   28429              : #define TARGET_MODE_EXIT ix86_mode_exit
   28430              : 
   28431              : #undef TARGET_MODE_PRIORITY
   28432              : #define TARGET_MODE_PRIORITY ix86_mode_priority
   28433              : 
   28434              : #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
   28435              : #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
   28436              : 
   28437              : #undef TARGET_OFFLOAD_OPTIONS
   28438              : #define TARGET_OFFLOAD_OPTIONS \
   28439              :   ix86_offload_options
   28440              : 
   28441              : #undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
   28442              : #define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
   28443              : 
   28444              : #undef TARGET_OPTAB_SUPPORTED_P
   28445              : #define TARGET_OPTAB_SUPPORTED_P ix86_optab_supported_p
   28446              : 
   28447              : #undef TARGET_HARD_REGNO_SCRATCH_OK
   28448              : #define TARGET_HARD_REGNO_SCRATCH_OK ix86_hard_regno_scratch_ok
   28449              : 
   28450              : #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
   28451              : #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS X86_CUSTOM_FUNCTION_TEST
   28452              : 
   28453              : #undef TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID
   28454              : #define TARGET_ADDR_SPACE_ZERO_ADDRESS_VALID ix86_addr_space_zero_address_valid
   28455              : 
   28456              : #undef TARGET_INIT_LIBFUNCS
   28457              : #define TARGET_INIT_LIBFUNCS ix86_init_libfuncs
   28458              : 
   28459              : #undef TARGET_EXPAND_DIVMOD_LIBFUNC
   28460              : #define TARGET_EXPAND_DIVMOD_LIBFUNC ix86_expand_divmod_libfunc
   28461              : 
   28462              : #undef TARGET_MAX_NOCE_IFCVT_SEQ_COST
   28463              : #define TARGET_MAX_NOCE_IFCVT_SEQ_COST ix86_max_noce_ifcvt_seq_cost
   28464              : 
   28465              : #undef TARGET_NOCE_CONVERSION_PROFITABLE_P
   28466              : #define TARGET_NOCE_CONVERSION_PROFITABLE_P ix86_noce_conversion_profitable_p
   28467              : 
   28468              : #undef TARGET_HARD_REGNO_NREGS
   28469              : #define TARGET_HARD_REGNO_NREGS ix86_hard_regno_nregs
   28470              : #undef TARGET_HARD_REGNO_MODE_OK
   28471              : #define TARGET_HARD_REGNO_MODE_OK ix86_hard_regno_mode_ok
   28472              : 
   28473              : #undef TARGET_MODES_TIEABLE_P
   28474              : #define TARGET_MODES_TIEABLE_P ix86_modes_tieable_p
   28475              : 
   28476              : #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
   28477              : #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
   28478              :   ix86_hard_regno_call_part_clobbered
   28479              : 
   28480              : #undef TARGET_INSN_CALLEE_ABI
   28481              : #define TARGET_INSN_CALLEE_ABI ix86_insn_callee_abi
   28482              : 
   28483              : #undef TARGET_CAN_CHANGE_MODE_CLASS
   28484              : #define TARGET_CAN_CHANGE_MODE_CLASS ix86_can_change_mode_class
   28485              : 
   28486              : #undef TARGET_LOWER_LOCAL_DECL_ALIGNMENT
   28487              : #define TARGET_LOWER_LOCAL_DECL_ALIGNMENT ix86_lower_local_decl_alignment
   28488              : 
   28489              : #undef TARGET_STATIC_RTX_ALIGNMENT
   28490              : #define TARGET_STATIC_RTX_ALIGNMENT ix86_static_rtx_alignment
   28491              : #undef TARGET_CONSTANT_ALIGNMENT
   28492              : #define TARGET_CONSTANT_ALIGNMENT ix86_constant_alignment
   28493              : 
   28494              : #undef TARGET_EMPTY_RECORD_P
   28495              : #define TARGET_EMPTY_RECORD_P ix86_is_empty_record
   28496              : 
   28497              : #undef TARGET_WARN_PARAMETER_PASSING_ABI
   28498              : #define TARGET_WARN_PARAMETER_PASSING_ABI ix86_warn_parameter_passing_abi
   28499              : 
   28500              : #undef TARGET_GET_MULTILIB_ABI_NAME
   28501              : #define TARGET_GET_MULTILIB_ABI_NAME \
   28502              :   ix86_get_multilib_abi_name
   28503              : 
   28504              : #undef TARGET_IFUNC_REF_LOCAL_OK
   28505              : #define TARGET_IFUNC_REF_LOCAL_OK ix86_ifunc_ref_local_ok
   28506              : 
   28507              : #if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
   28508              : # undef TARGET_ASM_RELOC_RW_MASK
   28509              : # define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask
   28510              : #endif
   28511              : 
   28512              : #undef TARGET_MEMTAG_CAN_TAG_ADDRESSES
   28513              : #define TARGET_MEMTAG_CAN_TAG_ADDRESSES ix86_memtag_can_tag_addresses
   28514              : 
   28515              : #undef TARGET_MEMTAG_ADD_TAG
   28516              : #define TARGET_MEMTAG_ADD_TAG ix86_memtag_add_tag
   28517              : 
   28518              : #undef TARGET_MEMTAG_SET_TAG
   28519              : #define TARGET_MEMTAG_SET_TAG ix86_memtag_set_tag
   28520              : 
   28521              : #undef TARGET_MEMTAG_EXTRACT_TAG
   28522              : #define TARGET_MEMTAG_EXTRACT_TAG ix86_memtag_extract_tag
   28523              : 
   28524              : #undef TARGET_MEMTAG_UNTAGGED_POINTER
   28525              : #define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer
   28526              : 
   28527              : #undef TARGET_MEMTAG_TAG_BITSIZE
   28528              : #define TARGET_MEMTAG_TAG_BITSIZE ix86_memtag_tag_bitsize
   28529              : 
   28530              : #undef TARGET_GEN_CCMP_FIRST
   28531              : #define TARGET_GEN_CCMP_FIRST ix86_gen_ccmp_first
   28532              : 
   28533              : #undef TARGET_GEN_CCMP_NEXT
   28534              : #define TARGET_GEN_CCMP_NEXT ix86_gen_ccmp_next
   28535              : 
   28536              : #undef TARGET_HAVE_CCMP
   28537              : #define TARGET_HAVE_CCMP ix86_have_ccmp
   28538              : 
   28539              : #undef TARGET_MODE_CAN_TRANSFER_BITS
   28540              : #define TARGET_MODE_CAN_TRANSFER_BITS ix86_mode_can_transfer_bits
   28541              : 
   28542              : #undef TARGET_REDZONE_CLOBBER
   28543              : #define TARGET_REDZONE_CLOBBER ix86_redzone_clobber
   28544              : 
   28545              : static bool
   28546        94254 : ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED)
   28547              : {
   28548              : #ifdef OPTION_GLIBC
   28549        94254 :   if (OPTION_GLIBC)
   28550        94254 :     return (built_in_function)fcode == BUILT_IN_MEMPCPY;
   28551              :   else
   28552              :     return false;
   28553              : #else
   28554              :   return false;
   28555              : #endif
   28556              : }
   28557              : 
/* Register the libc fast-function query hook.  */
#undef TARGET_LIBC_HAS_FAST_FUNCTION
#define TARGET_LIBC_HAS_FAST_FUNCTION ix86_libc_has_fast_function
   28560              : 
   28561              : static unsigned
   28562        78746 : ix86_libm_function_max_error (unsigned cfn, machine_mode mode,
   28563              :                               bool boundary_p)
   28564              : {
   28565              : #ifdef OPTION_GLIBC
   28566        78746 :   bool glibc_p = OPTION_GLIBC;
   28567              : #else
   28568              :   bool glibc_p = false;
   28569              : #endif
   28570        78746 :   if (glibc_p)
   28571              :     {
   28572              :       /* If __FAST_MATH__ is defined, glibc provides libmvec.  */
   28573        78746 :       unsigned int libmvec_ret = 0;
   28574        78746 :       if (!flag_trapping_math
   28575         8296 :           && flag_unsafe_math_optimizations
   28576         3374 :           && flag_finite_math_only
   28577         3348 :           && !flag_signed_zeros
   28578         3348 :           && !flag_errno_math)
   28579         3348 :         switch (cfn)
   28580              :           {
   28581         1396 :           CASE_CFN_COS:
   28582         1396 :           CASE_CFN_COS_FN:
   28583         1396 :           CASE_CFN_SIN:
   28584         1396 :           CASE_CFN_SIN_FN:
   28585         1396 :             if (!boundary_p)
   28586              :               {
   28587              :                 /* With non-default rounding modes, libmvec provides
   28588              :                    complete garbage in results.  E.g.
   28589              :                    _ZGVcN8v_sinf for 1.40129846e-45f in FE_UPWARD
   28590              :                    returns 0.00333309174f rather than 1.40129846e-45f.  */
   28591          587 :                 if (flag_rounding_math)
   28592              :                   return ~0U;
   28593              :                 /* https://www.gnu.org/software/libc/manual/html_node/Errors-in-Math-Functions.html
   28594              :                    claims libmvec maximum error is 4ulps.
   28595              :                    My own random testing indicates 2ulps for SFmode and
   28596              :                    0.5ulps for DFmode, but let's go with the 4ulps.  */
   28597              :                 libmvec_ret = 4;
   28598              :               }
   28599              :             break;
   28600              :           default:
   28601              :             break;
   28602              :           }
   28603        78746 :       unsigned int ret = glibc_linux_libm_function_max_error (cfn, mode,
   28604              :                                                               boundary_p);
   28605        78746 :       return MAX (ret, libmvec_ret);
   28606              :     }
   28607            0 :   return default_libm_function_max_error (cfn, mode, boundary_p);
   28608              : }
   28609              : 
/* Register the libm maximum-error query hook.  */
#undef TARGET_LIBM_FUNCTION_MAX_ERROR
#define TARGET_LIBM_FUNCTION_MAX_ERROR ix86_libm_function_max_error
   28612              : 
   28613              : #if TARGET_MACHO
   28614              : static bool
   28615              : ix86_cannot_copy_insn_p (rtx_insn *insn)
   28616              : {
   28617              :   if (TARGET_64BIT)
   28618              :     return false;
   28619              : 
   28620              :   rtx set = single_set (insn);
   28621              :   if (set)
   28622              :     {
   28623              :       rtx src = SET_SRC (set);
   28624              :       if (GET_CODE (src) == UNSPEC
   28625              :           && XINT (src, 1) == UNSPEC_SET_GOT)
   28626              :         return true;
   28627              :     }
   28628              :   return false;
   28629              : }
   28630              : 
   28631              : #undef TARGET_CANNOT_COPY_INSN_P
   28632              : #define TARGET_CANNOT_COPY_INSN_P ix86_cannot_copy_insn_p
   28633              : 
   28634              : #endif
   28635              : 
/* Run target-specific selftests only in checking builds.  */
#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::ix86_run_selftests
#endif /* #if CHECKING_P */

/* Name identifying this target in the documentation.  */
#undef TARGET_DOCUMENTATION_NAME
#define TARGET_DOCUMENTATION_NAME "x86"
   28644              : /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS.  */
   28645              : sbitmap
   28646       735781 : ix86_get_separate_components (void)
   28647              : {
   28648       735781 :   HOST_WIDE_INT offset, to_allocate;
   28649       735781 :   sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
   28650       735781 :   bitmap_clear (components);
   28651       735781 :   struct machine_function *m = cfun->machine;
   28652              : 
   28653       735781 :   offset = m->frame.stack_pointer_offset;
   28654       735781 :   to_allocate = offset - m->frame.sse_reg_save_offset;
   28655              : 
   28656              :   /* Shrink wrap separate uses MOV, which means APX PPX cannot be used.
   28657              :      Experiments show that APX PPX can speed up the prologue.  If the function
   28658              :      does not exit early during actual execution, then using APX PPX is faster.
   28659              :      If the function always exits early during actual execution, then shrink
   28660              :      wrap separate reduces the number of MOV (PUSH/POP) instructions actually
   28661              :      executed, thus speeding up execution.
   28662              :      foo:
   28663              :           movl    $1, %eax
   28664              :           testq   %rdi, %rdi
   28665              :           jne.L60
   28666              :           ret   ---> early return.
   28667              :     .L60:
   28668              :           subq    $88, %rsp     ---> belong to prologue.
   28669              :           xorl    %eax, %eax
   28670              :           movq    %rbx, 40 (%rsp) ---> belong to prologue.
   28671              :           movq    8 (%rdi), %rbx
   28672              :           movq    %rbp, 48 (%rsp) ---> belong to prologue.
   28673              :           movq    %rdi, %rbp
   28674              :           testq   %rbx, %rbx
   28675              :           jne.L61
   28676              :           movq    40 (%rsp), %rbx
   28677              :           movq    48 (%rsp), %rbp
   28678              :           addq    $88, %rsp
   28679              :           ret
   28680              :      .L61:
   28681              :           movq    %r12, 56 (%rsp) ---> belong to prologue.
   28682              :           movq    %r13, 64 (%rsp) ---> belong to prologue.
   28683              :           movq    %r14, 72 (%rsp) ---> belong to prologue.
   28684              :      ... ...
   28685              : 
   28686              :      Disable shrink wrap separate when PPX is enabled.  */
   28687       735781 :   if ((TARGET_APX_PPX && !crtl->calls_eh_return)
   28688       735316 :       || cfun->machine->func_type != TYPE_NORMAL
   28689              :       || TARGET_SEH
   28690       735218 :       || crtl->stack_realign_needed
   28691       725596 :       || m->call_ms2sysv)
   28692              :     return components;
   28693              : 
   28694              :   /* Since shrink wrapping separate uses MOV instead of PUSH/POP.
   28695              :      Disable shrink wrap separate when MOV is prohibited.  */
   28696       723674 :   if (save_regs_using_push_pop (to_allocate))
   28697              :     return components;
   28698              : 
   28699     32609055 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   28700     32258420 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
   28701              :       {
   28702              :         /* Skip registers with large offsets, where a pseudo may be needed.  */
   28703       605503 :         if (IN_RANGE (offset, -0x8000, 0x7fff))
   28704       604430 :           bitmap_set_bit (components, regno);
   28705       650810 :         offset += UNITS_PER_WORD;
   28706              :       }
   28707              : 
   28708              :   /* Don't mess with the following registers.  */
   28709       350635 :   if (frame_pointer_needed)
   28710         6306 :     bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
   28711              : 
   28712       350635 :   if (crtl->drap_reg)
   28713          128 :     bitmap_clear_bit (components, REGNO (crtl->drap_reg));
   28714              : 
   28715       350635 :   if (pic_offset_table_rtx)
   28716        29534 :     bitmap_clear_bit (components, REAL_PIC_OFFSET_TABLE_REGNUM);
   28717              : 
   28718              :   return components;
   28719              : }
   28720              : 
   28721              : /* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB.  */
   28722              : sbitmap
   28723      9439949 : ix86_components_for_bb (basic_block bb)
   28724              : {
   28725      9439949 :   bitmap in = DF_LIVE_IN (bb);
   28726      9439949 :   bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
   28727      9439949 :   bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
   28728              : 
   28729      9439949 :   sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
   28730      9439949 :   bitmap_clear (components);
   28731              : 
   28732      9439949 :   function_abi_aggregator callee_abis;
   28733      9439949 :   rtx_insn *insn;
   28734    110356657 :   FOR_BB_INSNS (bb, insn)
   28735    100916708 :     if (CALL_P (insn))
   28736      3054056 :       callee_abis.note_callee_abi (insn_callee_abi (insn));
   28737      9439949 :   HARD_REG_SET extra_caller_saves = callee_abis.caller_save_regs (*crtl->abi);
   28738              : 
   28739              :   /* GPRs are used in a bb if they are in the IN, GEN, or KILL sets.  */
   28740    877915257 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   28741    868475308 :     if (!fixed_regs[regno]
   28742    868475308 :         && (TEST_HARD_REG_BIT (extra_caller_saves, regno)
   28743    441582573 :             || bitmap_bit_p (in, regno)
   28744    415733232 :             || bitmap_bit_p (gen, regno)
   28745    403127763 :             || bitmap_bit_p (kill, regno)))
   28746     38719748 :       bitmap_set_bit (components, regno);
   28747              : 
   28748      9439949 :   return components;
   28749              : }
   28750              : 
   28751              : /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.  */
   28752              : void
   28753       488211 : ix86_disqualify_components (sbitmap, edge, sbitmap, bool)
   28754              : {
   28755              :   /* Nothing to do for x86.  */
   28756       488211 : }
   28757              : 
   28758              : /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS.  */
   28759              : void
   28760       175865 : ix86_emit_prologue_components (sbitmap components)
   28761              : {
   28762       175865 :   HOST_WIDE_INT cfa_offset;
   28763       175865 :   struct machine_function *m = cfun->machine;
   28764              : 
   28765       175865 :   cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
   28766       175865 :                - m->frame.stack_pointer_offset;
   28767     16355445 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   28768     16179580 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
   28769              :       {
   28770       796062 :         if (bitmap_bit_p (components, regno))
   28771       202417 :           ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
   28772       846032 :         cfa_offset -= UNITS_PER_WORD;
   28773              :       }
   28774       175865 : }
   28775              : 
   28776              : /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS.  */
   28777              : void
   28778       156089 : ix86_emit_epilogue_components (sbitmap components)
   28779              : {
   28780       156089 :   HOST_WIDE_INT cfa_offset;
   28781       156089 :   struct machine_function *m = cfun->machine;
   28782       156089 :   cfa_offset = m->frame.reg_save_offset + m->fs.sp_offset
   28783       156089 :                - m->frame.stack_pointer_offset;
   28784              : 
   28785     14516277 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   28786     14360188 :     if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true))
   28787              :       {
   28788       709608 :         if (bitmap_bit_p (components, regno))
   28789              :           {
   28790       273612 :             rtx reg = gen_rtx_REG (word_mode, regno);
   28791       273612 :             rtx mem;
   28792       273612 :             rtx_insn *insn;
   28793              : 
   28794       273612 :             mem = choose_baseaddr (cfa_offset, NULL);
   28795       273612 :             mem = gen_frame_mem (word_mode, mem);
   28796       273612 :             insn = emit_move_insn (reg, mem);
   28797              : 
   28798       273612 :             RTX_FRAME_RELATED_P (insn) = 1;
   28799       273612 :             add_reg_note (insn, REG_CFA_RESTORE, reg);
   28800              :           }
   28801       766197 :         cfa_offset -= UNITS_PER_WORD;
   28802              :       }
   28803       156089 : }
   28804              : 
   28805              : /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS.  */
   28806              : void
   28807        45765 : ix86_set_handled_components (sbitmap components)
   28808              : {
   28809      4256145 :   for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
   28810      4210380 :     if (bitmap_bit_p (components, regno))
   28811              :       {
   28812       107683 :         cfun->machine->reg_is_wrapped_separately[regno] = true;
   28813       107683 :         cfun->machine->use_fast_prologue_epilogue = true;
   28814       107683 :         cfun->machine->frame.save_regs_using_mov = true;
   28815              :       }
   28816        45765 : }
   28817              : 
/* Separate shrink-wrapping hooks, implemented above.  */
#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS ix86_get_separate_components
#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB ix86_components_for_bb
#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS ix86_disqualify_components
#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
  ix86_emit_prologue_components
#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
  ix86_emit_epilogue_components
#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS ix86_set_handled_components
   28832              : 
/* The single target vector instance, assembled from the TARGET_*
   macro overrides defined throughout this file.  */
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"
        

Generated by: LCOV version 2.4-beta

The LCOV profile was generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite was run with the built compiler.