LCOV - code coverage report
Current view: top level - gcc/config/i386 - i386-features.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 88.8 % 2752 2445
Test Date: 2026-05-11 19:44:49 Functions: 98.9 % 95 94
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Copyright (C) 1988-2026 Free Software Foundation, Inc.
       2              : 
       3              : This file is part of GCC.
       4              : 
       5              : GCC is free software; you can redistribute it and/or modify
       6              : it under the terms of the GNU General Public License as published by
       7              : the Free Software Foundation; either version 3, or (at your option)
       8              : any later version.
       9              : 
      10              : GCC is distributed in the hope that it will be useful,
      11              : but WITHOUT ANY WARRANTY; without even the implied warranty of
      12              : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13              : GNU General Public License for more details.
      14              : 
      15              : You should have received a copy of the GNU General Public License
      16              : along with GCC; see the file COPYING3.  If not see
      17              : <http://www.gnu.org/licenses/>.  */
      18              : 
      19              : #define IN_TARGET_CODE 1
      20              : 
      21              : #include "config.h"
      22              : #include "system.h"
      23              : #include "coretypes.h"
      24              : #include "backend.h"
      25              : #include "rtl.h"
      26              : #include "tree.h"
      27              : #include "memmodel.h"
      28              : #include "gimple.h"
      29              : #include "cfghooks.h"
      30              : #include "cfgloop.h"
      31              : #include "df.h"
      32              : #include "tm_p.h"
      33              : #include "stringpool.h"
      34              : #include "expmed.h"
      35              : #include "optabs.h"
      36              : #include "regs.h"
      37              : #include "emit-rtl.h"
      38              : #include "recog.h"
      39              : #include "cgraph.h"
      40              : #include "diagnostic.h"
      41              : #include "cfgbuild.h"
      42              : #include "alias.h"
      43              : #include "fold-const.h"
      44              : #include "attribs.h"
      45              : #include "calls.h"
      46              : #include "stor-layout.h"
      47              : #include "varasm.h"
      48              : #include "output.h"
      49              : #include "insn-attr.h"
      50              : #include "flags.h"
      51              : #include "except.h"
      52              : #include "explow.h"
      53              : #include "expr.h"
      54              : #include "cfgrtl.h"
      55              : #include "common/common-target.h"
      56              : #include "langhooks.h"
      57              : #include "reload.h"
      58              : #include "gimplify.h"
      59              : #include "dwarf2.h"
      60              : #include "tm-constrs.h"
      61              : #include "cselib.h"
      62              : #include "sched-int.h"
      63              : #include "opts.h"
      64              : #include "tree-pass.h"
      65              : #include "context.h"
      66              : #include "pass_manager.h"
      67              : #include "target-globals.h"
      68              : #include "gimple-iterator.h"
      69              : #include "shrink-wrap.h"
      70              : #include "builtins.h"
      71              : #include "rtl-iter.h"
      72              : #include "tree-iterator.h"
      73              : #include "dbgcnt.h"
      74              : #include "case-cfn-macros.h"
      75              : #include "dojump.h"
      76              : #include "fold-const-call.h"
      77              : #include "tree-vrp.h"
      78              : #include "tree-ssanames.h"
      79              : #include "selftest.h"
      80              : #include "selftest-rtl.h"
      81              : #include "print-rtl.h"
      82              : #include "intl.h"
      83              : #include "ifcvt.h"
      84              : #include "symbol-summary.h"
      85              : #include "sreal.h"
      86              : #include "ipa-cp.h"
      87              : #include "ipa-prop.h"
      88              : #include "ipa-fnsummary.h"
      89              : #include "wide-int-bitmask.h"
      90              : #include "tree-vector-builder.h"
      91              : #include "debug.h"
      92              : #include "dwarf2out.h"
      93              : #include "i386-builtins.h"
      94              : #include "i386-features.h"
      95              : #include "i386-expand.h"
      96              : 
                        /* Base names of the stubs, indexed by enum xlogue_stub.  get_stub_name
                           combines the selected entry with an "avx" or "sse" prefix and a
                           register count to form the stub's symbol name.  */
      97              : const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
      98              :   "savms64",
      99              :   "resms64",
     100              :   "resms64x",
     101              :   "savms64f",
     102              :   "resms64f",
     103              :   "resms64fx"
     104              : };
     105              : 
                        /* Registers in the order the stubs handle them.  The xlogue_layout
                           constructor walks this array front to back, advancing the offset by
                           16 for each SSE register and by 8 for every other register.
                           count_stub_managed_regs and is_stub_managed_reg also scan it from the
                           front, so a register's position decides which counts cover it.  */
     106              : const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
     107              : /* The below offset values are where each register is stored for the layout
     108              :    relative to incoming stack pointer.  The value of each m_regs[].offset will
     109              :    be relative to the incoming base pointer (rax or rsi) used by the stub.
     110              : 
     111              :     s_instances:   0            1               2               3
     112              :     Offset:                                     realigned or    aligned + 8
     113              :     Register       aligned      aligned + 8     aligned w/HFP   w/HFP   */
     114              :     XMM15_REG,  /* 0x10         0x18            0x10            0x18    */
     115              :     XMM14_REG,  /* 0x20         0x28            0x20            0x28    */
     116              :     XMM13_REG,  /* 0x30         0x38            0x30            0x38    */
     117              :     XMM12_REG,  /* 0x40         0x48            0x40            0x48    */
     118              :     XMM11_REG,  /* 0x50         0x58            0x50            0x58    */
     119              :     XMM10_REG,  /* 0x60         0x68            0x60            0x68    */
     120              :     XMM9_REG,   /* 0x70         0x78            0x70            0x78    */
     121              :     XMM8_REG,   /* 0x80         0x88            0x80            0x88    */
     122              :     XMM7_REG,   /* 0x90         0x98            0x90            0x98    */
     123              :     XMM6_REG,   /* 0xa0         0xa8            0xa0            0xa8    */
     124              :     SI_REG,     /* 0xa8         0xb0            0xa8            0xb0    */
     125              :     DI_REG,     /* 0xb0         0xb8            0xb0            0xb8    */
     126              :     BX_REG,     /* 0xb8         0xc0            0xb8            0xc0    */
     127              :     BP_REG,     /* 0xc0         0xc8            N/A             N/A     */
     128              :     R12_REG,    /* 0xc8         0xd0            0xc0            0xc8    */
     129              :     R13_REG,    /* 0xd0         0xd8            0xc8            0xd0    */
     130              :     R14_REG,    /* 0xd8         0xe0            0xd0            0xd8    */
     131              :     R15_REG,    /* 0xe0         0xe8            0xd8            0xe0    */
     132              : };
     133              : 
     134              : /* Instantiate static const values.  No initializers appear here, so the
                           values must come from the in-class declarations (presumably in
                           i386-features.h -- confirm).  */
     135              : const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
     136              : const unsigned xlogue_layout::MIN_REGS;
     137              : const unsigned xlogue_layout::MAX_REGS;
     138              : const unsigned xlogue_layout::MAX_EXTRA_REGS;
     139              : const unsigned xlogue_layout::VARIANT_COUNT;
     140              : const unsigned xlogue_layout::STUB_NAME_MAX_LEN;
     141              : 
     142              : /* Initialize xlogue_layout::s_stub_names to zero.
                           get_stub_name indexes this cache as [AVX enabled][stub][number of
                           extra registers] and fills an entry the first time it is requested;
                           an entry whose first character is NUL has not been generated yet.  */
     143              : char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
     144              :                                 [STUB_NAME_MAX_LEN];
     145              : 
     146              : /* Instantiates all xlogue_layout instances.
                           The constructor arguments are (stack_align_off_in, hfp).
                           get_instance indexes this array with an enum xlogue_stub_sets value,
                           so the order here is expected to match the order of that enum
                           (declared outside this file -- confirm when changing either).  */
     147              : const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
     148              :   xlogue_layout (0, false),
     149              :   xlogue_layout (8, false),
     150              :   xlogue_layout (0, true),
     151              :   xlogue_layout (8, true)
     152              : };
     153              : 
     154              : /* Return an appropriate const instance of xlogue_layout based upon values
     155              :    in cfun->machine and crtl.
                           The selection uses stack_realign_fp, frame_pointer_needed and
                           cfun->machine->call_ms2sysv_pad_in:
                             - stack_realign_fp: always XLOGUE_SET_HFP_ALIGNED_OR_REALIGN; the
                               padding flag is not consulted.
                             - otherwise, frame_pointer_needed: one of the two HFP sets, chosen
                               by the padding flag.
                             - otherwise: one of the two non-HFP sets, chosen by the padding
                               flag.  */
     156              : const class xlogue_layout &
     157        49891 : xlogue_layout::get_instance ()
     158              : {
     159        49891 :   enum xlogue_stub_sets stub_set;
     160        49891 :   bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;
     161              : 
     162        49891 :   if (stack_realign_fp)
     163              :     stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
     164        40910 :   else if (frame_pointer_needed)
     165        25246 :     stub_set = aligned_plus_8
     166        31552 :               ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
     167              :               : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
     168              :   else
     169         9358 :     stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;
     170              : 
     171        49891 :   return s_instances[stub_set];
     172              : }
     173              : 
     174              : /* Determine how many clobbered registers can be saved by the stub.
     175              :    Returns the count of registers the stub will save and restore.
                           The first MIN_REGS entries of REG_ORDER are counted unconditionally.
                           After that the scan stops at the first register for which
                           ix86_save_reg returns false, so the result never exceeds MAX_REGS.  */
     176              : unsigned
     177        35225 : xlogue_layout::count_stub_managed_regs ()
     178              : {
     179        35225 :   bool hfp = frame_pointer_needed || stack_realign_fp;
     180        35225 :   unsigned i, count;
     181        35225 :   unsigned regno;
     182              : 
     183        94890 :   for (count = i = MIN_REGS; i < MAX_REGS; ++i)
     184              :     {
     185        93670 :       regno = REG_ORDER[i];
                              /* When HFP is set, BP_REG is skipped: it neither adds to the
                                 count nor ends the scan.  */
     186        93670 :       if (regno == BP_REG && hfp)
     187        18200 :         continue;
     188        75470 :       if (!ix86_save_reg (regno, false, false))
     189              :         break;
     190        41465 :       ++count;
     191              :     }
     192        35225 :   return count;
     193              : }
     194              : 
     195              : /* Determine if register REGNO is a stub managed register given the
     196              :    total COUNT of stub managed registers.
                           Returns true if REGNO is among the first COUNT entries of REG_ORDER,
                           not counting BP_REG when HFP is set.  In that case BP_REG itself is
                           never reported as stub managed.  */
     197              : bool
     198      2650688 : xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
     199              : {
     200      2650688 :   bool hfp = frame_pointer_needed || stack_realign_fp;
     201      2650688 :   unsigned i;
     202              : 
     203     34587805 :   for (i = 0; i < count; ++i)
     204              :     {
     205     32436986 :       gcc_assert (i < MAX_REGS);
                              /* BP_REG does not count towards COUNT when HFP is set, so extend
                                 the scan by one entry to make up for it.  */
     206     32436986 :       if (REG_ORDER[i] == BP_REG && hfp)
     207       522627 :         ++count;
     208     31914359 :       else if (REG_ORDER[i] == regno)
     209              :         return true;
     210              :     }
     211              :   return false;
     212              : }
     213              : 
     214              : /* Constructor for xlogue_layout.
                           STACK_ALIGN_OFF_IN is the starting value of the running offset and
                           HFP says whether BP_REG is left out of the layout.  Fills m_regs with
                           one entry per remaining REG_ORDER register: the offset advances by 16
                           for an SSE register and by 8 otherwise, and the stored value is that
                           offset minus STUB_INDEX_OFFSET.  m_nregs is 18 (every REG_ORDER
                           entry) or 17 (every entry but BP_REG); the final assertion checks
                           that this matches the number of entries written.  */
     215      1147664 : xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
     216      1147664 :   : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),
     217      1147664 :     m_stack_align_off_in (stack_align_off_in)
     218              : {
     219      1147664 :   HOST_WIDE_INT offset = stack_align_off_in;
     220      1147664 :   unsigned i, j;
     221              : 
                          /* I walks REG_ORDER; J is the next free slot in m_regs.  */
     222     21805616 :   for (i = j = 0; i < MAX_REGS; ++i)
     223              :     {
     224     20657952 :       unsigned regno = REG_ORDER[i];
     225              : 
     226     20657952 :       if (regno == BP_REG && hfp)
     227       573832 :         continue;
     228     20084120 :       if (SSE_REGNO_P (regno))
     229              :         {
     230     11476640 :           offset += 16;
     231              :           /* Verify that SSE regs are always aligned.  */
     232     11476640 :           gcc_assert (!((stack_align_off_in + offset) & 15));
     233              :         }
     234              :       else
     235      8607480 :         offset += 8;
     236              : 
     237     20084120 :       m_regs[j].regno    = regno;
     238     20084120 :       m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
     239              :     }
     240      1147664 :   gcc_assert (j == m_nregs);
     241      1147664 : }
     242              : 
                        /* Return the symbol name of STUB for N_EXTRA_REGS extra registers.
                           The name has the form "__<avx|sse>_<base name>_<register count>",
                           where the register count is MIN_REGS + N_EXTRA_REGS and "avx" is used
                           when TARGET_AVX is nonzero.  The string is built on first request
                           into s_stub_names and reused afterwards; the returned pointer refers
                           to that static buffer.  */
     243              : const char *
     244        14666 : xlogue_layout::get_stub_name (enum xlogue_stub stub,
     245              :                               unsigned n_extra_regs)
     246              : {
     247        14666 :   const int have_avx = TARGET_AVX;
     248        14666 :   char *name = s_stub_names[!!have_avx][stub][n_extra_regs];
     249              : 
     250              :   /* Lazy init: an empty string means this name has not been built yet.  */
     251        14666 :   if (!*name)
     252              :     {
     253          362 :       int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
     254              :                           (have_avx ? "avx" : "sse"),
     255          181 :                           STUB_BASE_NAMES[stub],
     256              :                           MIN_REGS + n_extra_regs);
                              /* Truncation is only detected in builds where
                                 gcc_checking_assert is active.  */
     257          181 :       gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN);
     258              :     }
     259              : 
     260        14666 :   return name;
     261              : }
     262              : 
     263              : /* Return rtx of a symbol ref for the entry point (based upon
     264              :    cfun->machine->call_ms2sysv_extra_regs) of the specified stub.
                           The result is a Pmode SYMBOL_REF whose name comes from
                           get_stub_name.  Asserts that STUB is below XLOGUE_STUB_COUNT and that
                           crtl->stack_realign_finalized is set.  */
     265              : rtx
     266        14666 : xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
     267              : {
     268        14666 :   const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
     269        14666 :   gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
     270        14666 :   gcc_assert (stub < XLOGUE_STUB_COUNT);
     271        14666 :   gcc_assert (crtl->stack_realign_finalized);
     272              : 
     273        14666 :   return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
     274              : }
     275              : 
                        /* Counter behind chain ids.  The scalar_chain constructor pre-increments
                           it to obtain chain_id, so the first chain created after it was zero
                           gets id 1.  */
     276              : unsigned scalar_chain::max_id = 0;
     277              : 
     278              : namespace {
     279              : 
     280              : /* Initialize new chain.
                           SMODE_ and VMODE_ are stored in smode and vmode (presumably the
                           scalar mode and the vector mode it is converted to).  The chain gets
                           the next id from max_id, four empty bitmaps (insns, defs, defs_conv,
                           insns_conv), zeroed cost accumulators and a visit budget taken from
                           x86_stv_max_visits.  QUEUE stays NULL until build allocates it.  */
     281              : 
     282      6313817 : scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
     283              : {
     284      6313817 :   smode = smode_;
     285      6313817 :   vmode = vmode_;
     286              : 
     287      6313817 :   chain_id = ++max_id;
     288              : 
     289      6313817 :    if (dump_file)
     290          136 :     fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
     291              : 
                          /* Paired with the bitmap_obstack_release call in the destructor.  */
     292      6313817 :   bitmap_obstack_initialize (NULL);
     293      6313817 :   insns = BITMAP_ALLOC (NULL);
     294      6313817 :   defs = BITMAP_ALLOC (NULL);
     295      6313817 :   defs_conv = BITMAP_ALLOC (NULL);
     296      6313817 :   insns_conv = BITMAP_ALLOC (NULL);
     297      6313817 :   queue = NULL;
     298              : 
     299      6313817 :   cost_sse_integer = 0;
     300      6313817 :   weighted_cost_sse_integer = 0 ;
     301      6313817 :   max_visits = x86_stv_max_visits;
     302      6313817 : }
     303              : 
     304              : /* Free chain's data.
                           Releases the four bitmaps allocated by the constructor and undoes its
                           bitmap_obstack_initialize call.  QUEUE is not touched here; build
                           frees it on each of its return paths.  */
     305              : 
     306      6313817 : scalar_chain::~scalar_chain ()
     307              : {
     308      6313817 :   BITMAP_FREE (insns);
     309      6313817 :   BITMAP_FREE (defs);
     310      6313817 :   BITMAP_FREE (defs_conv);
     311      6313817 :   BITMAP_FREE (insns_conv);
     312      6313817 :   bitmap_obstack_release (NULL);
     313      6313817 : }
     314              : 
     315              : /* Add instruction into chain's queue.
                           INSN_UID is set in the QUEUE bitmap.  The dump message is printed
                           only when bitmap_set_bit returns true (presumably meaning the uid was
                           not queued before -- confirm against bitmap.h).  */
     316              : 
     317              : void
     318      8176123 : scalar_chain::add_to_queue (unsigned insn_uid)
     319              : {
     320      8176123 :   if (!bitmap_set_bit (queue, insn_uid))
     321              :     return;
     322              : 
     323      6151556 :   if (dump_file)
     324          141 :     fprintf (dump_file, "  Adding insn %d into chain's #%d queue\n",
     325              :              insn_uid, chain_id);
     326              : }
     327              : 
     328              : /* For DImode conversion, mark register defined by DEF as requiring
     329              :    conversion.
                           DEF's register is recorded in defs_conv.  The estimated cost of the
                           extra move is added to cost_sse_integer and, when DEF's basic block
                           is optimized for speed, a cost scaled by the block's count relative
                           to the function entry count is added to weighted_cost_sse_integer.
                           Nothing is added when the definition was already accounted for.  */
     330              : 
     331              : void
     332      9245003 : scalar_chain::mark_dual_mode_def (df_ref def)
     333              : {
     334      9245003 :   gcc_assert (DF_REF_REG_DEF_P (def));
     335              : 
     336              :   /* Record the def/insn pair so we can later efficiently iterate over
     337              :      the defs to convert on insns not in the chain.  */
     338      9245003 :   bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
     339      9245003 :   basic_block bb = BLOCK_FOR_INSN (DF_REF_INSN (def));
     340      9245003 :   profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
     341      9245003 :   bool speed_p = optimize_bb_for_speed_p (bb);
     342      9245003 :   int cost = 0;
     343              : 
                          /* DEF's insn is not part of the chain: remember the insn in
                             insns_conv and charge an integer to SSE move.  */
     344      9245003 :   if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def)))
     345              :     {
                              /* Neither the insn nor the register is new, so this definition
                                 has been costed before.  */
     346      2675947 :       if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def))
     347      2675947 :           && !reg_new)
     348      1381996 :         return;
     349              : 
     350              :       /* Cost integer to sse moves.  */
     351      2439045 :       if (speed_p)
     352      2158852 :         cost = COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
     353       280193 :       else if (TARGET_64BIT || smode == SImode)
     354              :         cost = COSTS_N_BYTES (4);
     355              :       /* vmovd (4 bytes) + vpinsrd (6 bytes).  */
     356        18647 :       else if (TARGET_SSE4_1)
     357              :         cost = COSTS_N_BYTES (10);
     358              :       /* movd (4 bytes) + movd (4 bytes) + unpckldq (4 bytes).  */
     359              :       else
     360      7863007 :         cost = COSTS_N_BYTES (12);
     361              :     }
                          /* DEF's insn is part of the chain: charge an SSE to integer move,
                             once per register.  */
     362              :   else
     363              :     {
     364      6569056 :       if (!reg_new)
     365              :         return;
     366              : 
     367              :       /* Cost sse to integer moves.  */
     368      5423962 :       if (speed_p)
     369      4866801 :         cost = COSTS_N_INSNS (ix86_cost->sse_to_integer) / 2;
     370       557161 :       else if (TARGET_64BIT || smode == SImode)
     371              :         cost = COSTS_N_BYTES (4);
     372              :       /* vmovd (4 bytes) + vpextrd (6 bytes).  */
     373         2971 :       else if (TARGET_SSE4_1)
     374              :         cost = COSTS_N_BYTES (10);
     375              :       /* movd (4 bytes) + psrlq (5 bytes) + movd (4 bytes).  */
     376              :       else
     377      7863007 :         cost = COSTS_N_BYTES (13);
     378              :     }
     379              : 
                          /* The weighted total is only maintained for blocks optimized for
                             speed; the plain total is always updated.  */
     380      7863007 :   if (speed_p)
     381      7025653 :     weighted_cost_sse_integer += bb->count.to_sreal_scale (entry_count) * cost;
     382              : 
     383      7863007 :   cost_sse_integer += cost;
     384              : 
     385      7863007 :   if (dump_file)
     386          240 :     fprintf (dump_file,
     387              :              "  Mark r%d def in insn %d as requiring both modes in chain #%d\n",
     388          240 :              DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
     389              : }
     390              : 
     391              : /* Check REF's chain to add new insns into a queue
     392              :    and find registers requiring conversion.  Return true if OK, false
     393              :    if the analysis was aborted.
                           REF must belong to an insn already in the chain.  The analysis is
                           aborted when the max_visits budget reaches zero or when a linked
                           insn is found in DISALLOWED.  Linked insns found in CANDIDATES are
                           queued; links that cannot join the chain cause the affected
                           definition to be passed to mark_dual_mode_def.  */
     394              : 
     395              : bool
     396     17563148 : scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref,
     397              :                                       bitmap disallowed)
     398              : {
     399     17563148 :   df_link *chain;
     400     17563148 :   bool mark_def = false;
     401              : 
     402     17563148 :   gcc_checking_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)));
     403              : 
     404     61158873 :   for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
     405              :     {
     406     43601140 :       unsigned uid = DF_REF_INSN_UID (chain->ref);
     407              : 
                              /* Links whose insn is not NONDEBUG_INSN_P are skipped before the
                                 visit budget is charged.  */
     408     43601140 :       if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
     409      7906954 :         continue;
     410              : 
     411     35694186 :       if (--max_visits == 0)
     412              :         return false;
     413              : 
     414     35693606 :       if (!DF_REF_REG_MEM_P (chain->ref))
     415              :         {
                                  /* Already part of this chain.  */
     416     29753976 :           if (bitmap_bit_p (insns, uid))
     417      9453839 :             continue;
     418              : 
                                  /* Still a candidate: let build pick it up later.  */
     419     20300137 :           if (bitmap_bit_p (candidates, uid))
     420              :             {
     421      8176123 :               add_to_queue (uid);
     422      8176123 :               continue;
     423              :             }
     424              : 
     425              :           /* If we run into parts of an aborted chain discovery abort.  */
     426     12124014 :           if (bitmap_bit_p (disallowed, uid))
     427              :             return false;
     428              :         }
     429              : 
                              /* Reached for links satisfying DF_REF_REG_MEM_P and for insns
                                 that are neither in the chain, in CANDIDATES nor in DISALLOWED.
                                 A linked definition is marked directly; a linked use means
                                 REF itself has to be marked, which is done once after the
                                 loop.  */
     430     18058809 :       if (DF_REF_REG_DEF_P (chain->ref))
     431              :         {
     432      2675947 :           if (dump_file)
     433          125 :             fprintf (dump_file, "  r%d def in insn %d isn't convertible\n",
     434              :                      DF_REF_REGNO (chain->ref), uid);
     435      2675947 :           mark_dual_mode_def (chain->ref);
     436              :         }
     437              :       else
     438              :         {
     439     15382862 :           if (dump_file)
     440          524 :             fprintf (dump_file, "  r%d use in insn %d isn't convertible\n",
     441              :                      DF_REF_REGNO (chain->ref), uid);
     442              :           mark_def = true;
     443              :         }
     444              :     }
     445              : 
     446     17557733 :   if (mark_def)
     447      6569056 :     mark_dual_mode_def (ref);
     448              : 
     449              :   return true;
     450              : }
     451              : 
     452              : /* Check whether X is a convertible *concatditi_? variant.  X is known
     453              :    to be any_or_plus:TI, i.e. PLUS:TI, IOR:TI or XOR:TI.
                           Returns true when one operand of X is
                             (ashift (zero_extend (reg:DI)) (const_int 64))
                           and the other is
                             (zero_extend (reg:DI)).
                           The operands are accepted in either order.  */
     454              : 
     455              : static bool
     456        26830 : timode_concatdi_p (rtx x)
     457              : {
     458        26830 :   rtx op0 = XEXP (x, 0);
     459        26830 :   rtx op1 = XEXP (x, 1);
     460              : 
                          /* Put the shifted operand first so that only one order needs to be
                             matched below.  */
     461        26830 :   if (GET_CODE (op1) == ASHIFT)
     462          946 :     std::swap (op0, op1);
     463              : 
     464        26830 :   return GET_CODE (op0) == ASHIFT
     465        18161 :          && GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
     466        18161 :          && GET_MODE (XEXP (XEXP (op0, 0), 0)) == DImode
     467        18161 :          && REG_P (XEXP (XEXP (op0, 0), 0))
     468        18026 :          && CONST_INT_P (XEXP (op0, 1))
     469        18026 :          && INTVAL (XEXP (op0, 1)) == 64
     470        18026 :          && GET_CODE (op1) == ZERO_EXTEND
     471        17080 :          && GET_MODE (XEXP (op1, 0)) == DImode
     472        43910 :          && REG_P (XEXP (op1, 0));
     473              : }
     474              : 
     475              : 
     476              : /* Add instruction into a chain.  Return true if OK, false if the search
     477              :    was aborted.
                           INSN_UID is set in INSNS; if bitmap_set_bit returns false the
                           function returns true without doing anything else.  Otherwise the
                           insn's register definitions and, unless one of the special source
                           forms below applies, its non-memory uses are run through
                           analyze_register_chain.  CANDIDATES and DISALLOWED are passed on to
                           that function unchanged.  */
     478              : 
     479              : bool
     480     12459232 : scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid,
     481              :                         bitmap disallowed)
     482              : {
     483     12459232 :   if (!bitmap_set_bit (insns, insn_uid))
     484              :     return true;
     485              : 
     486     12459232 :   if (dump_file)
     487          277 :     fprintf (dump_file, "  Adding insn %d to chain #%d\n", insn_uid, chain_id);
     488              : 
     489     12459232 :   rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
     490     12459232 :   rtx def_set = single_set (insn);
                          /* Record the destination in DEFS when the insn is a single SET of a
                             register that is not a hard register.  */
     491     12459232 :   if (def_set && REG_P (SET_DEST (def_set))
     492     22054735 :       && !HARD_REGISTER_P (SET_DEST (def_set)))
     493      9595491 :     bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
     494              : 
     495              :   /* ???  The following is quadratic since analyze_register_chain
     496              :      iterates over all refs to look for dual-mode regs.  Instead this
     497              :      should be done separately for all regs mentioned in the chain once.  */
     498     12459232 :   df_ref ref;
     499     25416645 :   for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
     500     12959274 :     if (!HARD_REGISTER_P (DF_REF_REG (ref)))
     501      9595491 :       if (!analyze_register_chain (candidates, ref, disallowed))
     502              :         return false;
     503              : 
     504              :   /* The operand(s) of VEC_SELECT, ZERO_EXTEND and similar ops don't need
     505              :      to be converted/convertible.  */
                          /* For these source forms the function returns here, so the insn's
                             uses are not analyzed.  */
     506     12457371 :   if (def_set)
     507     12457371 :     switch (GET_CODE (SET_SRC (def_set)))
     508              :       {
     509              :       case VEC_SELECT:
     510              :         return true;
     511          122 :       case ZERO_EXTEND:
     512          122 :         if (GET_MODE (XEXP (SET_SRC (def_set), 0)) == DImode)
     513              :           return true;
     514              :         break;
     515      2339725 :       case PLUS:
     516      2339725 :       case IOR:
     517      2339725 :       case XOR:
     518      2339725 :         if (smode == TImode && timode_concatdi_p (SET_SRC (def_set)))
     519              :           return true;
     520              :         break;
     521              :       default:
     522              :         break;
     523              :       }
     524              : 
                          /* Uses for which DF_REF_REG_MEM_P holds are not followed.  */
     525     27284629 :   for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
     526     14869771 :     if (!DF_REF_REG_MEM_P (ref))
     527      7967657 :       if (!analyze_register_chain (candidates, ref, disallowed))
     528              :         return false;
     529              : 
     530              :   return true;
     531              : }
     532              : 
     533              : /* Build new chain starting from insn INSN_UID recursively
     534              :    adding all dependent uses and definitions.  Return true if OK, false
     535              :    if the chain discovery was aborted.
                           Every insn taken from the work queue is removed from CANDIDATES, and
                           those removals are not undone if discovery is aborted.  On abort the
                           insns collected so far are added to DISALLOWED.  QUEUE is allocated
                           here and freed again on both return paths.  */
     536              : 
     537              : bool
     538      6313817 : scalar_chain::build (bitmap candidates, unsigned insn_uid, bitmap disallowed)
     539              : {
     540      6313817 :   queue = BITMAP_ALLOC (NULL);
     541      6313817 :   bitmap_set_bit (queue, insn_uid);
     542              : 
     543      6313817 :   if (dump_file)
     544          136 :     fprintf (dump_file, "Building chain #%d...\n", chain_id);
     545              : 
                          /* Worklist loop: add_insn may put further insns on QUEUE through
                             analyze_register_chain and add_to_queue.  */
     546     18767634 :   while (!bitmap_empty_p (queue))
     547              :     {
     548     12459232 :       insn_uid = bitmap_first_set_bit (queue);
     549     12459232 :       bitmap_clear_bit (queue, insn_uid);
     550     12459232 :       bitmap_clear_bit (candidates, insn_uid);
     551     12459232 :       if (!add_insn (candidates, insn_uid, disallowed))
     552              :         {
     553              :           /* If we aborted the search, put the insns found so far on the set
     554              :              of disallowed insns so that further searches reaching them also
     555              :              abort and thus we abort the whole but yet undiscovered chain.  */
     556         5415 :           bitmap_ior_into (disallowed, insns);
     557         5415 :           if (dump_file)
     558            0 :             fprintf (dump_file, "Aborted chain #%d discovery\n", chain_id);
     559         5415 :           BITMAP_FREE (queue);
     560         5415 :           return false;
     561              :         }
     562              :     }
     563              : 
                          /* Dump the collected insns and, if any, the registers recorded in
                             defs_conv.  */
     564      6308402 :   if (dump_file)
     565              :     {
     566          136 :       fprintf (dump_file, "Collected chain #%d...\n", chain_id);
     567          136 :       fprintf (dump_file, "  insns: ");
     568          136 :       dump_bitmap (dump_file, insns);
     569          136 :       if (!bitmap_empty_p (defs_conv))
     570              :         {
     571          136 :           bitmap_iterator bi;
     572          136 :           unsigned id;
     573          136 :           const char *comma = "";
     574          136 :           fprintf (dump_file, "  defs to convert: ");
     575          366 :           EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
     576              :             {
     577          230 :               fprintf (dump_file, "%sr%d", comma, id);
     578          230 :               comma = ", ";
     579              :             }
     580          136 :           fprintf (dump_file, "\n");
     581              :         }
     582              :     }
     583              : 
     584      6308402 :   BITMAP_FREE (queue);
     585              : 
     586      6308402 :   return true;
     587              : }
     588              : 
     589              : /* Return a cost of building a vector constant
     590              :    instead of using a scalar one.
                           EXP must be a CONST_INT (asserted).  BB is only used to decide
                           whether to cost for size or for speed.  Three cases:
                             - EXP is a standard SSE constant in VMODE: ix86_cost->sse_op.
                             - BB is optimized for size: COSTS_N_BYTES (8).
                             - otherwise: half of COSTS_N_INSNS of the matching
                               ix86_cost->sse_load entry.  */
     591              : 
     592              : int
     593      2631554 : general_scalar_chain::vector_const_cost (rtx exp, basic_block bb)
     594              : {
     595      2631554 :   gcc_assert (CONST_INT_P (exp));
     596              : 
     597      2631554 :   if (standard_sse_constant_p (exp, vmode))
     598       609064 :     return ix86_cost->sse_op;
     599      2022490 :   if (optimize_bb_for_size_p (bb))
     600              :     return COSTS_N_BYTES (8);
     601              :   /* We have separate costs for SImode and DImode, use SImode costs
     602              :      for smaller modes.  */
     603      2400957 :   return COSTS_N_INSNS (ix86_cost->sse_load[smode == DImode ? 1 : 0]) / 2;
     604              : }
     605              : 
     606              : /* Return true if it's cost profitable for chain conversion.
                      :
                      :    Every insn recorded in the INSNS bitmap is examined through its
                      :    single_set.  For each one a per-insn gain IGAIN is computed as
                      :    the integer cost minus the SSE cost of the same operation, so a
                      :    positive value favours conversion.  When the insn's basic block
                      :    is not optimized for speed the estimate is expressed with
                      :    COSTS_N_BYTES, otherwise with the ix86_cost tables.  A SET_SRC
                      :    code that is not handled by the switch below reaches
                      :    gcc_unreachable.
                      :
                      :    Two totals are accumulated: GAIN is the plain sum of IGAIN, and
                      :    WEIGHTED_GAIN adds IGAIN scaled by the block count relative to
                      :    the function entry count, but only for blocks optimized for
                      :    speed.  The result is WEIGHTED_GAIN > weighted_cost_sse_integer
                      :    when those two differ, and GAIN > cost_sse_integer when they
                      :    are equal.  The intermediate values are written to dump_file
                      :    when it is set.
                      :
                      :    NOTE(review): the CONST_INT case with a MEM destination adds the
                      :    raw int_store/sse_store difference, without the
                      :    COSTS_N_INSNS (...) / 2 scaling that the other load/store cases
                      :    use -- confirm that this is intended.  */
     607              : 
     608              : bool
     609      5842535 : general_scalar_chain::compute_convert_gain ()
     610              : {
     611      5842535 :   bitmap_iterator bi;
     612      5842535 :   unsigned insn_uid;
     613      5842535 :   int gain = 0;
     614      5842535 :   sreal weighted_gain = 0;
     615              : 
     616      5842535 :   if (dump_file)
     617          136 :     fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
     618              : 
     619              :   /* SSE costs distinguish between SImode and DImode loads/stores, for
     620              :      int costs factor in the number of GPRs involved.  When supporting
     621              :      smaller modes than SImode the int load/store costs need to be
     622              :      adjusted as well.
                      :
                      :      SSE_COST_IDX selects entry 1 of the sse_load/sse_store tables
                      :      for DImode and entry 0 otherwise.  M is 2 only for DImode when
                      :      !TARGET_64BIT and 1 in every other case; it multiplies the
                      :      integer-side costs below.  */
     623      5842535 :   unsigned sse_cost_idx = smode == DImode ? 1 : 0;
     624      5842535 :   int m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;
     625              : 
     626     17360138 :   EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
     627              :     {
     628     11517603 :       rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
     629     11517603 :       rtx def_set = single_set (insn);
     630     11517603 :       rtx src = SET_SRC (def_set);
     631     11517603 :       rtx dst = SET_DEST (def_set);
     632     11517603 :       basic_block bb = BLOCK_FOR_INSN (insn);
     633     11517603 :       int igain = 0;
     634     11517603 :       profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
     635     11517603 :       bool speed_p = optimize_bb_for_speed_p (bb);
     636     11517603 :       sreal bb_freq = bb->count.to_sreal_scale (entry_count);
     637              : 
     638     11517603 :       if (REG_P (src) && REG_P (dst))
     639              :         {
     640       922764 :           if (!speed_p)
     641              :             /* reg-reg move is 2 bytes, while SSE 3.  */
     642       187039 :             igain += COSTS_N_BYTES (2 * m - 3);
     643              :           else
     644              :             /* Move costs are normalized to reg-reg move having cost 2.  */
     645       735725 :             igain += COSTS_N_INSNS (2 * m - ix86_cost->xmm_move) / 2;
     646              :         }
     647     10594839 :       else if (REG_P (src) && MEM_P (dst))
     648              :         {
     649      2301079 :           if (!speed_p)
     650              :             /* Integer load/store is 3+ bytes and SSE 4+.  */
     651       194174 :             igain += COSTS_N_BYTES (3 * m - 4);
     652              :           else
     653      2106905 :             igain
     654      2106905 :               += COSTS_N_INSNS (m * ix86_cost->int_store[2]
     655              :                                 - ix86_cost->sse_store[sse_cost_idx]) / 2;
     656              :         }
     657      8293760 :       else if (MEM_P (src) && REG_P (dst))
     658              :         {
     659      3746136 :           if (!speed_p)
     660       361157 :             igain += COSTS_N_BYTES (3 * m - 4);
     661              :           else
     662      3384979 :             igain += COSTS_N_INSNS (m * ix86_cost->int_load[2]
     663              :                                     - ix86_cost->sse_load[sse_cost_idx]) / 2;
     664              :         }
     665              :       else
     666              :         {
     667              :           /* For operations on memory operands, include the overhead
     668              :              of explicit load and store instructions.  */
     669      4547624 :           if (MEM_P (dst))
     670              :             {
     671        66315 :               if (!speed_p)
     672              :                 /* ??? This probably should account size difference
     673              :                    of SSE and integer load rather than full SSE load.  */
     674              :                 igain -= COSTS_N_BYTES (8);
     675              :               else
     676              :                 {
     677        57071 :                   int cost = (m * (ix86_cost->int_load[2]
     678        57071 :                                    + ix86_cost->int_store[2])
     679        57071 :                              - (ix86_cost->sse_load[sse_cost_idx] +
     680        57071 :                                 ix86_cost->sse_store[sse_cost_idx]));
     681        57071 :                   igain += COSTS_N_INSNS (cost) / 2;
     682              :                 }
     683              :             }
     684              : 
     685      4547624 :           switch (GET_CODE (src))
     686              :             {
     687       469336 :             case ASHIFT:
     688       469336 :             case ASHIFTRT:
     689       469336 :             case LSHIFTRT:
     690       469336 :               if (m == 2)
     691              :                 {
     692        17068 :                   if (INTVAL (XEXP (src, 1)) >= 32)
     693        11526 :                     igain += ix86_cost->add;
     694              :                   /* Gain for extend highpart case.  */
     695         5542 :                   else if (GET_CODE (XEXP (src, 0)) == ASHIFT)
     696            0 :                     igain += ix86_cost->shift_const - ix86_cost->sse_op;
     697              :                   else
     698         5542 :                     igain += ix86_cost->shift_const;
     699              :                 }
     700              : 
     701       469336 :               igain += ix86_cost->shift_const - ix86_cost->sse_op;
     702              : 
     703       469336 :               if (CONST_INT_P (XEXP (src, 0)))
     704            0 :                 igain -= vector_const_cost (XEXP (src, 0), bb);
     705              :               break;
     706              : 
     707         3817 :             case ROTATE:
     708         3817 :             case ROTATERT:
     709         3817 :               igain += m * ix86_cost->shift_const;
     710         3817 :               if (TARGET_AVX512VL)
     711          204 :                 igain -= ix86_cost->sse_op;
     712         3613 :               else if (smode == DImode)
     713              :                 {
     714          612 :                   int bits = INTVAL (XEXP (src, 1));
     715          612 :                   if ((bits & 0x0f) == 0)
     716          128 :                     igain -= ix86_cost->sse_op;
     717          484 :                   else if ((bits & 0x07) == 0)
     718           27 :                     igain -= 2 * ix86_cost->sse_op;
     719              :                   else
     720          457 :                     igain -= 3 * ix86_cost->sse_op;
     721              :                 }
     722         3001 :               else if (INTVAL (XEXP (src, 1)) == 16)
     723          240 :                 igain -= ix86_cost->sse_op;
     724              :               else
     725         2761 :                 igain -= 2 * ix86_cost->sse_op;
     726              :               break;
     727              : 
     728      2801816 :             case AND:
     729      2801816 :             case IOR:
     730      2801816 :             case XOR:
     731      2801816 :             case PLUS:
     732      2801816 :             case MINUS:
     733      2801816 :               igain += m * ix86_cost->add - ix86_cost->sse_op;
     734              :               /* Additional gain for andnot for targets without BMI.  */
     735      2801816 :               if (GET_CODE (XEXP (src, 0)) == NOT
     736         3599 :                   && !TARGET_BMI)
     737         3590 :                 igain += m * ix86_cost->add;
     738              : 
     739      2801816 :               if (CONST_INT_P (XEXP (src, 0)))
     740            0 :                 igain -= vector_const_cost (XEXP (src, 0), bb);
     741      2801816 :               if (CONST_INT_P (XEXP (src, 1)))
     742      1674978 :                 igain -= vector_const_cost (XEXP (src, 1), bb);
     743      2801816 :               if (MEM_P (XEXP (src, 1)))
     744              :                 {
     745        84881 :                   if (!speed_p)
     746        20485 :                     igain -= COSTS_N_BYTES (m == 2 ? 3 : 5);
     747              :                   else
     748        74634 :                     igain += COSTS_N_INSNS
     749              :                                (m * ix86_cost->int_load[2]
     750              :                                  - ix86_cost->sse_load[sse_cost_idx]) / 2;
     751              :                 }
     752              :               break;
     753              : 
     754        49966 :             case NEG:
     755        49966 :             case NOT:
     756        49966 :               igain -= ix86_cost->sse_op + COSTS_N_INSNS (1);
     757              : 
     758        49966 :               if (GET_CODE (XEXP (src, 0)) != ABS)
     759              :                 {
     760        49966 :                   igain += m * ix86_cost->add;
     761        49966 :                   break;
     762              :                 }
     763              :               /* FALLTHRU */
     764              : 
     765         1006 :             case ABS:
     766         1006 :             case SMAX:
     767         1006 :             case SMIN:
     768         1006 :             case UMAX:
     769         1006 :             case UMIN:
     770              :               /* We do not have any conditional move cost, estimate it as a
     771              :                  reg-reg move.  Comparisons are costed as adds.  */
     772         1006 :               igain += m * (COSTS_N_INSNS (2) + ix86_cost->add);
     773              :               /* Integer SSE ops are all costed the same.  */
     774         1006 :               igain -= ix86_cost->sse_op;
     775         1006 :               break;
     776              : 
     777            0 :             case COMPARE:
     778            0 :               if (XEXP (src, 1) != const0_rtx)
     779              :                 {
     780              :                   /* cmp vs. pxor;pshufd;ptest.  */
     781            0 :                   igain += COSTS_N_INSNS (m - 3);
     782              :                 }
     783            0 :               else if (GET_CODE (XEXP (src, 0)) != AND)
     784              :                 {
     785              :                   /* test vs. pshufd;ptest.  */
     786            0 :                   igain += COSTS_N_INSNS (m - 2);
     787              :                 }
     788            0 :               else if (GET_CODE (XEXP (XEXP (src, 0), 0)) != NOT)
     789              :                 {
     790              :                   /* and;test vs. pshufd;ptest.  */
     791            0 :                   igain += COSTS_N_INSNS (2 * m - 2);
     792              :                 }
     793            0 :               else if (TARGET_BMI)
     794              :                 {
     795              :                   /* andn;test vs. pandn;pshufd;ptest.  */
     796            0 :                   igain += COSTS_N_INSNS (2 * m - 3);
     797              :                 }
     798              :               else
     799              :                 {
     800              :                   /* not;and;test vs. pandn;pshufd;ptest.  */
     801            0 :                   igain += COSTS_N_INSNS (3 * m - 3);
     802              :                 }
     803              :               break;
     804              : 
     805      1185549 :             case CONST_INT:
     806      1185549 :               if (REG_P (dst))
     807              :                 {
     808      1185549 :                   if (!speed_p)
     809              :                     {
     810              :                       /* xor (2 bytes) vs. xorps (3 bytes).  */
     811       228973 :                       if (src == const0_rtx)
     812       120066 :                         igain -= COSTS_N_BYTES (1);
     813              :                       /* movdi_internal vs. movv2di_internal.  */
     814              :                       /* => mov (5 bytes) vs. movaps (7 bytes).  */
     815       108907 :                       else if (x86_64_immediate_operand (src, SImode))
     816        95987 :                         igain -= COSTS_N_BYTES (2);
     817              :                       else
     818              :                         /* ??? Larger immediate constants are placed in the
     819              :                            constant pool, where the size benefit/impact of
     820              :                            STV conversion is affected by whether and how
     821              :                            often each constant pool entry is shared/reused.
     822              :                            The value below is empirically derived from the
     823              :                            CSiBE benchmark (and the optimal value may drift
     824              :                            over time).  */
     825              :                         igain += COSTS_N_BYTES (0);
     826              :                     }
     827              :                   else
     828              :                     {
     829              :                       /* DImode can be immediate for TARGET_64BIT
     830              :                          and SImode always.  */
     831       956576 :                       igain += m * COSTS_N_INSNS (1);
     832       956576 :                       igain -= vector_const_cost (src, bb);
     833              :                     }
     834              :                 }
     835            0 :               else if (MEM_P (dst))
     836              :                 {
     837            0 :                   igain += (m * ix86_cost->int_store[2]
     838            0 :                             - ix86_cost->sse_store[sse_cost_idx]);
     839            0 :                   igain -= vector_const_cost (src, bb);
     840              :                 }
     841              :               break;
     842              : 
     843        36134 :             case VEC_SELECT:
     844        36134 :               if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx)
     845              :                 {
     846              :                   // movd (4 bytes) replaced with movdqa (4 bytes).
     847        26667 :                   if (!!speed_p)
     848        24910 :                     igain += COSTS_N_INSNS (ix86_cost->sse_to_integer
     849              :                                             - ix86_cost->xmm_move) / 2;
     850              :                 }
     851              :               else
     852              :                 {
     853              :                   // pshufd; movd replaced with pshufd.
     854         9467 :                   if (!speed_p)
     855          648 :                     igain += COSTS_N_BYTES (4);
     856              :                   else
     857         8819 :                     igain += ix86_cost->sse_to_integer;
     858              :                 }
     859              :               break;
     860              : 
     861            0 :             default:
     862            0 :               gcc_unreachable ();
     863              :             }
     864              :         }
     865              : 
     866     11515846 :       if (speed_p)
     867     10249267 :         weighted_gain += bb_freq * igain;
     868     11517603 :       gain += igain;
     869              : 
     870     11517603 :       if (igain != 0 && dump_file)
     871              :         {
     872           93 :           fprintf (dump_file, "  Instruction gain %d with bb_freq %.2f for",
     873              :                    igain, bb_freq.to_double ());
     874           93 :           dump_insn_slim (dump_file, insn);
     875              :         }
     876              :     }
     877              : 
     878      5842535 :   if (dump_file)
     879              :     {
     880          136 :       fprintf (dump_file, "  Instruction conversion gain: %d, \n",
     881              :                gain);
     882          136 :       fprintf (dump_file, "  Registers conversion cost: %d\n",
     883              :                cost_sse_integer);
     884          136 :       fprintf (dump_file, "  Weighted instruction conversion gain: %.2f, \n",
     885              :                weighted_gain.to_double ());
     886          136 :       fprintf (dump_file, "  Weighted registers conversion cost: %.2f\n",
     887              :                weighted_cost_sse_integer.to_double ());
     888              :     }
     889              : 
     890      5842535 :   if (weighted_gain != weighted_cost_sse_integer)
     891      4706867 :     return weighted_gain > weighted_cost_sse_integer;
     892              :   else
     893      1135668 :     return gain > cost_sse_integer;;
     894              : }
     895              : 
     896              : /* Insert generated conversion instruction sequence INSNS
     897              :    after instruction AFTER.  New BB may be required in case
     898              :    instruction has EH region attached.
                      :
                      :    When AFTER is not a control flow insn (control_flow_insn_p) the
                      :    sequence is emitted directly after it.  Otherwise the fallthru
                      :    edge leaving AFTER's basic block is split -- such an edge must
                      :    exist, which is asserted -- and the sequence is emitted after
                      :    the head of the newly created block.  */
     899              : 
     900              : void
     901        31171 : scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
     902              : {
     903        31171 :   if (!control_flow_insn_p (after))
     904              :     {
     905        30958 :       emit_insn_after (insns, after);
     906        30958 :       return;
     907              :     }
     908              : 
     909          213 :   basic_block bb = BLOCK_FOR_INSN (after);
     910          213 :   edge e = find_fallthru_edge (bb->succs);
     911          213 :   gcc_assert (e);
     912              : 
     913          213 :   basic_block new_bb = split_edge (e);
     914          213 :   emit_insn_after (insns, BB_HEAD (new_bb));
     915              : }
     916              : 
     917              : } // anon namespace
     918              : 
     919              : /* Generate the canonical SET_SRC to move GPR to a VMODE vector register,
     920              :    zeroing the upper parts.
                      :
                      :    The shape of the returned rtx depends on the number of elements
                      :    of VMODE:
                      :      1 element   -- a VMODE SUBREG of GPR at offset 0;
                      :      2 elements  -- a VEC_CONCAT of GPR with the zero constant of
                      :                     VMODE's inner mode;
                      :      otherwise   -- a VEC_MERGE of a VEC_DUPLICATE of GPR with the
                      :                     VMODE zero constant, using mask 1.
                      :    Callers in this file also pass a memory operand or a stack
                      :    temporary as GPR.  */
     921              : 
     922              : static rtx
     923       173167 : gen_gpr_to_xmm_move_src (enum machine_mode vmode, rtx gpr)
     924              : {
     925       346334 :   switch (GET_MODE_NUNITS (vmode))
     926              :     {
     927           25 :     case 1:
     928           25 :       return gen_rtx_SUBREG (vmode, gpr, 0);
     929       172582 :     case 2:
     930       345164 :       return gen_rtx_VEC_CONCAT (vmode, gpr,
     931              :                                  CONST0_RTX (GET_MODE_INNER (vmode)));
     932          560 :     default:
     933          560 :       return gen_rtx_VEC_MERGE (vmode, gen_rtx_VEC_DUPLICATE (vmode, gpr),
     934              :                                 CONST0_RTX (vmode), GEN_INT (HOST_WIDE_INT_1U));
     935              :     }
     936              : }
     937              : 
     938              : /* Make vector copies for all register REGNO definitions
     939              :    and replace its uses in a chain.
                      :
                      :    This function emits, after INSN, a sequence that copies scalar
                      :    register REG into the vector register recorded for it in
                      :    defs_map.  One of three strategies is used:
                      :
                      :    - Without TARGET_INTER_UNIT_MOVES_TO_VEC, REG is first stored to
                      :      the SLOT_STV_TEMP stack slot (as two SImode halves at offsets
                      :      0 and 4 for DImode when !TARGET_64BIT) and the vector register
                      :      is then set from that slot via gen_gpr_to_xmm_move_src.
                      :
                      :    - For DImode when !TARGET_64BIT, the two SImode halves of REG
                      :      are combined in the vector register: with TARGET_SSE4_1 by
                      :      sse2_loadld of the low half followed by sse4_1_pinsrd of the
                      :      high half, otherwise by loading each half with sse2_loadld
                      :      (the high half into a fresh pseudo) and merging them with
                      :      vec_interleave_lowv4si.
                      :
                      :    - In all other cases a single SET built by
                      :      gen_gpr_to_xmm_move_src is emitted.
                      :
                      :    The sequence is placed with emit_conversion_insns, and the copy
                      :    is reported to dump_file when it is set.  */
     940              : 
     941              : void
     942         8415 : scalar_chain::make_vector_copies (rtx_insn *insn, rtx reg)
     943              : {
     944         8415 :   rtx vreg = *defs_map.get (reg);
     945              : 
     946         8415 :   start_sequence ();
     947         8415 :   if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
     948              :     {
     949            0 :       rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
     950            0 :       if (smode == DImode && !TARGET_64BIT)
     951              :         {
     952            0 :           emit_move_insn (adjust_address (tmp, SImode, 0),
     953              :                           gen_rtx_SUBREG (SImode, reg, 0));
     954            0 :           emit_move_insn (adjust_address (tmp, SImode, 4),
     955              :                           gen_rtx_SUBREG (SImode, reg, 4));
     956              :         }
     957              :       else
     958            0 :         emit_move_insn (copy_rtx (tmp), reg);
     959            0 :       emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
     960              :                               gen_gpr_to_xmm_move_src (vmode, tmp)));
     961              :     }
     962         8415 :   else if (!TARGET_64BIT && smode == DImode)
     963              :     {
     964         8273 :       if (TARGET_SSE4_1)
     965              :         {
     966          356 :           emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
     967              :                                       CONST0_RTX (V4SImode),
     968              :                                       gen_rtx_SUBREG (SImode, reg, 0)));
     969          356 :           emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
     970              :                                         gen_rtx_SUBREG (V4SImode, vreg, 0),
     971              :                                         gen_rtx_SUBREG (SImode, reg, 4),
     972              :                                         GEN_INT (2)));
     973              :         }
     974              :       else
     975              :         {
     976         7917 :           rtx tmp = gen_reg_rtx (DImode);
     977         7917 :           emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
     978              :                                       CONST0_RTX (V4SImode),
     979              :                                       gen_rtx_SUBREG (SImode, reg, 0)));
     980         7917 :           emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
     981              :                                       CONST0_RTX (V4SImode),
     982              :                                       gen_rtx_SUBREG (SImode, reg, 4)));
     983         7917 :           emit_insn (gen_vec_interleave_lowv4si
     984              :                      (gen_rtx_SUBREG (V4SImode, vreg, 0),
     985              :                       gen_rtx_SUBREG (V4SImode, vreg, 0),
     986              :                       gen_rtx_SUBREG (V4SImode, tmp, 0)));
     987              :         }
     988              :     }
     989              :   else
     990          142 :     emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
     991              :                             gen_gpr_to_xmm_move_src (vmode, reg)));
     992         8415 :   rtx_insn *seq = end_sequence ();
     993         8415 :   emit_conversion_insns (seq, insn);
     994              : 
     995         8415 :   if (dump_file)
     996            0 :     fprintf (dump_file,
     997              :              "  Copied r%d to a vector register r%d for insn %d\n",
     998            0 :              REGNO (reg), REGNO (vreg), INSN_UID (insn));
     999         8415 : }
    1000              : 
    1001              : /* Copy the definition SRC of INSN inside the chain to DST for
    1002              :    scalar uses outside of the chain.
                      :
                      :    The copy sequence is emitted after INSN through
                      :    emit_conversion_insns, using one of three strategies:
                      :
                      :    - Without TARGET_INTER_UNIT_MOVES_FROM_VEC, SRC is stored to the
                      :      SLOT_STV_TEMP stack slot and DST is loaded back from it (as
                      :      two SImode halves at offsets 0 and 4 for DImode when
                      :      !TARGET_64BIT).
                      :
                      :    - For DImode when !TARGET_64BIT, the two SImode halves of DST
                      :      are set separately: with TARGET_SSE4_1 by VEC_SELECT of
                      :      elements 0 and 1 of SRC viewed as V4SImode, otherwise by
                      :      copying SRC into a fresh V2DImode pseudo, moving out its low
                      :      SImode part, shifting the pseudo right logically by 32 and
                      :      moving out the low SImode part again.
                      :
                      :    - In all other cases a plain move from SRC to DST is emitted.
                      :
                      :    The copy is reported to dump_file when it is set.  */
    1003              : 
    1004              : void
    1005        21998 : scalar_chain::convert_reg (rtx_insn *insn, rtx dst, rtx src)
    1006              : {
    1007        21998 :   start_sequence ();
    1008        21998 :   if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
    1009              :     {
    1010            0 :       rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
    1011            0 :       emit_move_insn (tmp, src);
    1012            0 :       if (!TARGET_64BIT && smode == DImode)
    1013              :         {
    1014            0 :           emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
    1015              :                           adjust_address (tmp, SImode, 0));
    1016            0 :           emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
    1017              :                           adjust_address (tmp, SImode, 4));
    1018              :         }
    1019              :       else
    1020            0 :         emit_move_insn (dst, copy_rtx (tmp));
    1021              :     }
    1022        21998 :   else if (!TARGET_64BIT && smode == DImode)
    1023              :     {
    1024        21107 :       if (TARGET_SSE4_1)
    1025              :         {
    1026            0 :           rtx tmp = gen_rtx_PARALLEL (VOIDmode,
    1027              :                                       gen_rtvec (1, const0_rtx));
    1028            0 :           emit_insn
    1029            0 :               (gen_rtx_SET
    1030              :                (gen_rtx_SUBREG (SImode, dst, 0),
    1031              :                 gen_rtx_VEC_SELECT (SImode,
    1032              :                                     gen_rtx_SUBREG (V4SImode, src, 0),
    1033              :                                     tmp)));
    1034              : 
    1035            0 :           tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
    1036            0 :           emit_insn
    1037            0 :               (gen_rtx_SET
    1038              :                (gen_rtx_SUBREG (SImode, dst, 4),
    1039              :                 gen_rtx_VEC_SELECT (SImode,
    1040              :                                     gen_rtx_SUBREG (V4SImode, src, 0),
    1041              :                                     tmp)));
    1042              :         }
    1043              :       else
    1044              :         {
    1045        21107 :           rtx vcopy = gen_reg_rtx (V2DImode);
    1046        21107 :           emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, src, 0));
    1047        21107 :           emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
    1048              :                           gen_rtx_SUBREG (SImode, vcopy, 0));
    1049        21107 :           emit_move_insn (vcopy,
    1050              :                           gen_rtx_LSHIFTRT (V2DImode,
    1051              :                                             vcopy, GEN_INT (32)));
    1052        21107 :           emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
    1053              :                           gen_rtx_SUBREG (SImode, vcopy, 0));
    1054              :         }
    1055              :     }
    1056              :   else
    1057          891 :     emit_move_insn (dst, src);
    1058              : 
    1059        21998 :   rtx_insn *seq = end_sequence ();
    1060        21998 :   emit_conversion_insns (seq, insn);
    1061              : 
    1062        21998 :   if (dump_file)
    1063            0 :     fprintf (dump_file,
    1064              :              "  Copied r%d to a scalar register r%d for insn %d\n",
    1065            0 :              REGNO (src), REGNO (dst), INSN_UID (insn));
    1066        21998 : }
    1067              : 
    1068              : /* Helper function to convert immediate constant X to vmode.
                      :
                      :    When X matches constm1_operand in its own mode, the all-ones
                      :    constant CONSTM1_RTX (VMODE) is returned.  Otherwise the result
                      :    is a CONST_VECTOR of VMODE whose element 0 is X and whose
                      :    remaining elements are const0_rtx.  */
    1069              : static rtx
    1070        36524 : smode_convert_cst (rtx x, enum machine_mode vmode)
    1071              : {
    1072              :   /* Prefer all ones vector in case of -1.  */
    1073        36524 :   if (constm1_operand (x, GET_MODE (x)))
    1074          623 :     return CONSTM1_RTX (vmode);
    1075              : 
    1076        35901 :   unsigned n = GET_MODE_NUNITS (vmode);
    1077        35901 :   rtx *v = XALLOCAVEC (rtx, n);
    1078        35901 :   v[0] = x;
    1079        41641 :   for (unsigned i = 1; i < n; ++i)
    1080         5740 :     v[i] = const0_rtx;
    1081        35901 :   return gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
    1082              : }
    1083              : 
    1084              : /* Convert operand OP in INSN.  We should handle
    1085              :    memory operands and uninitialized registers.
    1086              :    All other register uses are converted during
    1087              :    registers conversion.
                      :
                      :    *OP is rewritten in place; any helper insns are emitted before
                      :    INSN.  An operand that already has V1TImode is left untouched.
                      :    Otherwise *OP is first unshared with copy_rtx_if_shared and then
                      :    handled by kind:
                      :
                      :    - NOT or ASHIFT: operand 0 is converted recursively and the
                      :      expression's mode is changed to VMODE.
                      :    - MEM: the value is preloaded into a fresh pseudo, accessed as a
                      :      VMODE SUBREG, using gen_gpr_to_xmm_move_src; *OP becomes that
                      :      SUBREG.  A MEM rejected by memory_operand is loaded into a
                      :      scalar pseudo first.
                      :    - REG: wrapped in a VMODE SUBREG at offset 0.
                      :    - CONST_SCALAR_INT: turned into a vector constant with
                      :      smode_convert_cst, forced into the constant pool unless
                      :      standard_sse_constant_p accepts it, and moved into a fresh
                      :      SMODE pseudo accessed as a VMODE SUBREG.
                      :    - Anything else must already be a SUBREG of mode VMODE, which
                      :      is asserted.  */
    1088              : 
    1089              : void
    1090       247504 : scalar_chain::convert_op (rtx *op, rtx_insn *insn)
    1091              : {
    1092       247504 :   rtx tmp;
    1093              : 
    1094       247504 :   if (GET_MODE (*op) == V1TImode)
    1095              :     return;
    1096              : 
    1097       247353 :   *op = copy_rtx_if_shared (*op);
    1098              : 
    1099       247353 :   if (GET_CODE (*op) == NOT
    1100       247353 :       || GET_CODE (*op) == ASHIFT)
    1101              :     {
    1102         3493 :       convert_op (&XEXP (*op, 0), insn);
    1103         3493 :       PUT_MODE (*op, vmode);
    1104              :     }
    1105              :   else if (MEM_P (*op))
    1106              :     {
    1107       173025 :       rtx_insn *movabs = NULL;
    1108              : 
    1109              :       /* Emit MOVABS to load from a 64-bit absolute address to a GPR.  */
    1110       173025 :       if (!memory_operand (*op, GET_MODE (*op)))
    1111              :         {
    1112            0 :           tmp = gen_reg_rtx (GET_MODE (*op));
    1113            0 :           movabs = emit_insn_before (gen_rtx_SET (tmp, *op), insn);
    1114              : 
    1115            0 :           *op = tmp;
    1116              :         }
    1117              : 
    1118       173025 :       tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (GET_MODE (*op)), 0);
    1119              : 
    1120       173025 :       rtx_insn *eh_insn
    1121       173025 :         = emit_insn_before (gen_rtx_SET (copy_rtx (tmp),
    1122              :                                          gen_gpr_to_xmm_move_src (vmode, *op)),
    1123       173025 :                             insn);
    1124              : 
    1125       173025 :       if (cfun->can_throw_non_call_exceptions)
    1126              :         {
    1127              :           /* Handle REG_EH_REGION note.  When INSN carries one, attach
                      :              it to the preload (or to the MOVABS load if one was
                      :              emitted) and record that insn in control_flow_insns.  */
    1128       168754 :           rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
    1129       168754 :           if (note)
    1130              :             {
    1131         3588 :               if (movabs)
    1132            0 :                 eh_insn = movabs;
    1133         3588 :               control_flow_insns.safe_push (eh_insn);
    1134         3588 :               add_reg_note (eh_insn, REG_EH_REGION, XEXP (note, 0));
    1135              :             }
    1136              :         }
    1137              : 
    1138       173025 :       *op = tmp;
    1139              : 
    1140       173025 :       if (dump_file)
    1141            0 :         fprintf (dump_file, "  Preloading operand for insn %d into r%d\n",
    1142            0 :                  INSN_UID (insn), reg_or_subregno (tmp));
    1143              :     }
    1144              :   else if (REG_P (*op))
    1145        64740 :     *op = gen_rtx_SUBREG (vmode, *op, 0);
    1146              :   else if (CONST_SCALAR_INT_P (*op))
    1147              :     {
    1148         6095 :       rtx vec_cst = smode_convert_cst (*op, vmode);
    1149              : 
    1150         6095 :       if (!standard_sse_constant_p (vec_cst, vmode))
    1151              :         {
    1152         2703 :           start_sequence ();
    1153         2703 :           vec_cst = validize_mem (force_const_mem (vmode, vec_cst));
    1154         2703 :           rtx_insn *seq = end_sequence ();
    1155         2703 :           emit_insn_before (seq, insn);
    1156              :         }
    1157              : 
    1158         6095 :       tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (smode), 0);
    1159              : 
    1160         6095 :       emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
    1161         6095 :       *op = tmp;
    1162              :     }
    1163              :   else
    1164              :     {
    1165            0 :       gcc_assert (SUBREG_P (*op));
    1166            0 :       gcc_assert (GET_MODE (*op) == vmode);
    1167              :     }
    1168              : }
    1169              : 
    1170              : /* Convert the CCZmode COMPARE of OP1 with OP2 in INSN to vector mode.  */
    1171              : /* Helper insns go before INSN; the result is a CCZmode UNSPEC_PTEST rtx.  */
    1172              : rtx
    1173           10 : scalar_chain::convert_compare (rtx op1, rtx op2, rtx_insn *insn)
    1174              : {
    1175           10 :   rtx src, tmp;
    1176              : 
    1177              :   /* Handle any REG_EQUAL/REG_EQUIV note: rewrite a constant operand in vmode, keep a REG operand, else drop the note.  */
    1178           10 :   tmp = find_reg_equal_equiv_note (insn);
    1179           10 :   if (tmp)
    1180              :     {
    1181            1 :       if (GET_CODE (XEXP (tmp, 0)) == COMPARE
    1182            1 :           && GET_MODE (XEXP (tmp, 0)) == CCZmode
    1183            1 :           && REG_P (XEXP (XEXP (tmp, 0), 0)))
    1184              :         {
    1185            1 :           rtx *op = &XEXP (XEXP (tmp, 0), 1);
    1186            1 :           if (CONST_SCALAR_INT_P (*op))
    1187              :             {
    1188            1 :               if (constm1_operand (*op, GET_MODE (*op)))
    1189            0 :                 *op = CONSTM1_RTX (vmode);
    1190              :               else
    1191              :                 {
    1192            1 :                   unsigned n = GET_MODE_NUNITS (vmode);
    1193            1 :                   rtx *v = XALLOCAVEC (rtx, n);
    1194            1 :                   v[0] = *op;  /* Constant in element 0, zeros elsewhere.  */
    1195            1 :                   for (unsigned i = 1; i < n; ++i)
    1196            0 :                     v[i] = const0_rtx;
    1197            1 :                   *op = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
    1198              :                 }
    1199              :               tmp = NULL_RTX;  /* Keep the rewritten note.  */
    1200              :             }
    1201            0 :           else if (REG_P (*op))
    1202              :             tmp = NULL_RTX;  /* Keep the note unchanged.  */
    1203              :         }
    1204              :       /* TMP is still non-null only when the note could not be kept.  */
    1205              :       if (tmp)
    1206            0 :         remove_note (insn, tmp);
    1207              :     }
    1208              : 
    1209              :   /* Comparison against anything other than zero requires an XOR.  */
    1210           10 :   if (op2 != const0_rtx)
    1211              :     {
    1212            4 :       convert_op (&op1, insn);
    1213            4 :       convert_op (&op2, insn);
    1214              :       /* If both operands are MEMs, explicitly load OP1 into TMP.  */
    1215            4 :       if (MEM_P (op1) && MEM_P (op2))
    1216              :         {
    1217            0 :           tmp = gen_reg_rtx (vmode);
    1218            0 :           emit_insn_before (gen_rtx_SET (tmp, op1), insn);
    1219            0 :           src = tmp;
    1220              :         }
    1221              :       else
    1222              :         src = op1;
    1223            4 :       src = gen_rtx_XOR (vmode, src, op2);
    1224              :     }
    1225            6 :   else if (GET_CODE (op1) == AND
    1226            0 :            && GET_CODE (XEXP (op1, 0)) == NOT)
    1227              :     {  /* (~op11 & op12) compared with zero: rebuild the AND-NOT in vmode.  */
    1228            0 :       rtx op11 = XEXP (XEXP (op1, 0), 0);
    1229            0 :       rtx op12 = XEXP (op1, 1);
    1230            0 :       convert_op (&op11, insn);
    1231            0 :       convert_op (&op12, insn);
    1232            0 :       if (!REG_P (op11))
    1233              :         {
    1234            0 :           tmp = gen_reg_rtx (vmode);
    1235            0 :           emit_insn_before (gen_rtx_SET (tmp, op11), insn);
    1236            0 :           op11 = tmp;
    1237              :         }
    1238            0 :       src = gen_rtx_AND (vmode, gen_rtx_NOT (vmode, op11), op12);
    1239            0 :     }
    1240            6 :   else if (GET_CODE (op1) == AND)
    1241              :     {  /* (op11 & op12) compared with zero: return a PTEST of both operands directly.  */
    1242            0 :       rtx op11 = XEXP (op1, 0);
    1243            0 :       rtx op12 = XEXP (op1, 1);
    1244            0 :       convert_op (&op11, insn);
    1245            0 :       convert_op (&op12, insn);
    1246            0 :       if (!REG_P (op11))
    1247              :         {
    1248            0 :           tmp = gen_reg_rtx (vmode);
    1249            0 :           emit_insn_before (gen_rtx_SET (tmp, op11), insn);
    1250            0 :           op11 = tmp;
    1251              :         }
    1252            0 :       return gen_rtx_UNSPEC (CCZmode, gen_rtvec (2, op11, op12),
    1253              :                              UNSPEC_PTEST);
    1254              :     }
    1255              :   else
    1256              :     {
    1257            6 :       convert_op (&op1, insn);
    1258            6 :       src = op1;
    1259              :     }
    1260              :   /* The remaining steps want SRC in a register; copy it into a new one if needed.  */
    1261           10 :   if (!REG_P (src))
    1262              :     {
    1263            6 :       tmp = gen_reg_rtx (vmode);
    1264            6 :       emit_insn_before (gen_rtx_SET (tmp, src), insn);
    1265            6 :       src = tmp;
    1266              :     }
    1267              :   /* For V2DImode/V4SImode, first shuffle SRC with itself (interleave-low resp. pshufd 0); presumably so only the low element is tested.  */
    1268           10 :   if (vmode == V2DImode)
    1269              :     {
    1270            0 :       tmp = gen_reg_rtx (vmode);
    1271            0 :       emit_insn_before (gen_vec_interleave_lowv2di (tmp, src, src), insn);
    1272            0 :       src = tmp;
    1273              :     }
    1274           10 :   else if (vmode == V4SImode)
    1275              :     {
    1276            0 :       tmp = gen_reg_rtx (vmode);
    1277            0 :       emit_insn_before (gen_sse2_pshufd (tmp, src, const0_rtx), insn);
    1278            0 :       src = tmp;
    1279              :     }
    1280              : 
    1281           10 :   return gen_rtx_UNSPEC (CCZmode, gen_rtvec (2, src, src), UNSPEC_PTEST);
    1282              : }
    1283              : 
    1284              : /* Helper function for converting INSN to vector mode.  */
    1285              : /* Fixes up out-of-chain and debug uses of INSN's defs, then rewrites INSN's own uses through defs_map.  */
    1286              : void
    1287      1325975 : scalar_chain::convert_insn_common (rtx_insn *insn)
    1288              : {
    1289              :   /* Generate copies for out-of-chain uses of defs and adjust debug uses.  */
    1290      2029905 :   for (df_ref ref = DF_INSN_DEFS (insn); ref; ref = DF_REF_NEXT_LOC (ref))
    1291       703930 :     if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
    1292              :       {
    1293        23421 :         df_link *use;
    1294        44064 :         for (use = DF_REF_CHAIN (ref); use; use = use->next)
    1295        42641 :           if (NONDEBUG_INSN_P (DF_REF_INSN (use->ref))
    1296        42641 :               && (DF_REF_REG_MEM_P (use->ref)
    1297        38568 :                   || !bitmap_bit_p (insns, DF_REF_INSN_UID (use->ref))))
    1298              :             break;  /* A non-debug use that is a REG_MEM use or is outside the chain.  */
    1299        23421 :         if (use)
    1300        21998 :           convert_reg (insn, DF_REF_REG (ref),
    1301        21998 :                        *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]));
    1302         1423 :         else if (MAY_HAVE_DEBUG_BIND_INSNS)
    1303              :           {
    1304              :             /* If we generated a scalar copy we can leave debug-insns
    1305              :                as-is, if not, we have to adjust them.  */
    1306         1303 :             auto_vec<rtx_insn *, 5> to_reset_debug_insns;
    1307         3866 :             for (use = DF_REF_CHAIN (ref); use; use = use->next)
    1308         2563 :               if (DEBUG_INSN_P (DF_REF_INSN (use->ref)))
    1309              :                 {
    1310          824 :                   rtx_insn *debug_insn = DF_REF_INSN (use->ref);
    1311              :                   /* If there's a reaching definition outside of the
    1312              :                      chain we have to reset.  */
    1313          824 :                   df_link *def;
    1314         2931 :                   for (def = DF_REF_CHAIN (use->ref); def; def = def->next)
    1315         2290 :                     if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def->ref)))
    1316              :                       break;
    1317          824 :                   if (def)
    1318          183 :                     to_reset_debug_insns.safe_push (debug_insn);
    1319              :                   else
    1320              :                     {  /* All reaching defs are in the chain: use the mapped register.  */
    1321          641 :                       *DF_REF_REAL_LOC (use->ref)
    1322          641 :                         = *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]);
    1323          641 :                       df_insn_rescan (debug_insn);
    1324              :                     }
    1325              :                 }
    1326              :             /* Have to do the reset outside of the DF_CHAIN walk to not
    1327              :                disrupt it.  */
    1328         2789 :             while (!to_reset_debug_insns.is_empty ())
    1329              :               {
    1330          183 :                 rtx_insn *debug_insn = to_reset_debug_insns.pop ();
    1331          183 :                 INSN_VAR_LOCATION_LOC (debug_insn) = gen_rtx_UNKNOWN_VAR_LOC ();
    1332          183 :                 df_insn_rescan_debug_internal (debug_insn);
    1333              :               }
    1334         1303 :           }
    1335              :       }
    1336              : 
    1337              :   /* Replace uses in this insn with the defs we use in the chain (REG_MEM uses are left alone).  */
    1338      3316905 :   for (df_ref ref = DF_INSN_USES (insn); ref; ref = DF_REF_NEXT_LOC (ref))
    1339      1990930 :     if (!DF_REF_REG_MEM_P (ref))
    1340       714247 :       if (rtx *vreg = defs_map.get (regno_reg_rtx[DF_REF_REGNO (ref)]))
    1341              :         {
    1342              :           /* Also update a corresponding REG_DEAD note.  */
    1343        35333 :           rtx note = find_reg_note (insn, REG_DEAD, DF_REF_REG (ref));
    1344        35333 :           if (note)
    1345        23369 :             XEXP (note, 0) = *vreg;
    1346        35333 :           *DF_REF_REAL_LOC (ref) = *vreg;
    1347              :         }
    1348      1325975 : }
    1349              : 
    1350              : /* Convert INSN, which is an SImode or DImode rotation by a constant,
    1351              :    to vector mode.  CODE is either ROTATE or ROTATERT with operands
    1352              :    OP0 and OP1; OP1's count is read with INTVAL.  Returns the rtx to use
    1353              :    as the new SET_SRC; any helper insns are emitted before INSN.  */
    1354              : 
    1355              : rtx
    1356           92 : general_scalar_chain::convert_rotate (enum rtx_code code, rtx op0, rtx op1,
    1357              :                                       rtx_insn *insn)
    1358              : {
    1359           92 :   int bits = INTVAL (op1);
    1360           92 :   rtx pat, result;
    1361              :   /* A rotation by zero bits is just the converted operand.  */
    1362           92 :   convert_op (&op0, insn);
    1363           92 :   if (bits == 0)
    1364            0 :     return op0;
    1365              :   /* DImode: turn a left rotate into the equivalent right-rotate count, then pick a sequence by count.  */
    1366           92 :   if (smode == DImode)
    1367              :     {
    1368           92 :       if (code == ROTATE)
    1369           45 :         bits = 64 - bits;
    1370           92 :       if (bits == 32)
    1371              :         {
    1372            0 :           rtx tmp1 = gen_reg_rtx (V4SImode);
    1373            0 :           pat = gen_sse2_pshufd (tmp1, gen_lowpart (V4SImode, op0),
    1374              :                                  GEN_INT (225));
    1375            0 :           emit_insn_before (pat, insn);
    1376            0 :           result = gen_lowpart (V2DImode, tmp1);
    1377              :         }
    1378           92 :       else if (TARGET_AVX512VL)
    1379            0 :         result = simplify_gen_binary (code, V2DImode, op0, op1);  /* Uses the original CODE and OP1.  */
    1380           92 :       else if (bits == 16 || bits == 48)
    1381              :         {
    1382            0 :           rtx tmp1 = gen_reg_rtx (V8HImode);
    1383            0 :           pat = gen_sse2_pshuflw (tmp1, gen_lowpart (V8HImode, op0),
    1384              :                                   GEN_INT (bits == 16 ? 57 : 147));
    1385            0 :           emit_insn_before (pat, insn);
    1386            0 :           result = gen_lowpart (V2DImode, tmp1);
    1387              :         }
    1388           92 :       else if ((bits & 0x07) == 0)
    1389              :         {  /* Count is a multiple of 8: pshufd followed by a V1TImode right shift.  */
    1390            0 :           rtx tmp1 = gen_reg_rtx (V4SImode);
    1391            0 :           pat = gen_sse2_pshufd (tmp1, gen_lowpart (V4SImode, op0),
    1392              :                                  GEN_INT (68));
    1393            0 :           emit_insn_before (pat, insn);
    1394            0 :           rtx tmp2 = gen_reg_rtx (V1TImode);
    1395            0 :           pat = gen_sse2_lshrv1ti3 (tmp2, gen_lowpart (V1TImode, tmp1),
    1396              :                                     GEN_INT (bits));
    1397            0 :           emit_insn_before (pat, insn);
    1398            0 :           result = gen_lowpart (V2DImode, tmp2);
    1399              :         }
    1400              :       else
    1401              :         {  /* General case: pshufd, V2DImode right shift by BITS & 31, then a final pshufd.  */
    1402           92 :           rtx tmp1 = gen_reg_rtx (V4SImode);
    1403           92 :           pat = gen_sse2_pshufd (tmp1, gen_lowpart (V4SImode, op0),
    1404              :                                  GEN_INT (20));
    1405           92 :           emit_insn_before (pat, insn);
    1406           92 :           rtx tmp2 = gen_reg_rtx (V2DImode);
    1407           92 :           pat = gen_lshrv2di3 (tmp2, gen_lowpart (V2DImode, tmp1),
    1408              :                                GEN_INT (bits & 31));
    1409           92 :           emit_insn_before (pat, insn);
    1410           92 :           rtx tmp3 = gen_reg_rtx (V4SImode);
    1411          139 :           pat = gen_sse2_pshufd (tmp3, gen_lowpart (V4SImode, tmp2),
    1412              :                                  GEN_INT (bits > 32 ? 34 : 136));
    1413           92 :           emit_insn_before (pat, insn);
    1414           92 :           result = gen_lowpart (V2DImode, tmp3);
    1415              :         }
    1416              :     }
    1417            0 :   else if (bits == 16)
    1418              :     {  /* Not DImode (SImode per the header comment): a rotate by 16 is one pshuflw.  */
    1419            0 :       rtx tmp1 = gen_reg_rtx (V8HImode);
    1420            0 :       pat = gen_sse2_pshuflw (tmp1, gen_lowpart (V8HImode, op0), GEN_INT (225));
    1421            0 :       emit_insn_before (pat, insn);
    1422            0 :       result = gen_lowpart (V4SImode, tmp1);
    1423              :     }
    1424            0 :   else if (TARGET_AVX512VL)
    1425            0 :     result = simplify_gen_binary (code, V4SImode, op0, op1);
    1426              :   else
    1427              :     {
    1428            0 :       if (code == ROTATE)
    1429            0 :         bits = 32 - bits;
    1430              :       /* Shuffle, then shift the V2DImode view right by BITS; the result is taken as V4SImode.  */
    1431            0 :       rtx tmp1 = gen_reg_rtx (V4SImode);
    1432            0 :       emit_insn_before (gen_sse2_pshufd (tmp1, op0, GEN_INT (224)), insn);
    1433            0 :       rtx tmp2 = gen_reg_rtx (V2DImode);
    1434            0 :       pat = gen_lshrv2di3 (tmp2, gen_lowpart (V2DImode, tmp1),
    1435              :                            GEN_INT (bits));
    1436            0 :       emit_insn_before (pat, insn);
    1437            0 :       result = gen_lowpart (V4SImode, tmp2);
    1438              :     }
    1439              : 
    1440              :   return result;
    1441              : }
    1442              : 
    1443              : /* Convert INSN, a single-set insn, to vector mode and re-recognize it.  */
    1444              : /* Calls fatal_insn_not_found if the rewritten pattern is not recognized.  */
    1445              : void
    1446       411623 : general_scalar_chain::convert_insn (rtx_insn *insn)
    1447              : {
    1448       411623 :   rtx def_set = single_set (insn);
    1449       411623 :   rtx src = SET_SRC (def_set);
    1450       411623 :   rtx dst = SET_DEST (def_set);
    1451       411623 :   rtx subreg;
    1452              :   /* First pick the vector-mode destination.  */
    1453       411623 :   if (MEM_P (dst) && !REG_P (src))
    1454              :     {
    1455              :       /* There are no scalar integer instructions and therefore
    1456              :          temporary register usage is required.  */
    1457          758 :       rtx tmp = gen_reg_rtx (smode);
    1458          758 :       emit_conversion_insns (gen_move_insn (dst, tmp), insn);
    1459          758 :       dst = gen_rtx_SUBREG (vmode, tmp, 0);
    1460          758 :     }
    1461       410865 :   else if (REG_P (dst) && GET_MODE (dst) == smode)
    1462              :     {
    1463              :       /* Replace the definition with a SUBREG to the definition we
    1464              :          use inside the chain.  */
    1465       215538 :       rtx *vdef = defs_map.get (dst);
    1466       215538 :       if (vdef)
    1467        23421 :         dst = *vdef;
    1468       215538 :       dst = gen_rtx_SUBREG (vmode, dst, 0);
    1469              :       /* IRA doesn't like to have REG_EQUAL/EQUIV notes when the SET_DEST
    1470              :          is a non-REG_P.  So kill those off.  */
    1471       215538 :       rtx note = find_reg_equal_equiv_note (insn);
    1472       215538 :       if (note)
    1473         9741 :         remove_note (insn, note);
    1474              :     }
    1475              :   /* Then rewrite the source according to its rtx code.  */
    1476       411623 :   switch (GET_CODE (src))
    1477              :     {
    1478        30083 :     case PLUS:
    1479        30083 :     case MINUS:
    1480        30083 :     case IOR:
    1481        30083 :     case XOR:
    1482        30083 :     case AND:
    1483        30083 :     case SMAX:
    1484        30083 :     case SMIN:
    1485        30083 :     case UMAX:
    1486        30083 :     case UMIN:
    1487        30083 :       convert_op (&XEXP (src, 1), insn);
    1488              :       /* FALLTHRU */
    1489              :       /* For the codes below only operand 0 is converted; SRC is retagged to vmode in place.  */
    1490        37481 :     case ABS:
    1491        37481 :     case ASHIFT:
    1492        37481 :     case ASHIFTRT:
    1493        37481 :     case LSHIFTRT:
    1494        37481 :       convert_op (&XEXP (src, 0), insn);
    1495        37481 :       PUT_MODE (src, vmode);
    1496        37481 :       break;
    1497              : 
    1498           92 :     case ROTATE:
    1499           92 :     case ROTATERT:
    1500           92 :       src = convert_rotate (GET_CODE (src), XEXP (src, 0), XEXP (src, 1),
    1501              :                             insn);
    1502           92 :       break;
    1503              : 
    1504          391 :     case NEG:
    1505          391 :       src = XEXP (src, 0);
    1506              :       /* For -abs (x), compute the ABS into a fresh vector register first.  */
    1507          391 :       if (GET_CODE (src) == ABS)
    1508              :         {
    1509            0 :           src = XEXP (src, 0);
    1510            0 :           convert_op (&src, insn);
    1511            0 :           subreg = gen_reg_rtx (vmode);
    1512            0 :           emit_insn_before (gen_rtx_SET (subreg,
    1513              :                                          gen_rtx_ABS (vmode, src)), insn);
    1514            0 :           src = subreg;
    1515              :         }
    1516              :       else
    1517          391 :         convert_op (&src, insn);
    1518              :       /* Negate as 0 - SRC.  */
    1519          391 :       subreg = gen_reg_rtx (vmode);
    1520          391 :       emit_insn_before (gen_move_insn (subreg, CONST0_RTX (vmode)), insn);
    1521          391 :       src = gen_rtx_MINUS (vmode, subreg, src);
    1522          391 :       break;
    1523              : 
    1524          250 :     case NOT:
    1525          250 :       src = XEXP (src, 0);
    1526          250 :       convert_op (&src, insn);
    1527          250 :       subreg = gen_reg_rtx (vmode);
    1528          250 :       emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (vmode)), insn);
    1529          250 :       src = gen_rtx_XOR (vmode, src, subreg);  /* NOT as XOR with all-ones.  */
    1530          250 :       break;
    1531              : 
    1532       170873 :     case MEM:
    1533       170873 :       if (!REG_P (dst))
    1534       170873 :         convert_op (&src, insn);
    1535              :       break;
    1536              : 
    1537       196629 :     case REG:
    1538       196629 :       if (!MEM_P (dst))
    1539         1302 :         convert_op (&src, insn);
    1540              :       break;
    1541              : 
    1542            0 :     case SUBREG:
    1543            0 :       gcc_assert (GET_MODE (src) == vmode);
    1544              :       break;
    1545              : 
    1546            0 :     case COMPARE:
    1547            0 :       dst = gen_rtx_REG (CCZmode, FLAGS_REG);
    1548            0 :       src = convert_compare (XEXP (src, 0), XEXP (src, 1), insn);
    1549            0 :       break;
    1550              : 
    1551         3351 :     case CONST_INT:
    1552         3351 :       convert_op (&src, insn);
    1553         3351 :       break;
    1554              : 
    1555         2556 :     case VEC_SELECT:
    1556         2556 :       if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx)
    1557         1565 :         src = XEXP (src, 0);  /* Selecting element 0: use the vector operand itself.  */
    1558          991 :       else if (smode == DImode)
    1559              :         {  /* DImode, other element: shift the V1TImode view right by 64 bits.  */
    1560          746 :           rtx tmp = gen_lowpart (V1TImode, XEXP (src, 0));
    1561          746 :           dst = gen_lowpart (V1TImode, dst);
    1562          746 :           src = gen_rtx_LSHIFTRT (V1TImode, tmp, GEN_INT (64));
    1563              :         }
    1564              :       else
    1565              :         {  /* Otherwise select the same element index into all four lanes.  */
    1566          245 :           rtx tmp = XVECEXP (XEXP (src, 1), 0, 0);
    1567          245 :           rtvec vec = gen_rtvec (4, tmp, tmp, tmp, tmp);
    1568          245 :           rtx par = gen_rtx_PARALLEL (VOIDmode, vec);
    1569          245 :           src = gen_rtx_VEC_SELECT (vmode, XEXP (src, 0), par);
    1570              :         }
    1571              :       break;
    1572              : 
    1573            0 :     default:
    1574            0 :       gcc_unreachable ();
    1575              :     }
    1576              : 
    1577       411623 :   SET_SRC (def_set) = src;
    1578       411623 :   SET_DEST (def_set) = dst;
    1579              : 
    1580              :   /* Drop possible dead definitions.  */
    1581       411623 :   PATTERN (insn) = def_set;
    1582              :   /* Force re-recognition of the rewritten pattern, then rescan INSN for DF.  */
    1583       411623 :   INSN_CODE (insn) = -1;
    1584       411623 :   int patt = recog_memoized (insn);
    1585       411623 :   if  (patt == -1)
    1586            0 :     fatal_insn_not_found (insn);
    1587       411623 :   df_insn_rescan (insn);
    1588       411623 : }
    1589              : 
    1590              : /* Helper function to compute gain for loading the immediate constant
    1591              :    CST in basic block BB.  Typically, two movabsq for TImode vs. vmovdqa
    1592              :    for V1TImode, but with numerous special cases.  */
    1593              : /* Returns a negative gain when CST is a two-element CONST_WIDE_INT whose elements are equal, else 0.  */
    1594              : static int
    1595            8 : timode_immed_const_gain (rtx cst, basic_block bb)
    1596              : {
    1597              :   /* movabsq vs. movabsq+vmovq+vunpacklqdq.  */
    1598            8 :   if (CONST_WIDE_INT_P (cst)
    1599            5 :       && CONST_WIDE_INT_NUNITS (cst) == 2
    1600           13 :       && CONST_WIDE_INT_ELT (cst, 0) == CONST_WIDE_INT_ELT (cst, 1))
    1601            0 :     return optimize_bb_for_size_p (bb) ? -COSTS_N_BYTES (9)
    1602              :                                        : -COSTS_N_INSNS (2);
    1603              :   /* 2x movabsq ~ vmovdqa.  */
    1604              :   return 0;
    1605              : }
    1606              : 
    1607              : /* Return true if converting the chain is cost-profitable.  */
    1608              : 
    1609              : bool
    1610       465867 : timode_scalar_chain::compute_convert_gain ()
    1611              : {
    1612              :   /* Assume that if we have to move TImode values between units,
    1613              :      then transforming this chain isn't worth it.  */
    1614       465867 :   if (cost_sse_integer)
    1615              :     return false;
    1616              : 
    1617       465867 :   bitmap_iterator bi;
    1618       465867 :   unsigned insn_uid;
    1619              : 
    1620              :   /* Split ties to prefer V1TImode when not optimizing for size.  */
    1621       465867 :   int gain = optimize_size ? 0 : 1;
    1622       465867 :   sreal weighted_gain  = 0;
    1623              : 
    1624       465867 :   if (dump_file)
    1625            0 :     fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
    1626              : 
    1627      1386392 :   EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
    1628              :     {
    1629       920525 :       rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
    1630       920525 :       rtx def_set = single_set (insn);
    1631       920525 :       rtx src = SET_SRC (def_set);
    1632       920525 :       rtx dst = SET_DEST (def_set);
    1633       920525 :       HOST_WIDE_INT op1val;
    1634       920525 :       basic_block bb = BLOCK_FOR_INSN (insn);
    1635       920525 :       int scost, vcost;
    1636       920525 :       int igain = 0;
    1637       920525 :       profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
    1638       920525 :       bool speed_p = optimize_bb_for_speed_p (bb);
    1639       920525 :       sreal bb_freq = bb->count.to_sreal_scale (entry_count);
    1640              : 
    1641       920525 :       switch (GET_CODE (src))
    1642              :         {
    1643       454277 :         case REG:
    1644       454277 :           if (!speed_p)
    1645        20528 :             igain = MEM_P (dst) ? COSTS_N_BYTES (6) : COSTS_N_BYTES (3);
    1646              :           else
    1647              :             igain = COSTS_N_INSNS (1);
    1648              :           break;
    1649              : 
    1650       421720 :         case MEM:
    1651       421720 :           igain = !speed_p ? COSTS_N_BYTES (7) : COSTS_N_INSNS (1);
    1652              :           break;
    1653              : 
    1654        10717 :         case CONST_INT:
    1655        10717 :           if (MEM_P (dst)
    1656        10717 :               && standard_sse_constant_p (src, V1TImode))
    1657        10185 :             igain = !speed_p ? COSTS_N_BYTES (11) : 1;
    1658              :           break;
    1659              : 
    1660        30624 :         case CONST_WIDE_INT:
    1661              :           /* 2 x mov vs. vmovdqa.  */
    1662        30624 :           if (MEM_P (dst))
    1663        30419 :             igain = !speed_p ? COSTS_N_BYTES (3) : COSTS_N_INSNS (1);
    1664              :           break;
    1665              : 
    1666           19 :         case NOT:
    1667           19 :           if (MEM_P (dst))
    1668        24423 :             igain = -COSTS_N_INSNS (1);
    1669              :           break;
    1670              : 
    1671           14 :         case AND:
    1672           14 :           if (!MEM_P (dst))
    1673            3 :             igain = COSTS_N_INSNS (1);
    1674           14 :           if (CONST_SCALAR_INT_P (XEXP (src, 1)))
    1675            5 :             igain += timode_immed_const_gain (XEXP (src, 1), bb);
    1676              :           break;
    1677              : 
    1678         2754 :         case XOR:
    1679         2754 :         case IOR:
    1680         2754 :           if (timode_concatdi_p (src))
    1681              :             {
    1682              :               /* vmovq;vpinsrq (11 bytes).  */
    1683         2703 :               igain = speed_p ? -2 * ix86_cost->sse_to_integer
    1684              :                               : -COSTS_N_BYTES (11);
    1685              :               break;
    1686              :             }
    1687           51 :           if (!MEM_P (dst))
    1688           43 :             igain = COSTS_N_INSNS (1);
    1689           51 :           if (CONST_SCALAR_INT_P (XEXP (src, 1)))
    1690            3 :             igain += timode_immed_const_gain (XEXP (src, 1), bb);
    1691              :           break;
    1692              : 
    1693            0 :         case PLUS:
    1694            0 :           if (timode_concatdi_p (src))
    1695              :             /* vmovq;vpinsrq (11 bytes).  */
    1696            0 :             igain = speed_p ? -2 * ix86_cost->sse_to_integer
    1697              :                             : -COSTS_N_BYTES (11);
    1698              :           break;
    1699              : 
    1700          158 :         case ASHIFT:
    1701          158 :         case LSHIFTRT:
    1702              :           /* See ix86_expand_v1ti_shift.  */
    1703          158 :           op1val = INTVAL (XEXP (src, 1));
    1704          158 :           if (!speed_p)
    1705              :             {
    1706           15 :               if (op1val == 64 || op1val == 65)
    1707              :                 scost = COSTS_N_BYTES (5);
    1708           10 :               else if (op1val >= 66)
    1709              :                 scost = COSTS_N_BYTES (6);
    1710           10 :               else if (op1val == 1)
    1711              :                 scost = COSTS_N_BYTES (8);
    1712              :               else
    1713              :                 scost = COSTS_N_BYTES (9);
    1714              : 
    1715           14 :               if ((op1val & 7) == 0)
    1716              :                 vcost = COSTS_N_BYTES (5);
    1717           10 :               else if (op1val > 64)
    1718              :                 vcost = COSTS_N_BYTES (10);
    1719              :               else
    1720           10 :                 vcost = TARGET_AVX ? COSTS_N_BYTES (19) : COSTS_N_BYTES (23);
    1721              :             }
    1722              :           else
    1723              :             {
    1724          143 :               scost = COSTS_N_INSNS (2);
    1725          143 :               if ((op1val & 7) == 0)
    1726              :                 vcost = COSTS_N_INSNS (1);
    1727          110 :               else if (op1val > 64)
    1728              :                 vcost = COSTS_N_INSNS (2);
    1729              :               else
    1730          110 :                 vcost = TARGET_AVX ? COSTS_N_INSNS (4) : COSTS_N_INSNS (5);
    1731              :             }
    1732          158 :           igain = scost - vcost;
    1733          158 :           break;
    1734              : 
    1735          103 :         case ASHIFTRT:
    1736              :           /* See ix86_expand_v1ti_ashiftrt.  */
    1737          103 :           op1val = INTVAL (XEXP (src, 1));
    1738          103 :           if (!speed_p)
    1739              :             {
    1740            7 :               if (op1val == 64 || op1val == 127)
    1741              :                 scost = COSTS_N_BYTES (7);
    1742            7 :               else if (op1val == 1)
    1743              :                 scost = COSTS_N_BYTES (8);
    1744            7 :               else if (op1val == 65)
    1745              :                 scost = COSTS_N_BYTES (10);
    1746            7 :               else if (op1val >= 66)
    1747              :                 scost = COSTS_N_BYTES (11);
    1748              :               else
    1749              :                 scost = COSTS_N_BYTES (9);
    1750              : 
    1751            0 :               if (op1val == 127)
    1752              :                 vcost = COSTS_N_BYTES (10);
    1753            7 :               else if (op1val == 64)
    1754              :                 vcost = COSTS_N_BYTES (14);
    1755            7 :               else if (op1val == 96)
    1756              :                 vcost = COSTS_N_BYTES (18);
    1757            7 :               else if (op1val >= 111)
    1758              :                 vcost = COSTS_N_BYTES (15);
    1759            7 :               else if (TARGET_AVX2 && op1val == 32)
    1760              :                 vcost = COSTS_N_BYTES (16);
    1761            7 :               else if (TARGET_SSE4_1 && op1val == 32)
    1762              :                 vcost = COSTS_N_BYTES (20);
    1763            7 :               else if (op1val >= 96)
    1764              :                 vcost = COSTS_N_BYTES (23);
    1765            7 :               else if ((op1val & 7) == 0)
    1766              :                 vcost = COSTS_N_BYTES (28);
    1767            7 :               else if (TARGET_AVX2 && op1val < 32)
    1768              :                 vcost = COSTS_N_BYTES (30);
    1769            7 :               else if (op1val == 1 || op1val >= 64)
    1770              :                 vcost = COSTS_N_BYTES (42);
    1771              :               else
    1772            7 :                 vcost = COSTS_N_BYTES (47);
    1773              :             }
    1774              :           else
    1775              :             {
    1776           96 :               if (op1val >= 65 && op1val <= 126)
    1777              :                 scost = COSTS_N_INSNS (3);
    1778              :               else
    1779           96 :                 scost = COSTS_N_INSNS (2);
    1780              : 
    1781           96 :               if (op1val == 127)
    1782              :                 vcost = COSTS_N_INSNS (2);
    1783           96 :               else if (op1val == 64)
    1784              :                 vcost = COSTS_N_INSNS (3);
    1785           96 :               else if (op1val == 96)
    1786              :                 vcost = COSTS_N_INSNS (3);
    1787           96 :               else if (op1val >= 111)
    1788              :                 vcost = COSTS_N_INSNS (3);
    1789           96 :               else if (TARGET_SSE4_1 && op1val == 32)
    1790              :                 vcost = COSTS_N_INSNS (3);
    1791           96 :               else if (TARGET_SSE4_1
    1792            0 :                        && (op1val == 8 || op1val == 16 || op1val == 24))
    1793              :                 vcost = COSTS_N_INSNS (3);
    1794           96 :               else if (op1val >= 96)
    1795              :                 vcost = COSTS_N_INSNS (4);
    1796           96 :               else if (TARGET_SSE4_1 && (op1val == 28 || op1val == 80))
    1797              :                 vcost = COSTS_N_INSNS (4);
    1798           96 :               else if ((op1val & 7) == 0)
    1799              :                 vcost = COSTS_N_INSNS (5);
    1800           96 :               else if (TARGET_AVX2 && op1val < 32)
    1801              :                 vcost = COSTS_N_INSNS (6);
    1802           96 :               else if (TARGET_SSE4_1 && op1val < 15)
    1803              :                 vcost = COSTS_N_INSNS (6);
    1804           96 :               else if (op1val == 1 || op1val >= 64)
    1805              :                 vcost = COSTS_N_INSNS (8);
    1806              :               else
    1807            0 :                 vcost = COSTS_N_INSNS (9);
    1808              :             }
    1809          103 :           igain = scost - vcost;
    1810          103 :           break;
    1811              : 
    1812            5 :         case ROTATE:
    1813            5 :         case ROTATERT:
    1814              :           /* See ix86_expand_v1ti_rotate.  */
    1815            5 :           op1val = INTVAL (XEXP (src, 1));
    1816            5 :           if (!speed_p)
    1817              :             {
    1818            0 :               scost = COSTS_N_BYTES (13);
    1819            0 :               if ((op1val & 31) == 0)
    1820              :                 vcost = COSTS_N_BYTES (5);
    1821            0 :               else if ((op1val & 7) == 0)
    1822            0 :                 vcost = TARGET_AVX ? COSTS_N_BYTES (13) : COSTS_N_BYTES (18);
    1823            0 :               else if (op1val > 32 && op1val < 96)
    1824              :                 vcost = COSTS_N_BYTES (24);
    1825              :               else
    1826            0 :                 vcost = COSTS_N_BYTES (19);
    1827              :             }
    1828              :           else
    1829              :             {
    1830            5 :               scost = COSTS_N_INSNS (3);
    1831            5 :               if ((op1val & 31) == 0)
    1832              :                 vcost = COSTS_N_INSNS (1);
    1833            3 :               else if ((op1val & 7) == 0)
    1834            1 :                 vcost = TARGET_AVX ? COSTS_N_INSNS (3) : COSTS_N_INSNS (4);
    1835            2 :               else if (op1val > 32 && op1val < 96)
    1836              :                 vcost = COSTS_N_INSNS (5);
    1837              :               else
    1838            2 :                 vcost = COSTS_N_INSNS (1);
    1839              :             }
    1840            5 :           igain = scost - vcost;
    1841            5 :           break;
    1842              : 
    1843           12 :         case COMPARE:
    1844           12 :           if (XEXP (src, 1) == const0_rtx)
    1845              :             {
    1846            8 :               if (GET_CODE (XEXP (src, 0)) == AND)
    1847              :                 /* and;and;or (9 bytes) vs. ptest (5 bytes).  */
    1848              :                 igain = !speed_p ? COSTS_N_BYTES (4) : COSTS_N_INSNS (2);
    1849              :               /* or (3 bytes) vs. ptest (5 bytes).  */
    1850            8 :               else if (!speed_p)
    1851            0 :                 igain = -COSTS_N_BYTES (2);
    1852              :             }
    1853            4 :           else if (XEXP (src, 1) == const1_rtx)
    1854              :             /* and;cmp -1 (7 bytes) vs. pcmpeqd;pxor;ptest (13 bytes).  */
    1855            0 :             igain = !speed_p ? -COSTS_N_BYTES (6) : -COSTS_N_INSNS (1);
    1856              :           break;
    1857              : 
    1858          122 :         case ZERO_EXTEND:
    1859          122 :           if (GET_MODE (XEXP (src, 0)) == DImode)
    1860              :             /* xor (2 bytes) vs. vmovq (5 bytes).  */
    1861          122 :             igain = speed_p ? COSTS_N_INSNS (1) - ix86_cost->sse_to_integer
    1862              :                             : -COSTS_N_BYTES (3);
    1863              :           break;
    1864              : 
    1865              :         default:
    1866              :           break;
    1867              :         }
    1868              : 
    1869      1799362 :       gain += igain;
    1870       920517 :       if (speed_p)
    1871       878845 :         weighted_gain += bb_freq * igain;
    1872              : 
    1873       920525 :       if (igain != 0 && dump_file)
    1874              :         {
    1875            0 :           fprintf (dump_file, "  Instruction gain %d with bb_freq %.2f for ",
    1876              :                    igain, bb_freq.to_double ());
    1877            0 :           dump_insn_slim (dump_file, insn);
    1878              :         }
    1879              :     }
    1880              : 
    1881       465867 :   if (dump_file)
    1882            0 :     fprintf (dump_file, "  Total gain: %d, weighted gain %.2f\n",
    1883              :              gain, weighted_gain.to_double ());
    1884              : 
    1885       465867 :   if (weighted_gain > (sreal) 0)
    1886              :     return true;
    1887              :   else
    1888        24367 :     return gain > 0;
    1889              : }
    1890              : 
    1891              : /* Fix uses of converted REG in debug insns: each use that is a V1TImode
    1892              :    register is wrapped in a TImode subreg.  No-op unless flag_var_tracking.  */
    1893              : void
    1894       422977 : timode_scalar_chain::fix_debug_reg_uses (rtx reg)
    1895              : {
    1896       422977 :   if (!flag_var_tracking)
    1897              :     return;
    1898              :   /* Walk the DF use chain of REG, one instruction at a time.  */
    1899       371904 :   df_ref ref, next;
    1900       761648 :   for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
    1901              :     {
    1902       389744 :       rtx_insn *insn = DF_REF_INSN (ref);
    1903              :       /* Make sure the next ref is for a different instruction,
    1904              :          so that we're not affected by the rescan.  */
    1905       389744 :       next = DF_REF_NEXT_REG (ref);
    1906       389744 :       while (next && DF_REF_INSN (next) == insn)
    1907            0 :         next = DF_REF_NEXT_REG (next);
    1908              :       /* Non-debug uses are left untouched by this function.  */
    1909       389744 :       if (DEBUG_INSN_P (insn))
    1910              :         {
    1911              :           /* It may be a debug insn with a TImode variable in
    1912              :              register.  Visit all refs belonging to this insn.  */
    1913              :           bool changed = false;
    1914          176 :           for (; ref != next; ref = DF_REF_NEXT_REG (ref))
    1915              :             {
    1916           88 :               rtx *loc = DF_REF_LOC (ref);
    1917           88 :               if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
    1918              :                 {
    1919           84 :                   *loc = gen_rtx_SUBREG (TImode, *loc, 0);
    1920           84 :                   changed = true;
    1921              :                 }
    1922              :             }
    1923           88 :           if (changed)
    1924           84 :             df_insn_rescan (insn);
    1925              :         }
    1926              :     }
    1927              : }
    1928              : 
    1929              : /* Convert SRC, a *concatditi3 pattern, into a vec_concatv2di instruction.
    1930              :    Insert this before INSN, and return the result as a V1TImode subreg of a
    1931              :    fresh V2DImode pseudo.  The ASHIFT operand of SRC supplies the high half.  */
    1932              : static rtx
    1933          253 : timode_convert_concatdi (rtx src, rtx_insn *insn)
    1934              : {
    1935          253 :   rtx hi, lo;
    1936          253 :   rtx tmp = gen_reg_rtx (V2DImode);
    1937          253 :   if (GET_CODE (XEXP (src, 0)) == ASHIFT)
    1938              :     {
    1939          253 :       hi = XEXP (XEXP (XEXP (src, 0), 0), 0);
    1940          253 :       lo = XEXP (XEXP (src, 1), 0);
    1941              :     }
    1942              :   else  /* Otherwise operand 1 is taken to be the shifted one.  */
    1943              :     {
    1944            0 :       hi = XEXP (XEXP (XEXP (src, 1), 0), 0);
    1945            0 :       lo = XEXP (XEXP (src, 0), 0);
    1946              :     }
    1947          253 :   emit_insn_before (gen_vec_concatv2di (tmp, lo, hi), insn);
    1948          253 :   return gen_rtx_SUBREG (V1TImode, tmp, 0);
    1949              : }
    1950              : 
    1951              : /* Convert INSN from TImode to V1TImode.  INSN is expected to be a single set
    1952              :    whose destination is a REG or a MEM; unhandled codes hit gcc_unreachable.  */
    1953              : void
    1954       914352 : timode_scalar_chain::convert_insn (rtx_insn *insn)
    1955              : {
    1956       914352 :   rtx def_set = single_set (insn);
    1957       914352 :   rtx src = SET_SRC (def_set);
    1958       914352 :   rtx dst = SET_DEST (def_set);
    1959       914352 :   rtx tmp;
    1960              :   /* Retarget the destination to V1TImode.  */
    1961       914352 :   switch (GET_CODE (dst))
    1962              :     {
    1963       422987 :     case REG:
    1964       422987 :       if (GET_MODE (dst) == TImode)
    1965              :         {
    1966       421214 :           PUT_MODE (dst, V1TImode);
    1967       421214 :           fix_debug_reg_uses (dst);
    1968              :         }
    1969       422987 :       if (GET_MODE (dst) == V1TImode)
    1970              :         {
    1971              :           /* It might potentially be helpful to convert REG_EQUAL notes,
    1972              :              but for now we just remove them.  */
    1973       422977 :           rtx note = find_reg_equal_equiv_note (insn);
    1974       422977 :           if (note)
    1975          444 :             remove_note (insn, note);
    1976              :         }
    1977              :       break;
    1978       491365 :     case MEM:
    1979       491365 :       PUT_MODE (dst, V1TImode);
    1980       491365 :       break;
    1981              : 
    1982            0 :     default:
    1983            0 :       gcc_unreachable ();
    1984              :     }
    1985              :   /* Rewrite the source according to its RTX code.  */
    1986       914352 :   switch (GET_CODE (src))
    1987              :     {
    1988       450868 :     case REG:
    1989       450868 :       if (GET_MODE (src) == TImode)
    1990              :         {
    1991         1763 :           PUT_MODE (src, V1TImode);
    1992         1763 :           fix_debug_reg_uses (src);
    1993              :         }
    1994              :       break;
    1995              : 
    1996       421672 :     case MEM:
    1997       421672 :       PUT_MODE (src, V1TImode);
    1998       421672 :       break;
    1999              : 
    2000        30623 :     case CONST_WIDE_INT:
    2001        30623 :       if (NONDEBUG_INSN_P (insn))
    2002              :         {
    2003              :           /* Since there are no instructions to store 128-bit constant,
    2004              :              temporary register usage is required.  */
    2005        30623 :           bool use_move;
    2006        30623 :           start_sequence ();
    2007        30623 :           tmp = ix86_convert_const_wide_int_to_broadcast (TImode, src);
    2008        30623 :           if (tmp)
    2009              :             {
    2010          194 :               src = lowpart_subreg (V1TImode, tmp, TImode);
    2011          194 :               use_move = true;
    2012              :             }
    2013              :           else
    2014              :             {
    2015        30429 :               src = smode_convert_cst (src, V1TImode);
    2016        30429 :               src = validize_mem (force_const_mem (V1TImode, src));
    2017        30429 :               use_move = MEM_P (dst);
    2018              :             }
    2019        30623 :           rtx_insn *seq = end_sequence ();
    2020        30623 :           if (seq)
    2021          195 :             emit_insn_before (seq, insn);
    2022        30623 :           if (use_move)
    2023              :             {
    2024        30420 :               tmp = gen_reg_rtx (V1TImode);
    2025        30420 :               emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2026        30420 :               src = tmp;
    2027              :             }
    2028              :         }
    2029              :       break;
    2030              :     /* Only constants that standard_sse_constant_p classifies as 1 or 2.  */
    2031        10717 :     case CONST_INT:
    2032        10717 :       switch (standard_sse_constant_p (src, TImode))
    2033              :         {
    2034        10494 :         case 1:
    2035        10494 :           src = CONST0_RTX (GET_MODE (dst));
    2036        10494 :           break;
    2037          223 :         case 2:
    2038          223 :           src = CONSTM1_RTX (GET_MODE (dst));
    2039          223 :           break;
    2040            0 :         default:
    2041            0 :           gcc_unreachable ();
    2042              :         }
    2043        10717 :       if (MEM_P (dst))
    2044              :         {
    2045        10185 :           tmp = gen_reg_rtx (V1TImode);
    2046        10185 :           emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2047        10185 :           src = tmp;
    2048              :         }
    2049              :       break;
    2050              :     /* Logical ops: for a MEM destination the plain forms go through a pseudo.  */
    2051           13 :     case AND:
    2052           13 :       if (GET_CODE (XEXP (src, 0)) == NOT)
    2053              :         {
    2054            0 :           convert_op (&XEXP (XEXP (src, 0), 0), insn);
    2055            0 :           convert_op (&XEXP (src, 1), insn);
    2056            0 :           PUT_MODE (XEXP (src, 0), V1TImode);
    2057            0 :           PUT_MODE (src, V1TImode);
    2058            0 :           break;
    2059              :         }
    2060           13 :       convert_op (&XEXP (src, 0), insn);
    2061           13 :       convert_op (&XEXP (src, 1), insn);
    2062           13 :       PUT_MODE (src, V1TImode);
    2063           13 :       if (MEM_P (dst))
    2064              :         {
    2065           10 :           tmp = gen_reg_rtx (V1TImode);
    2066           10 :           emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2067           10 :           src = tmp;
    2068              :         }
    2069              :       break;
    2070              : 
    2071          304 :     case XOR:
    2072          304 :     case IOR:
    2073          304 :       if (timode_concatdi_p (src))
    2074              :         {
    2075          253 :           src = timode_convert_concatdi (src, insn);
    2076          253 :           break;
    2077              :         }
    2078           51 :       convert_op (&XEXP (src, 0), insn);
    2079           51 :       convert_op (&XEXP (src, 1), insn);
    2080           51 :       PUT_MODE (src, V1TImode);
    2081           51 :       if (MEM_P (dst))
    2082              :         {
    2083            8 :           tmp = gen_reg_rtx (V1TImode);
    2084            8 :           emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2085            8 :           src = tmp;
    2086              :         }
    2087              :       break;
    2088              :     /* NOT becomes an XOR with an all-ones V1TImode pseudo.  */
    2089            3 :     case NOT:
    2090            3 :       src = XEXP (src, 0);
    2091            3 :       convert_op (&src, insn);
    2092            3 :       tmp = gen_reg_rtx (V1TImode);
    2093            3 :       emit_insn_before (gen_move_insn (tmp, CONSTM1_RTX (V1TImode)), insn);
    2094            3 :       src = gen_rtx_XOR (V1TImode, src, tmp);
    2095            3 :       if (MEM_P (dst))
    2096              :         {
    2097            0 :           tmp = gen_reg_rtx (V1TImode);
    2098            0 :           emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2099            0 :           src = tmp;
    2100              :         }
    2101              :       break;
    2102              :     /* Comparisons are redirected to set FLAGS_REG in CCZmode.  */
    2103           10 :     case COMPARE:
    2104           10 :       dst = gen_rtx_REG (CCZmode, FLAGS_REG);
    2105           10 :       src = convert_compare (XEXP (src, 0), XEXP (src, 1), insn);
    2106           10 :       break;
    2107              :     /* Shifts and rotates: only operand 0 is converted.  */
    2108           43 :     case ASHIFT:
    2109           43 :     case LSHIFTRT:
    2110           43 :     case ASHIFTRT:
    2111           43 :     case ROTATERT:
    2112           43 :     case ROTATE:
    2113           43 :       convert_op (&XEXP (src, 0), insn);
    2114           43 :       PUT_MODE (src, V1TImode);
    2115           43 :       break;
    2116              :     /* Zero extension is accepted from DImode only.  */
    2117           99 :     case ZERO_EXTEND:
    2118           99 :       if (GET_MODE (XEXP (src, 0)) == DImode)
    2119              :         {
    2120              :           /* Convert to *vec_concatv2di_0.  */
    2121           99 :           rtx tmp = gen_reg_rtx (V2DImode);
    2122           99 :           rtx pat = gen_rtx_VEC_CONCAT (V2DImode, XEXP (src, 0), const0_rtx);
    2123           99 :           emit_insn_before (gen_move_insn (tmp, pat), insn);
    2124           99 :           src = gen_rtx_SUBREG (vmode, tmp, 0);
    2125              :         }
    2126              :       else
    2127            0 :         gcc_unreachable ();
    2128           99 :       break;
    2129              :     /* PLUS is accepted in the concatdi form only.  */
    2130            0 :     case PLUS:
    2131            0 :       if (timode_concatdi_p (src))
    2132            0 :         src = timode_convert_concatdi (src, insn);
    2133              :       else
    2134            0 :         gcc_unreachable ();
    2135            0 :       break;
    2136              : 
    2137            0 :     default:
    2138            0 :       gcc_unreachable ();
    2139              :     }
    2140              :   /* Install the rewritten source and destination.  */
    2141       914352 :   SET_SRC (def_set) = src;
    2142       914352 :   SET_DEST (def_set) = dst;
    2143              : 
    2144              :   /* Drop possible dead definitions.  */
    2145       914352 :   PATTERN (insn) = def_set;
    2146              :   /* Invalidate the cached insn code, re-recognize and rescan INSN.  */
    2147       914352 :   INSN_CODE (insn) = -1;
    2148       914352 :   recog_memoized (insn);
    2149       914352 :   df_insn_rescan (insn);
    2150       914352 : }
    2151              : 
    2152              : /* Generate copies from defs used by the chain but not defined therein.
    2153              :    Also populates defs_map which is used later by convert_insn: each register
    2154              :    in defs_conv is mapped to a fresh smode pseudo before the copies are made.  */
    2155              : void
    2156       636221 : scalar_chain::convert_registers ()
    2157              : {
    2158       636221 :   bitmap_iterator bi;
    2159       636221 :   unsigned id;
    2160       662357 :   EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
    2161              :     {
    2162        26136 :       rtx chain_reg = gen_reg_rtx (smode);
    2163        26136 :       defs_map.put (regno_reg_rtx[id], chain_reg);
    2164              :     }
    2165       644636 :   EXECUTE_IF_SET_IN_BITMAP (insns_conv, 0, id, bi)
    2166        21074 :     for (df_ref ref = DF_INSN_UID_DEFS (id); ref; ref = DF_REF_NEXT_LOC (ref))
    2167        12659 :       if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
    2168         8415 :         make_vector_copies (DF_REF_INSN (ref), DF_REF_REAL_REG (ref));
    2169       636221 : }
    2170              : 
    2171              : /* Convert whole chain creating required register
    2172              :    conversions and copies.  Return the number of insns converted, which is 0
    2173              :    when the stv_conversion debug counter vetoes the conversion.  */
    2174              : int
    2175       636221 : scalar_chain::convert ()
    2176              : {
    2177       636221 :   bitmap_iterator bi;
    2178       636221 :   unsigned id;
    2179       636221 :   int converted_insns = 0;
    2180              :   /* Honor the stv_conversion debug counter.  */
    2181       636221 :   if (!dbg_cnt (stv_conversion))
    2182              :     return 0;
    2183              : 
    2184       636221 :   if (dump_file)
    2185            0 :     fprintf (dump_file, "Converting chain #%d...\n", chain_id);
    2186              : 
    2187       636221 :   convert_registers ();
    2188              :   /* Convert each insn of the chain, identified by its UID in INSNS.  */
    2189      1962196 :   EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
    2190              :     {
    2191      1325975 :       rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
    2192      1325975 :       convert_insn_common (insn);
    2193      1325975 :       convert_insn (insn);
    2194      1325975 :       converted_insns++;
    2195              :     }
    2196              : 
    2197              :   return converted_insns;
    2198              : }
    2199              : 
    2200              : /* Return the SET expression if INSN doesn't reference hard register.
    2201              :    Return NULL if INSN uses or defines a hard register, excluding
    2202              :    pseudo register pushes, hard register uses in a memory address,
    2203              :    clobbers and flags definitions.  Also return NULL if INSN has no
    2204              :    single set.  */
    2205              : static rtx
    2206    335086571 : pseudo_reg_set (rtx_insn *insn)
    2207              : {
    2208    335086571 :   rtx set = single_set (insn);
    2209    335086571 :   if (!set)
    2210              :     return NULL;
    2211              :   /* Check pseudo register push first.  The push destination is matched in
    2212              :      TImode when TARGET_64BIT and in DImode otherwise.  */
    2213    134375852 :   machine_mode mode = TARGET_64BIT ? TImode : DImode;
    2214    134375852 :   if (REG_P (SET_SRC (set))
    2215     38048947 :       && !HARD_REGISTER_P (SET_SRC (set))
    2216    164067858 :       && push_operand (SET_DEST (set), mode))
    2217              :     return set;
    2218              :   /* A hard register def is tolerated only as a must-clobber or as FLAGS_REG.  */
    2219    134122697 :   df_ref ref;
    2220    216937303 :   FOR_EACH_INSN_DEF (ref, insn)
    2221    119434042 :     if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
    2222     64242673 :         && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
    2223    169417210 :         && DF_REF_REGNO (ref) != FLAGS_REG)
    2224              :       return NULL;
    2225              :   /* A hard register use is tolerated only when DF_REF_REG_MEM_P holds for it.  */
    2226    186944983 :   FOR_EACH_INSN_USE (ref, insn)
    2227    114655617 :     if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
    2228              :       return NULL;
    2229              : 
    2230              :   return set;
    2231              : }
    2232              : 
    2233              : /* Return true if the register REG is defined in a single DEF chain.
    2234              :    If it is defined in more than one DEF chain, we may not be able
    2235              :    to convert it in all chains.  Concretely: true iff REG's DF def chain
    2236              :    holds exactly one entry; a register with no def yields false.  */
    2237              : static bool
    2238      1147323 : single_def_chain_p (rtx reg)
    2239              : {
    2240      1147323 :   df_ref ref = DF_REG_DEF_CHAIN (REGNO (reg));
    2241      1147323 :   if (!ref)
    2242              :     return false;
    2243      1147307 :   return DF_REF_NEXT_REG (ref) == nullptr;
    2244              : }
    2245              : 
    2246              : /* Check if comparison INSN may be transformed into vector comparison.
    2247              :    Currently we transform equality/inequality checks which look like:
    2248              :    (set (reg:CCZ 17 flags) (compare:CCZ (reg:TI x) (reg:TI y)))
    2249              :    INSN must be a single set whose source is a COMPARE; both are asserted.  */
    2250              : static bool
    2251     12750555 : convertible_comparison_p (rtx_insn *insn, enum machine_mode mode)
    2252              : {
    2253     14159771 :   if (mode != (TARGET_64BIT ? TImode : DImode))
    2254              :     return false;
    2255              :   /* The conversion is only attempted when SSE4.1 is available.  */
    2256      4659163 :   if (!TARGET_SSE4_1)
    2257              :     return false;
    2258              : 
    2259       166622 :   rtx def_set = single_set (insn);
    2260              : 
    2261       166622 :   gcc_assert (def_set);
    2262              : 
    2263       166622 :   rtx src = SET_SRC (def_set);
    2264       166622 :   rtx dst = SET_DEST (def_set);
    2265              : 
    2266       166622 :   gcc_assert (GET_CODE (src) == COMPARE);
    2267              :   /* Only a CCZmode set of the flags register qualifies.  */
    2268       166622 :   if (!REG_P (dst)
    2269       166622 :       || REGNO (dst) != FLAGS_REG
    2270       333244 :       || GET_MODE (dst) != CCZmode)
    2271              :     return false;
    2272              : 
    2273       116416 :   rtx op1 = XEXP (src, 0);
    2274       116416 :   rtx op2 = XEXP (src, 1);
    2275              : 
    2276              :   /* *cmp<dwi>_doubleword.  */
    2277       116416 :   if ((CONST_SCALAR_INT_P (op1)
    2278       116416 :        || ((REG_P (op1) || MEM_P (op1))
    2279       114737 :            && GET_MODE (op1) == mode))
    2280           60 :       && (CONST_SCALAR_INT_P (op2)
    2281           12 :           || ((REG_P (op2) || MEM_P (op2))
    2282           10 :               && GET_MODE (op2) == mode)))
    2283              :     return true;
    2284              : 
    2285              :   /* *testti_doubleword.  Note the explicit TImode checks (not MODE).  */
    2286       116358 :   if (op2 == const0_rtx
    2287        38416 :       && GET_CODE (op1) == AND
    2288          142 :       && REG_P (XEXP (op1, 0)))
    2289              :     {
    2290          142 :       rtx op12 = XEXP (op1, 1);
    2291          142 :       return GET_MODE (XEXP (op1, 0)) == TImode
    2292          142 :              && (CONST_SCALAR_INT_P (op12)
    2293            0 :                  || ((REG_P (op12) || MEM_P (op12))
    2294            0 :                      && GET_MODE (op12) == TImode));
    2295              :     }
    2296              : 
    2297              :   /* *test<dwi>_not_doubleword.  */
    2298       116216 :   if (op2 == const0_rtx
    2299        38274 :       && GET_CODE (op1) == AND
    2300            0 :       && GET_CODE (XEXP (op1, 0)) == NOT)
    2301              :     {
    2302            0 :       rtx op11 = XEXP (XEXP (op1, 0), 0);
    2303            0 :       rtx op12 = XEXP (op1, 1);
    2304            0 :       return (REG_P (op11) || MEM_P (op11))
    2305            0 :              && (REG_P (op12) || MEM_P (op12))
    2306            0 :              && GET_MODE (op11) == mode
    2307            0 :              && GET_MODE (op12) == mode;
    2308              :     }
    2309              : 
    2310              :   return false;
    2311              : }
    2312              : 
    2313              : /* The general version of scalar_to_vector_candidate_p.
                      :    Return true if INSN is a conversion candidate for MODE.  Return
                      :    false when pseudo_reg_set (INSN) yields no set.  A COMPARE source
                      :    is delegated to convertible_comparison_p.  Otherwise the destination
                      :    must be a REG or MEM of mode MODE, the source must have mode MODE or
                      :    be a CONST_INT, and the source must be one of the codes accepted by
                      :    the switch below.  */
    2314              : 
    2315              : static bool
    2316    234484123 : general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
    2317              : {
    2318    234484123 :   rtx def_set = pseudo_reg_set (insn);
    2319              : 
    2320    234484123 :   if (!def_set)
    2321              :     return false;
    2322              : 
    2323     49064443 :   rtx src = SET_SRC (def_set);
    2324     49064443 :   rtx dst = SET_DEST (def_set);
    2325              : 
    2326     49064443 :   if (GET_CODE (src) == COMPARE)
    2327      8796000 :     return convertible_comparison_p (insn, mode);
    2328              : 
    2329              :   /* We are interested in "mode" only.  */
    2330     40268443 :   if ((GET_MODE (src) != mode
    2331     27544208 :        && !CONST_INT_P (src))
    2332     17816088 :       || GET_MODE (dst) != mode)
    2333              :     return false;
    2334              : 
    2335     14983411 :   if (!REG_P (dst) && !MEM_P (dst))
    2336              :     return false;
    2337              : 
    2338     14727299 :   switch (GET_CODE (src))
    2339              :     {
    2340       520923 :     case ASHIFT:
    2341       520923 :     case LSHIFTRT:
    2342       520923 :     case ASHIFTRT:
    2343       520923 :     case ROTATE:
    2344       520923 :     case ROTATERT:
    2345       520923 :       if (!CONST_INT_P (XEXP (src, 1))
    2346      1005848 :           || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, GET_MODE_BITSIZE (mode)-1))
    2347              :         return false;
    2348              : 
    2349              :       /* Check for extend highpart case: a DImode ASHIFTRT whose
                      :          first operand is an ASHIFT.  The operand checks after the
                      :          switch are then applied to the inner ASHIFT.  */
    2350       484921 :       if (mode != DImode
    2351       350124 :           || GET_CODE (src) != ASHIFTRT
    2352        76128 :           || GET_CODE (XEXP (src, 0)) != ASHIFT)
    2353              :         break;
    2354              : 
    2355      3640642 :       src = XEXP (src, 0);
    2356              :       break;
    2357              : 
    2358        78418 :     case SMAX:
    2359        78418 :     case SMIN:
    2360        78418 :     case UMAX:
    2361        78418 :     case UMIN:
    2362        78418 :       if ((mode == DImode && !TARGET_AVX512VL)
    2363        17615 :           || (mode == SImode && !TARGET_SSE4_1))
    2364              :         return false;
    2365              :       /* Fallthru.  */
    2366              : 
    2367      3194873 :     case AND:
    2368      3194873 :     case IOR:
    2369      3194873 :     case XOR:
    2370      3194873 :     case PLUS:
    2371      3194873 :     case MINUS:
    2372      3194873 :       if (!REG_P (XEXP (src, 1))
    2373              :           && !MEM_P (XEXP (src, 1))
    2374              :           && !CONST_INT_P (XEXP (src, 1)))
    2375              :         return false;
    2376              : 
    2377      3103953 :       if (GET_MODE (XEXP (src, 1)) != mode
    2378      1817212 :           && !CONST_INT_P (XEXP (src, 1)))
    2379              :         return false;
    2380              : 
    2381              :       /* Check for andnot case: an AND whose first operand is a NOT.
                      :          The operand checks after the switch are then applied to
                      :          the NOT.  */
    2382      3103953 :       if (GET_CODE (src) != AND
    2383       177049 :           || GET_CODE (XEXP (src, 0)) != NOT)
    2384              :         break;
    2385              : 
    2386      3640642 :       src = XEXP (src, 0);
    2387              :       /* FALLTHRU */
    2388              : 
    2389              :     case NOT:
    2390              :       break;
    2391              : 
    2392        24421 :     case NEG:
    2393              :       /* Check for nabs case: a NEG whose operand is an ABS.  If so,
                      :          continue with the ABS and apply its target checks.  */
    2394        24421 :       if (GET_CODE (XEXP (src, 0)) != ABS)
    2395              :         break;
    2396              : 
    2397              :       src = XEXP (src, 0);
    2398              :       /* FALLTHRU */
    2399              : 
    2400         2793 :     case ABS:
    2401         2793 :       if ((mode == DImode && !TARGET_AVX512VL)
    2402         1385 :           || (mode == SImode && !TARGET_SSSE3))
    2403              :         return false;
    2404              :       break;
    2405              : 
    2406              :     case REG:
    2407              :       return true;
    2408              : 
    2409      5935729 :     case MEM:
    2410      5935729 :     case CONST_INT:
    2411      5935729 :       return REG_P (dst);  /* Accepted only with a REG destination.  */
    2412              : 
    2413        56175 :     case VEC_SELECT:
    2414              :       /* Excluding MEM_P (dst) avoids interfering with vpextr[dq].
                      :          Only a single constant-index element selected from a
                      :          V2DImode (DImode) or V4SImode (otherwise) register is
                      :          accepted.  */
    2415        56175 :       return REG_P (dst)
    2416        45733 :              && REG_P (XEXP (src, 0))
    2417        52628 :              && GET_MODE (XEXP (src, 0)) == (mode == DImode ? V2DImode
    2418              :                                                             : V4SImode)
    2419        36134 :              && GET_CODE (XEXP (src, 1)) == PARALLEL
    2420        36134 :              && XVECLEN (XEXP (src, 1), 0) == 1
    2421        92309 :              && CONST_INT_P (XVECEXP (XEXP (src, 1), 0, 0));
    2422              : 
    2423              :     default:
    2424              :       return false;
    2425              :     }
    2426              : 
    2427      3640642 :   if (!REG_P (XEXP (src, 0))
    2428              :       && !MEM_P (XEXP (src, 0))
    2429              :       && !CONST_INT_P (XEXP (src, 0)))
    2430              :     return false;
    2431              : 
    2432      3335469 :   if (GET_MODE (XEXP (src, 0)) != mode
    2433            0 :       && !CONST_INT_P (XEXP (src, 0)))
    2434              :     return false;
    2435              : 
    2436              :   return true;
    2437              : }
    2438              : 
    2439              : /* Check for a suitable TImode memory operand.
                      :    Return true if X is a MEM and either
                      :    TARGET_SSE_UNALIGNED_LOAD_OPTIMAL is set or
                      :    misaligned_operand (X, TImode) is false.  */
    2440              : 
    2441              : static bool
    2442         1561 : timode_mem_p (rtx x)
    2443              : {
    2444         1561 :   return MEM_P (x)
    2445         1561 :          && (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
    2446            0 :              || !misaligned_operand (x, TImode));
    2447              : }
    2448              : 
    2449              : /* The TImode version of scalar_to_vector_candidate_p.
                      :    Return true if INSN is a TImode conversion candidate.  Return false
                      :    when pseudo_reg_set (INSN) yields no set.  A COMPARE source is
                      :    delegated to convertible_comparison_p.  Otherwise the destination
                      :    must be a TImode REG or MEM and the source must have TImode or
                      :    satisfy CONST_SCALAR_INT_P.  A misaligned MEM destination is
                      :    rejected unless TARGET_SSE_UNALIGNED_STORE_OPTIMAL is set, and a REG
                      :    destination must satisfy single_def_chain_p.  The remaining checks
                      :    depend on the source code, see the switch below.  */
    2450              : 
    2451              : static bool
    2452    100602448 : timode_scalar_to_vector_candidate_p (rtx_insn *insn)
    2453              : {
    2454    100602448 :   rtx def_set = pseudo_reg_set (insn);
    2455              : 
    2456    100602448 :   if (!def_set)
    2457              :     return false;
    2458              : 
    2459     23478078 :   rtx src = SET_SRC (def_set);
    2460     23478078 :   rtx dst = SET_DEST (def_set);
    2461              : 
    2462     23478078 :   if (GET_CODE (src) == COMPARE)
    2463      3954555 :     return convertible_comparison_p (insn, TImode);
    2464              : 
    2465     19523523 :   if (GET_MODE (dst) != TImode
    2466      1190975 :       || (GET_MODE (src) != TImode
    2467        59632 :           && !CONST_SCALAR_INT_P (src)))
    2468              :     return false;
    2469              : 
    2470      1190975 :   if (!REG_P (dst) && !MEM_P (dst))
    2471              :     return false;
    2472              : 
    2473      1189522 :   if (MEM_P (dst)
    2474       529647 :       && misaligned_operand (dst, TImode)
    2475      1502968 :       && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
    2476              :     return false;
    2477              : 
    2478      1189517 :   if (REG_P (dst) && !single_def_chain_p (dst))
    2479              :     return false;
    2480              : 
    2481      1039438 :   switch (GET_CODE (src))
    2482              :     {
    2483       487448 :     case REG:
    2484       487448 :       return single_def_chain_p (src);
    2485              : 
    2486              :     case CONST_WIDE_INT:
    2487              :       return true;
    2488              : 
    2489        12567 :     case CONST_INT:
    2490              :       /* ??? Verify performance impact before enabling CONST_INT for
    2491              :          __int128 store.  */
    2492        12567 :       return standard_sse_constant_p (src, TImode);
    2493              : 
    2494       444089 :     case MEM:
    2495              :       /* Memory must be aligned or unaligned load is optimal.
                      :          The destination must also be a register.  */
    2496       444089 :       return (REG_P (dst)
    2497       444089 :               && (!misaligned_operand (src, TImode)
    2498       149142 :                   || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
    2499              : 
    2500         3102 :     case AND:  /* First the (and (not reg) x) form, then a plain AND.  */
    2501         3102 :       if (!MEM_P (dst)
    2502         3061 :           && GET_CODE (XEXP (src, 0)) == NOT
    2503            0 :           && REG_P (XEXP (XEXP (src, 0), 0))
    2504         3102 :           && (REG_P (XEXP (src, 1))
    2505            0 :               || CONST_SCALAR_INT_P (XEXP (src, 1))
    2506            0 :               || timode_mem_p (XEXP (src, 1))))
    2507            0 :         return true;
    2508         3102 :       return (REG_P (XEXP (src, 0))
    2509           46 :               || timode_mem_p (XEXP (src, 0)))
    2510         3148 :              && (REG_P (XEXP (src, 1))
    2511         1280 :                  || CONST_SCALAR_INT_P (XEXP (src, 1))
    2512           35 :                  || timode_mem_p (XEXP (src, 1)));
    2513              : 
    2514        13987 :     case IOR:
    2515        13987 :     case XOR:
    2516        13987 :       if (timode_concatdi_p (src))
    2517              :         return true;
    2518         2673 :       return (REG_P (XEXP (src, 0))
    2519         1433 :               || timode_mem_p (XEXP (src, 0)))
    2520         2690 :              && (REG_P (XEXP (src, 1))
    2521          267 :                  || CONST_SCALAR_INT_P (XEXP (src, 1))
    2522           31 :                  || timode_mem_p (XEXP (src, 1)));
    2523              : 
    2524          504 :     case NOT:
    2525          504 :       return REG_P (XEXP (src, 0)) || timode_mem_p (XEXP (src, 0));
    2526              : 
    2527        11647 :     case ASHIFT:
    2528        11647 :     case LSHIFTRT:
    2529        11647 :     case ASHIFTRT:
    2530        11647 :     case ROTATERT:
    2531        11647 :     case ROTATE:
    2532              :       /* Handle shifts/rotates by integer constants between 0 and 127.
                      :          The shifted operand must be a register.  */
    2533        11647 :       return REG_P (XEXP (src, 0))
    2534        11615 :              && CONST_INT_P (XEXP (src, 1))
    2535        22921 :              && (INTVAL (XEXP (src, 1)) & ~0x7f) == 0;
    2536              : 
    2537         7031 :     case PLUS:  /* Accepted only when timode_concatdi_p holds.  */
    2538         7031 :       return timode_concatdi_p (src);
    2539              : 
    2540         3730 :     case ZERO_EXTEND:  /* Accepted only from a DImode register.  */
    2541         3730 :       return REG_P (XEXP (src, 0))
    2542         3730 :              && GET_MODE (XEXP (src, 0)) == DImode;
    2543              : 
    2544              :     default:
    2545              :       return false;
    2546              :     }
    2547              : }
    2548              : 
    2549              : /* For a register REGNO, scan instructions for its defs and uses.
    2550              :    Put REGNO in REGS if a def or use isn't in CANDIDATES.
                      :    Each scan stops at the first def or use found outside CANDIDATES;
                      :    when dump_file is set, that insn is reported there.  */
    2551              : 
    2552              : static void
    2553      1273546 : timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
    2554              :                                    unsigned int regno)
    2555              : {
    2556              :   /* Do nothing if REGNO is already in REGS or is a hard reg.  */
    2557      1273546 :   if (bitmap_bit_p (regs, regno)
    2558      1273546 :       || HARD_REGISTER_NUM_P (regno))
    2559              :     return;
    2560              : 
    2561      1261873 :   for (df_ref def = DF_REG_DEF_CHAIN (regno);
    2562      2499494 :        def;
    2563      1237621 :        def = DF_REF_NEXT_REG (def))
    2564              :     {
    2565      1261853 :       if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
    2566              :         {
    2567        24232 :           if (dump_file)
    2568            0 :             fprintf (dump_file,
    2569              :                      "r%d has non convertible def in insn %d\n",
    2570            0 :                      regno, DF_REF_INSN_UID (def));
    2571              : 
    2572        24232 :           bitmap_set_bit (regs, regno);
    2573        24232 :           break;
    2574              :         }
    2575              :     }
    2576              : 
    2577      1261873 :   for (df_ref ref = DF_REG_USE_CHAIN (regno);
    2578      2778641 :        ref;
    2579      1516768 :        ref = DF_REF_NEXT_REG (ref))
    2580              :     {
    2581              :       /* Debug instructions are skipped.  */
    2582      1579553 :       if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
    2583      1579553 :           && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
    2584              :         {
    2585        62785 :           if (dump_file)
    2586            0 :             fprintf (dump_file,
    2587              :                      "r%d has non convertible use in insn %d\n",
    2588            0 :                      regno, DF_REF_INSN_UID (ref));
    2589              : 
    2590        62785 :           bitmap_set_bit (regs, regno);
    2591        62785 :           break;
    2592              :         }
    2593              :     }
    2594              : }
    2595              : 
    2596              : /* For a given bitmap of insn UIDs scans all instructions and
    2597              :    remove insn from CANDIDATES in case it has both convertible
    2598              :    and not convertible definitions.
    2599              : 
    2600              :    All insns in a bitmap are conversion candidates according to
    2601              :    scalar_to_vector_candidate_p.  Currently it implies all insns
    2602              :    are single_set.
                      :
                      :    The scan is repeated until an iteration removes no insn from
                      :    CANDIDATES.  The registers flagged by
                      :    timode_check_non_convertible_regs are kept in a local bitmap that
                      :    accumulates across iterations and is freed before returning.  */
    2603              : 
    2604              : static void
    2605       829665 : timode_remove_non_convertible_regs (bitmap candidates)
    2606              : {
    2607       829665 :   bitmap_iterator bi;
    2608       829665 :   unsigned id;
    2609       829665 :   bitmap regs = BITMAP_ALLOC (NULL);
    2610       856651 :   bool changed;
    2611              : 
    2612       856651 :   do {
    2613       856651 :     changed = false;
    2614      2156228 :     EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)  /* Flag TImode regs with a def or use outside CANDIDATES.  */
    2615              :       {
    2616      1299577 :         rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
    2617      1299577 :         df_ref ref;
    2618              : 
    2619      1949783 :         FOR_EACH_INSN_DEF (ref, insn)
    2620       650206 :           if (!DF_REF_REG_MEM_P (ref)
    2621       650206 :               && GET_MODE (DF_REF_REG (ref)) == TImode)
    2622       628917 :             timode_check_non_convertible_regs (candidates, regs,
    2623              :                                                DF_REF_REGNO (ref));
    2624              : 
    2625      3206651 :         FOR_EACH_INSN_USE (ref, insn)
    2626      1907074 :           if (!DF_REF_REG_MEM_P (ref)
    2627       674908 :               && GET_MODE (DF_REF_REG (ref)) == TImode)
    2628       644629 :             timode_check_non_convertible_regs (candidates, regs,
    2629              :                                                DF_REF_REGNO (ref));
    2630              :       }
    2631              : 
    2632      1044144 :     EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)  /* Drop every candidate that defines or uses a flagged reg.  */
    2633              :       {
    2634       187493 :         for (df_ref def = DF_REG_DEF_CHAIN (id);
    2635       380765 :              def;
    2636       193272 :              def = DF_REF_NEXT_REG (def))
    2637       193272 :           if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
    2638              :             {
    2639        48825 :               if (dump_file)
    2640            0 :                 fprintf (dump_file, "Removing insn %d from candidates list\n",
    2641            0 :                          DF_REF_INSN_UID (def));
    2642              : 
    2643        48825 :               bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
    2644        48825 :               changed = true;
    2645              :             }
    2646              : 
    2647       187493 :         for (df_ref ref = DF_REG_USE_CHAIN (id);
    2648       495859 :              ref;
    2649       308366 :              ref = DF_REF_NEXT_REG (ref))
    2650       308366 :           if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
    2651              :             {
    2652        34159 :               if (dump_file)
    2653            0 :                 fprintf (dump_file, "Removing insn %d from candidates list\n",
    2654            0 :                          DF_REF_INSN_UID (ref));
    2655              : 
    2656        34159 :               bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
    2657        34159 :               changed = true;
    2658              :             }
    2659              :       }
    2660              :   } while (changed);
    2661              : 
    2662       829665 :   BITMAP_FREE (regs);
    2663       829665 : }
    2664              : 
    2665              : /* Main STV pass function.  Find and convert scalar
    2666              :    instructions into vector mode when profitable.
                      :    When TIMODE_P is true only TImode candidates are collected;
                      :    otherwise SImode and DImode candidates are collected.  If any insn
                      :    was converted, the stack alignment fields of crtl are raised to at
                      :    least 128 and blocks are split after recorded control flow insns.
                      :    Always returns 0.  */
    2667              : 
    2668              : static unsigned int
    2669      1785493 : convert_scalars_to_vector (bool timode_p)
    2670              : {
    2671      1785493 :   basic_block bb;
    2672      1785493 :   int converted_insns = 0;
    2673      1785493 :   auto_vec<rtx_insn *> control_flow_insns;
    2674              : 
    2675      1785493 :   bitmap_obstack_initialize (NULL);
    2676      1785493 :   const machine_mode cand_mode[3] = { SImode, DImode, TImode };
    2677      1785493 :   const machine_mode cand_vmode[3] = { V4SImode, V2DImode, V1TImode };
    2678      5356479 :   bitmap_head candidates[3];  /* { SImode, DImode, TImode } */
    2679      7141972 :   for (unsigned i = 0; i < 3; ++i)
    2680      5356479 :     bitmap_initialize (&candidates[i], &bitmap_default_obstack);
    2681              : 
    2682      1785493 :   calculate_dominance_info (CDI_DOMINATORS);
    2683      1785493 :   df_set_flags (DF_DEFER_INSN_RESCAN | DF_RD_PRUNE_DEAD_DEFS);
    2684      1785493 :   df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
    2685      1785493 :   df_analyze ();
    2686              : 
    2687              :   /* Find all instructions we want to convert into vector mode.  */
    2688      1785493 :   if (dump_file)
    2689           44 :     fprintf (dump_file, "Searching for mode conversion candidates...\n");
    2690              : 
    2691     19627540 :   FOR_EACH_BB_FN (bb, cfun)
    2692              :     {
    2693     17842047 :       rtx_insn *insn;
    2694    237447419 :       FOR_BB_INSNS (bb, insn)
    2695    219605372 :         if (timode_p
    2696    219605372 :             && timode_scalar_to_vector_candidate_p (insn))
    2697              :           {
    2698      1003509 :             if (dump_file)
    2699            0 :               fprintf (dump_file, "  insn %d is marked as a TImode candidate\n",
    2700            0 :                        INSN_UID (insn));
    2701              : 
    2702      1003509 :             bitmap_set_bit (&candidates[2], INSN_UID (insn));
    2703              :           }
    2704    218601863 :         else if (!timode_p)
    2705              :           {
    2706              :             /* Check {SI,DI}mode.  SImode is tried first and an insn is
                      :                recorded only for the first mode that accepts it.  */
    2707    341948340 :             for (unsigned i = 0; i <= 1; ++i)
    2708    234484123 :               if (general_scalar_to_vector_candidate_p (insn, cand_mode[i]))
    2709              :                 {
    2710     11538707 :                   if (dump_file)
    2711          554 :                     fprintf (dump_file, "  insn %d is marked as a %s candidate\n",
    2712          277 :                              INSN_UID (insn), i == 0 ? "SImode" : "DImode");
    2713              : 
    2714     11538707 :                   bitmap_set_bit (&candidates[i], INSN_UID (insn));
    2715     11538707 :                   break;
    2716              :                 }
    2717              :           }
    2718              :     }
    2719              : 
    2720      1785493 :   if (timode_p)
    2721       829665 :     timode_remove_non_convertible_regs (&candidates[2]);
    2722              : 
    2723      5666550 :   for (unsigned i = 0; i <= 2; ++i)  /* Only reports when all three bitmaps are empty.  */
    2724      4508137 :     if (!bitmap_empty_p (&candidates[i]))
    2725              :       break;
    2726      3881057 :     else if (i == 2 && dump_file)
    2727           23 :       fprintf (dump_file, "There are no candidates for optimization.\n");
    2728              : 
    2729      7141972 :   for (unsigned i = 0; i <= 2; ++i)
    2730              :     {
    2731      5356479 :       auto_bitmap disallowed;
    2732      5356479 :       bitmap_tree_view (&candidates[i]);
    2733     17026775 :       while (!bitmap_empty_p (&candidates[i]))
    2734              :         {
    2735      6313817 :           unsigned uid = bitmap_first_set_bit (&candidates[i]);
    2736      6313817 :           scalar_chain *chain;
    2737              : 
    2738      6313817 :           if (cand_mode[i] == TImode)
    2739       465867 :             chain = new timode_scalar_chain;
    2740              :           else
    2741      5847950 :             chain = new general_scalar_chain (cand_mode[i], cand_vmode[i]);
    2742              : 
    2743              :           /* Find instructions chain we want to convert to vector mode.
    2744              :              Check all uses and definitions to estimate all required
    2745              :              conversions.  */
    2746      6313817 :           if (chain->build (&candidates[i], uid, disallowed))
    2747              :             {
    2748      6308402 :               if (chain->compute_convert_gain ())
    2749       636221 :                 converted_insns += chain->convert ();
    2750      5672181 :               else if (dump_file)
    2751          136 :                 fprintf (dump_file, "Chain #%d conversion is not profitable\n",
    2752              :                          chain->chain_id);
    2753              :             }
    2754              : 
    2755      6313817 :           rtx_insn* iter_insn;
    2756      6313817 :           unsigned int ii;
    2757      6317405 :           FOR_EACH_VEC_ELT (chain->control_flow_insns, ii, iter_insn)  /* Copy them out; the chain is deleted below.  */
    2758         3588 :             control_flow_insns.safe_push (iter_insn);
    2759              : 
    2760      6313817 :           delete chain;
    2761              :         }
    2762      5356479 :     }
    2763              : 
    2764      1785493 :   if (dump_file)
    2765           44 :     fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
    2766              : 
    2767      7141972 :   for (unsigned i = 0; i <= 2; ++i)
    2768      5356479 :     bitmap_release (&candidates[i]);
    2769      1785493 :   bitmap_obstack_release (NULL);
    2770      1785493 :   df_process_deferred_rescans ();
    2771              : 
    2772              :   /* Conversion means we may have 128bit register spills/fills
    2773              :      which require aligned stack.  */
    2774      1785493 :   if (converted_insns)
    2775              :     {
    2776       103401 :       if (crtl->stack_alignment_needed < 128)
    2777         2395 :         crtl->stack_alignment_needed = 128;
    2778       103401 :       if (crtl->stack_alignment_estimated < 128)
    2779          219 :         crtl->stack_alignment_estimated = 128;
    2780              : 
    2781       103401 :       crtl->stack_realign_needed
    2782       103401 :         = INCOMING_STACK_BOUNDARY < crtl->stack_alignment_estimated;
    2783       103401 :       crtl->stack_realign_tried = crtl->stack_realign_needed;
    2784              : 
    2785       103401 :       crtl->stack_realign_processed = true;
    2786              : 
    2787       103401 :       if (!crtl->drap_reg)
    2788              :         {
    2789       103224 :           rtx drap_rtx = targetm.calls.get_drap_rtx ();
    2790              : 
    2791              :           /* stack_realign_drap and drap_rtx must match.  */
    2792       103224 :           gcc_assert ((stack_realign_drap != 0) == (drap_rtx != NULL));
    2793              : 
    2794              :           /* Do nothing if NULL is returned,
    2795              :              which means DRAP is not needed.  */
    2796       103224 :           if (drap_rtx != NULL)
    2797              :             {
    2798            0 :               crtl->args.internal_arg_pointer = drap_rtx;
    2799              : 
    2800              :               /* Call fixup_tail_calls to clean up
    2801              :                  REG_EQUIV note if DRAP is needed. */
    2802            0 :               fixup_tail_calls ();
    2803              :             }
    2804              :         }
    2805              : 
    2806              :       /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments.  For each
                      :          TImode parameter whose rtl is a V1TImode REG, replace it with
                      :          a TImode SUBREG of that register.  */
    2807       103401 :       if (TARGET_64BIT)
    2808        64906 :         for (tree parm = DECL_ARGUMENTS (current_function_decl);
    2809       177143 :              parm; parm = DECL_CHAIN (parm))
    2810              :           {
    2811       112237 :             if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
    2812        96566 :               continue;
    2813        15671 :             if (DECL_RTL_SET_P (parm)
    2814        31342 :                 && GET_MODE (DECL_RTL (parm)) == V1TImode)
    2815              :               {
    2816          522 :                 rtx r = DECL_RTL (parm);
    2817          522 :                 if (REG_P (r))
    2818          522 :                   SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
    2819              :               }
    2820        15671 :             if (DECL_INCOMING_RTL (parm)
    2821        15671 :                 && GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
    2822              :               {
    2823            0 :                 rtx r = DECL_INCOMING_RTL (parm);
    2824            0 :                 if (REG_P (r))
    2825            0 :                   DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
    2826              :               }
    2827              :           }
    2828              : 
    2829       103401 :       if (!control_flow_insns.is_empty ())
    2830              :         {
    2831         1130 :           free_dominance_info (CDI_DOMINATORS);
    2832              : 
    2833         1130 :           unsigned int i;
    2834         1130 :           rtx_insn* insn;
    2835         5848 :           FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
    2836         3588 :             if (control_flow_insn_p (insn))
    2837              :               {
    2838              :                 /* Split the block after insn.  There will be a fallthru
    2839              :                    edge, which is OK so we keep it.  We have to create
    2840              :                    the exception edges ourselves.  */
    2841         3588 :                 bb = BLOCK_FOR_INSN (insn);
    2842         3588 :                 split_block (bb, insn);
    2843         3588 :                 rtl_make_eh_edge (NULL, bb, BB_END (bb));
    2844              :               }
    2845              :         }
    2846              :     }
    2847              : 
    2848      1785493 :   return 0;
    2849      1785493 : }
    2850              : 
    2851              : static unsigned int
    2852        74536 : rest_of_handle_insert_vzeroupper (void)
    2853              : {
    2854              :   /* vzeroupper instructions are inserted immediately after reload and
    2855              :      postreload_cse to clean up after it a little bit to account for possible
    2856              :      spills from 256bit or 512bit registers.  The pass reuses mode switching
    2857              :      infrastructure by re-running mode insertion pass, so disable entities
    2858              :      that have already been processed.  Only AVX_U128 is left enabled
                      :      for the run below.  This function always returns 0.  */
    2859       521752 :   for (int i = 0; i < MAX_386_ENTITIES; i++)
    2860       447216 :     ix86_optimize_mode_switching[i] = 0;
    2861              : 
    2862        74536 :   ix86_optimize_mode_switching[AVX_U128] = 1;
    2863              : 
    2864              :   /* Call optimize_mode_switching.  */
    2865        74536 :   g->get_passes ()->execute_pass_mode_switching ();
    2866              : 
    2867              :   /* LRA removes all REG_DEAD/REG_UNUSED notes and normally they
    2868              :      reappear in the IL only at the start of pass_rtl_dse2, which does
    2869              :      df_note_add_problem (); df_analyze ();
    2870              :      The vzeroupper is scheduled after postreload_cse pass and mode
    2871              :      switching computes the notes as well, the problem is that e.g.
    2872              :      pass_gcse2 doesn't maintain the notes, see PR113059 and
    2873              :      PR112760.  Remove the notes now to restore status quo ante
    2874              :      until we figure out how to maintain the notes or what else
    2875              :      to do.  */
    2876        74536 :   basic_block bb;
    2877        74536 :   rtx_insn *insn;
    2878       412477 :   FOR_EACH_BB_FN (bb, cfun)
    2879      4294160 :     FOR_BB_INSNS (bb, insn)
    2880      3956219 :       if (NONDEBUG_INSN_P (insn))
    2881              :         {
    2882      2106982 :           rtx *pnote = &REG_NOTES (insn);
    2883      3908360 :           while (*pnote != 0)
    2884              :             {
    2885      1801378 :               if (REG_NOTE_KIND (*pnote) == REG_DEAD
    2886       822487 :                   || REG_NOTE_KIND (*pnote) == REG_UNUSED)
    2887      1292531 :                 *pnote = XEXP (*pnote, 1);  /* Unlink this note from the list.  */
    2888              :               else
    2889       508847 :                 pnote = &XEXP (*pnote, 1);  /* Keep it and advance.  */
    2890              :             }
    2891              :         }
    2892              : 
    2893        74536 :   df_remove_problem (df_note);
    2894        74536 :   df_analyze ();
    2895        74536 :   return 0;
    2896              : }
    2897              : 
    2898              : namespace {
    2899              : 
    2900              : const pass_data pass_data_insert_vzeroupper =
    2901              : {
    2902              :   RTL_PASS, /* type */
    2903              :   "vzeroupper", /* name */
    2904              :   OPTGROUP_NONE, /* optinfo_flags */
    2905              :   TV_MACH_DEP, /* tv_id */
    2906              :   0, /* properties_required */
    2907              :   0, /* properties_provided */
    2908              :   0, /* properties_destroyed */
    2909              :   0, /* todo_flags_start */
    2910              :   TODO_df_finish, /* todo_flags_finish */
    2911              : };
    2912              : 
    2913              : class pass_insert_vzeroupper : public rtl_opt_pass
    2914              : {
    2915              : public:
    2916       288047 :   pass_insert_vzeroupper(gcc::context *ctxt)
    2917       576094 :     : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
    2918              :   {}
    2919              : 
    2920              :   /* opt_pass methods: gate enables the pass when both TARGET_AVX and
                      :      TARGET_VZEROUPPER are set; execute runs
                      :      rest_of_handle_insert_vzeroupper.  */
    2921      1474422 :   bool gate (function *) final override
    2922              :     {
    2923      1474422 :       return TARGET_AVX && TARGET_VZEROUPPER;
    2924              :     }
    2925              : 
    2926        74536 :   unsigned int execute (function *) final override
    2927              :     {
    2928        74536 :       return rest_of_handle_insert_vzeroupper ();
    2929              :     }
    2930              : 
    2931              : }; // class pass_insert_vzeroupper
    2932              : 
    2933              : const pass_data pass_data_stv =
    2934              : {
    2935              :   RTL_PASS, /* type */
    2936              :   "stv", /* name */
    2937              :   OPTGROUP_NONE, /* optinfo_flags */
    2938              :   TV_MACH_DEP, /* tv_id */
    2939              :   0, /* properties_required */
    2940              :   0, /* properties_provided */
    2941              :   0, /* properties_destroyed */
    2942              :   0, /* todo_flags_start */
    2943              :   TODO_df_finish, /* todo_flags_finish */
    2944              : };
    2945              : 
    2946              : class pass_stv : public rtl_opt_pass
    2947              : {
    2948              : public:
    2949       576094 :   pass_stv (gcc::context *ctxt)
    2950       576094 :     : rtl_opt_pass (pass_data_stv, ctxt),
    2951      1152188 :       timode_p (false)
    2952              :   {}
    2953              : 
    2954              :   /* opt_pass methods: gate requires TARGET_STV, TARGET_SSE2 and
                      :      optimize > 1, plus TARGET_64BIT when timode_p is set; execute
                      :      runs convert_scalars_to_vector (timode_p); clone returns a new
                      :      pass_stv for m_ctxt; set_pass_param stores parameter 0 into
                      :      timode_p.  */
    2955      2948844 :   bool gate (function *) final override
    2956              :     {
    2957      1474422 :       return ((!timode_p || TARGET_64BIT)
    2958      4296782 :               && TARGET_STV && TARGET_SSE2 && optimize > 1);
    2959              :     }
    2960              : 
    2961      1785493 :   unsigned int execute (function *) final override
    2962              :     {
    2963      1785493 :       return convert_scalars_to_vector (timode_p);
    2964              :     }
    2965              : 
    2966       288047 :   opt_pass *clone () final override
    2967              :     {
    2968       288047 :       return new pass_stv (m_ctxt);
    2969              :     }
    2970              : 
    2971       576094 :   void set_pass_param (unsigned int n, bool param) final override
    2972              :     {
    2973       576094 :       gcc_assert (n == 0);
    2974       576094 :       timode_p = param;
    2975       576094 :     }
    2976              : 
    2977              : private:
    2978              :   bool timode_p;  /* False until changed through set_pass_param.  */
    2979              : }; // class pass_stv
    2980              : 
    2981              : } // anon namespace
    2982              : 
    2983              : rtl_opt_pass *
    2984       288047 : make_pass_insert_vzeroupper (gcc::context *ctxt)
    2985              : {
    2986       288047 :   return new pass_insert_vzeroupper (ctxt);
    2987              : }
    2988              : 
    2989              : rtl_opt_pass *
    2990       288047 : make_pass_stv (gcc::context *ctxt)
    2991              : {
    2992       288047 :   return new pass_stv (ctxt);
    2993              : }
    2994              : 
    2995              : /* Inserting ENDBR and pseudo patchable-area instructions.  */
    2996              : 
    2997              : static void
    2998       193924 : rest_of_insert_endbr_and_patchable_area (bool need_endbr,
    2999              :                                          unsigned int patchable_area_size)
    3000              : {
    3001       193924 :   rtx endbr;
    3002       193924 :   rtx_insn *insn;
    3003       193924 :   rtx_insn *endbr_insn = NULL;
    3004       193924 :   basic_block bb;
    3005              : 
    3006       193924 :   if (need_endbr)
    3007              :     {
    3008              :       /* Currently emit ENDBR if it's a tracking function, i.e. 'nocf_check'
    3009              :          is absent among function attributes.  Later an optimization will
    3010              :          be introduced to analyze whether the address of a static function
    3011              :          is taken.  A static function whose address is not taken will get
    3012              :          a nocf_check attribute.  This will allow reducing the number of
    3013              :          ENDBRs.  */
    3014       193879 :       if (!lookup_attribute ("nocf_check",
    3015       193879 :                              TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
    3016       193861 :           && (!flag_manual_endbr
    3017            8 :               || lookup_attribute ("cf_check",
    3018            8 :                                    DECL_ATTRIBUTES (cfun->decl)))
    3019       387739 :           && (!cgraph_node::get (cfun->decl)->only_called_directly_p ()
    3020        27348 :               || ix86_cmodel == CM_LARGE
    3021        27347 :               || ix86_cmodel == CM_LARGE_PIC
    3022        27346 :               || flag_force_indirect_call
    3023        27346 :               || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
    3024              :                   && DECL_DLLIMPORT_P (cfun->decl))))
    3025              :         {
    3026       166515 :           if (crtl->profile && flag_fentry)
    3027              :             {
    3028              :               /* Queue ENDBR insertion to x86_function_profiler.
    3029              :                  NB: Any patchable-area insn will be inserted after
    3030              :                  ENDBR.  */
    3031            6 :               cfun->machine->insn_queued_at_entrance = TYPE_ENDBR;
    3032              :             }
    3033              :           else
    3034              :             {
    3035       166509 :               endbr = gen_nop_endbr ();
    3036       166509 :               bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
    3037       166509 :               rtx_insn *insn = BB_HEAD (bb);
    3038       166509 :               endbr_insn = emit_insn_before (endbr, insn);
    3039              :             }
    3040              :         }
    3041              :     }
    3042              : 
    3043       193924 :   if (patchable_area_size)
    3044              :     {
    3045           51 :       if (crtl->profile && flag_fentry)
    3046              :         {
    3047              :           /* Queue patchable-area insertion to x86_function_profiler.
    3048              :              NB: If there is a queued ENDBR, x86_function_profiler
    3049              :              will also handle patchable-area.  */
    3050            2 :           if (!cfun->machine->insn_queued_at_entrance)
    3051            1 :             cfun->machine->insn_queued_at_entrance = TYPE_PATCHABLE_AREA;
    3052              :         }
    3053              :       else
    3054              :         {
    3055           49 :           rtx patchable_area
    3056           49 :             = gen_patchable_area (GEN_INT (patchable_area_size),
    3057           49 :                                   GEN_INT (crtl->patch_area_entry == 0));
    3058           49 :           if (endbr_insn)
    3059            3 :             emit_insn_after (patchable_area, endbr_insn);
    3060              :           else
    3061              :             {
    3062           46 :               bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
    3063           46 :               insn = BB_HEAD (bb);
    3064           46 :               emit_insn_before (patchable_area, insn);
    3065              :             }
    3066              :         }
    3067              :     }
    3068              : 
    3069       193924 :   if (!need_endbr)
    3070              :     return;
    3071              : 
    3072       193879 :   bb = 0;
    3073      3988087 :   FOR_EACH_BB_FN (bb, cfun)
    3074              :     {
    3075     72430749 :       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
    3076     68636541 :            insn = NEXT_INSN (insn))
    3077              :         {
    3078     68636541 :           if (CALL_P (insn))
    3079              :             {
    3080      1363041 :               need_endbr = find_reg_note (insn, REG_SETJMP, NULL) != NULL;
    3081      1363041 :               if (!need_endbr && !SIBLING_CALL_P (insn))
    3082              :                 {
    3083      1313361 :                   rtx call = get_call_rtx_from (insn);
    3084      1313361 :                   rtx fnaddr = XEXP (call, 0);
    3085      1313361 :                   tree fndecl = NULL_TREE;
    3086              : 
    3087              :                   /* Also generate ENDBRANCH for a non-tail call which
    3088              :                      may return via an indirect branch.  */
    3089      1313361 :                   if (SYMBOL_REF_P (XEXP (fnaddr, 0)))
    3090      1256032 :                     fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
    3091      1256032 :                   if (fndecl == NULL_TREE)
    3092        57697 :                     fndecl = MEM_EXPR (fnaddr);
    3093        57697 :                   if (fndecl
    3094      1311028 :                       && TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE
    3095       553942 :                       && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE)
    3096              :                     fndecl = NULL_TREE;
    3097      1313361 :                   if (fndecl && TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
    3098              :                     {
    3099      1272706 :                       tree fntype = TREE_TYPE (fndecl);
    3100      1272706 :                       if (lookup_attribute ("indirect_return",
    3101      1272706 :                                             TYPE_ATTRIBUTES (fntype)))
    3102              :                         need_endbr = true;
    3103              :                     }
    3104              :                 }
    3105      1363029 :               if (!need_endbr)
    3106      1363021 :                 continue;
    3107              :               /* Generate ENDBRANCH after a CALL which can return more than
    3108              :                  once, e.g. setjmp-like functions.  */
    3109              : 
    3110           20 :               endbr = gen_nop_endbr ();
    3111           20 :               emit_insn_after_setloc (endbr, insn, INSN_LOCATION (insn));
    3112           20 :               continue;
    3113           20 :             }
    3114              : 
    3115     67273500 :           if (JUMP_P (insn) && flag_cet_switch)
    3116              :             {
    3117            9 :               rtx target = JUMP_LABEL (insn);
    3118            9 :               if (target == NULL_RTX || ANY_RETURN_P (target))
    3119            5 :                 continue;
    3120              : 
    3121              :               /* Check the jump is a switch table.  */
    3122            4 :               rtx_insn *label = as_a<rtx_insn *> (target);
    3123            4 :               rtx_insn *table = next_insn (label);
    3124            4 :               if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
    3125            2 :                 continue;
    3126              : 
    3127              :               /* For the indirect jump find out all places it jumps and insert
    3128              :                  ENDBRANCH there.  It should be done under a special flag to
    3129              :                  control ENDBRANCH generation for switch stmts.  */
    3130            2 :               edge_iterator ei;
    3131            2 :               edge e;
    3132            2 :               basic_block dest_blk;
    3133              : 
    3134           24 :               FOR_EACH_EDGE (e, ei, bb->succs)
    3135              :                 {
    3136           22 :                   rtx_insn *insn;
    3137              : 
    3138           22 :                   dest_blk = e->dest;
    3139           22 :                   insn = BB_HEAD (dest_blk);
    3140           22 :                   gcc_assert (LABEL_P (insn));
    3141           22 :                   endbr = gen_nop_endbr ();
    3142           22 :                   emit_insn_after (endbr, insn);
    3143              :                 }
    3144            2 :               continue;
    3145            2 :             }
    3146              : 
    3147     67273491 :           if (LABEL_P (insn) && LABEL_PRESERVE_P (insn))
    3148              :             {
    3149       139305 :               endbr = gen_nop_endbr ();
    3150       139305 :               emit_insn_after (endbr, insn);
    3151       139305 :               continue;
    3152              :             }
    3153              :         }
    3154              :     }
    3155              : 
    3156              :   return;
    3157              : }
    3158              : 
    3159              : namespace {
    3160              : 
    3161              : const pass_data pass_data_insert_endbr_and_patchable_area =
    3162              : {
    3163              :   RTL_PASS, /* type.  */
    3164              :   "endbr_and_patchable_area", /* name.  */
    3165              :   OPTGROUP_NONE, /* optinfo_flags.  */
    3166              :   TV_MACH_DEP, /* tv_id.  */
    3167              :   0, /* properties_required.  */
    3168              :   0, /* properties_provided.  */
    3169              :   0, /* properties_destroyed.  */
    3170              :   0, /* todo_flags_start.  */
    3171              :   0, /* todo_flags_finish.  */
    3172              : };
    3173              : 
    3174              : class pass_insert_endbr_and_patchable_area : public rtl_opt_pass
    3175              : {
    3176              : public:
    3177       288047 :   pass_insert_endbr_and_patchable_area (gcc::context *ctxt)
    3178       576094 :     : rtl_opt_pass (pass_data_insert_endbr_and_patchable_area, ctxt)
    3179              :   {}
    3180              : 
    3181              :   /* opt_pass methods: */
    3182      1474422 :   bool gate (function *) final override
    3183              :     {
    3184      1474422 :       need_endbr = (flag_cf_protection & CF_BRANCH) != 0;
    3185      1474422 :       patchable_area_size = crtl->patch_area_size - crtl->patch_area_entry;
    3186      1474422 :       return need_endbr || patchable_area_size;
    3187              :     }
    3188              : 
    3189       193924 :   unsigned int execute (function *) final override
    3190              :     {
    3191       193924 :       timevar_push (TV_MACH_DEP);
    3192       193924 :       rest_of_insert_endbr_and_patchable_area (need_endbr,
    3193              :                                                patchable_area_size);
    3194       193924 :       timevar_pop (TV_MACH_DEP);
    3195       193924 :       return 0;
    3196              :     }
    3197              : 
    3198              : private:
    3199              :   bool need_endbr;
    3200              :   unsigned int patchable_area_size;
    3201              : }; // class pass_insert_endbr_and_patchable_area
    3202              : 
    3203              : } // anon namespace
    3204              : 
    3205              : rtl_opt_pass *
    3206       288047 : make_pass_insert_endbr_and_patchable_area (gcc::context *ctxt)
    3207              : {
    3208       288047 :   return new pass_insert_endbr_and_patchable_area (ctxt);
    3209              : }
    3210              : 
    3211              : bool
    3212      6104462 : ix86_rpad_gate ()
    3213              : {
    3214      6104462 :   return (TARGET_AVX
    3215       387221 :           && TARGET_SSE_PARTIAL_REG_DEPENDENCY
    3216       292326 :           && TARGET_SSE_MATH
    3217       292102 :           && optimize
    3218      6391349 :           && optimize_function_for_speed_p (cfun));
    3219              : }
    3220              : 
    3221              : enum x86_cse_kind
    3222              : {
    3223              :   X86_CSE_CONST0_VECTOR,
    3224              :   X86_CSE_CONSTM1_VECTOR,
    3225              :   X86_CSE_CONST_VECTOR,
    3226              :   X86_CSE_VEC_DUP,
    3227              :   X86_CSE_TLS_GD,
    3228              :   X86_CSE_TLS_LD_BASE,
    3229              :   X86_CSE_TLSDESC
    3230              : };
    3231              : 
    3232       153890 : struct redundant_pattern
    3233              : {
    3234              :   /* Bitmap of basic blocks with broadcast instructions.  */
    3235              :   auto_bitmap bbs;
    3236              :   /* Bitmap of broadcast instructions.  */
    3237              :   auto_bitmap insns;
    3238              :   /* The broadcast inner scalar.  */
    3239              :   rtx val;
    3240              :   /* The actual redundant source value for UNSPEC_TLSDESC.  */
    3241              :   rtx tlsdesc_val;
    3242              :   /* The inner scalar mode.  */
    3243              :   machine_mode mode;
    3244              :   /* The destination mode which can be changed to the integer mode of
    3245              :      the same size.  */
    3246              :   machine_mode dest_mode;
    3247              :   /* The instruction which sets the inner scalar.  Nullptr if the inner
    3248              :      scalar is applied to the whole function, instead of within the same
    3249              :      block.  */
    3250              :   rtx_insn *def_insn;
    3251              :   /* The widest broadcast source.  */
    3252              :   rtx broadcast_source;
    3253              :   /* The widest broadcast register.  */
    3254              :   rtx broadcast_reg;
    3255              :   /* The basic block of the broadcast instruction.  */
    3256              :   basic_block bb;
    3257              :   /* The number of broadcast instructions with the same inner scalar.  */
    3258              :   unsigned HOST_WIDE_INT count;
    3259              :   /* The threshold of broadcast instructions with the same inner
    3260              :      scalar.  */
    3261              :   unsigned int threshold;
    3262              :   /* The widest broadcast size in bytes.  */
    3263              :   unsigned int size;
    3264              :   /* Load kind.  */
    3265              :   x86_cse_kind kind;
    3266              : };
    3267              : 
    3268              : /* Generate a vector set, DEST = SRC, at entry of the nearest dominator
    3269              :    for basic block map BBS, which is in the fake loop that contains the
    3270              :    whole function, so that there is only a single vector set in the
    3271              :    whole function.  If not nullptr, LOAD is a pointer to the load.  */
    3272              : 
    3273              : static void
    3274        42784 : ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs,
    3275              :                               redundant_pattern *load = nullptr)
    3276              : {
    3277        42784 :   basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
    3278              :   /* For X86_CSE_VEC_DUP and X86_CSE_CONST_VECTOR, don't place the vector
    3279              :      set outside of the loop to avoid extra spills.  */
    3280        42784 :   if (!load
    3281        41762 :       || (load->kind != X86_CSE_VEC_DUP
    3282        41762 :           && load->kind != X86_CSE_CONST_VECTOR))
    3283              :     {
    3284        23389 :       while (bb->loop_father->latch
    3285        23389 :              != EXIT_BLOCK_PTR_FOR_FN (cfun))
    3286         1363 :         bb = get_immediate_dominator (CDI_DOMINATORS,
    3287              :                                       bb->loop_father->header);
    3288              :     }
    3289              : 
    3290        42784 :   if (CONST_INT_P (src))
    3291        10452 :     dest = gen_rtx_SUBREG (load->dest_mode, dest, 0);
    3292        32332 :   else if (CONST_VECTOR_P (src))
    3293              :     {
    3294              :       /* The only possible CONST_VECTORs of SRC are CONST0_RTX and
    3295              :          CONSTM1_RTX.  Otherwise,
    3296              : 
    3297              :          rtx set = gen_rtx_SET (dest, src);
    3298              : 
    3299              :          won't be a valid instruction.  CONST0_RTX always works.  It
    3300              :          can come from:
    3301              : 
    3302              :          1. remove_partial_avx_dependency with LOAD == NULL.
    3303              :          2. X86_CSE_VEC_DUP with
    3304              : 
    3305              :          (insn 48 58 16 3 (set (reg:V4HI 123)
    3306              :                 (const_vector:V4HI [
    3307              :                         (const_int 0 [0]) repeated x4
    3308              :                   ])) 2065 {*movv4hi_internal} (nil))
    3309              : 
    3310              :          3. X86_CSE_CONST0_VECTOR.
    3311              :        */
    3312        22026 :       machine_mode mode = GET_MODE (dest);
    3313        22026 :       if (!(src == CONST0_RTX (mode)
    3314         1235 :             || (src == CONSTM1_RTX (mode)
    3315         1235 :                 && load->kind == X86_CSE_CONSTM1_VECTOR)))
    3316            0 :         gcc_unreachable ();
    3317              :     }
    3318        42784 :   rtx set = gen_rtx_SET (dest, src);
    3319              : 
    3320        42784 :   rtx_insn *insn = BB_HEAD (bb);
    3321       164837 :   while (insn && !NONDEBUG_INSN_P (insn))
    3322              :     {
    3323       122057 :       if (insn == BB_END (bb))
    3324              :         {
    3325              :           insn = NULL;
    3326              :           break;
    3327              :         }
    3328       122053 :       insn = NEXT_INSN (insn);
    3329              :     }
    3330              : 
    3331        42784 :   rtx_insn *set_insn;
    3332        42784 :   if (insn == BB_HEAD (bb))
    3333              :     {
    3334            0 :       set_insn = emit_insn_before (set, insn);
    3335            0 :       if (dump_file)
    3336              :         {
    3337            0 :           fprintf (dump_file, "\nPlace:\n\n");
    3338            0 :           print_rtl_single (dump_file, set_insn);
    3339            0 :           fprintf (dump_file, "\nbefore:\n\n");
    3340            0 :           print_rtl_single (dump_file, insn);
    3341            0 :           fprintf (dump_file, "\n");
    3342              :         }
    3343              :     }
    3344              :   else
    3345              :     {
    3346        42784 :       rtx_insn *after = insn ? PREV_INSN (insn) : BB_END (bb);
    3347        42784 :       set_insn = emit_insn_after (set, after);
    3348        42784 :       if (dump_file)
    3349              :         {
    3350            2 :           fprintf (dump_file, "\nPlace:\n\n");
    3351            2 :           print_rtl_single (dump_file, set_insn);
    3352            2 :           fprintf (dump_file, "\nafter:\n\n");
    3353            2 :           print_rtl_single (dump_file, after);
    3354            2 :           fprintf (dump_file, "\n");
    3355              :         }
    3356              :     }
    3357              : 
    3358        42784 :   if (load && load->kind == X86_CSE_VEC_DUP)
    3359              :     {
    3360              :       /* Get the source from LOAD as (reg:SI 99) in
    3361              : 
    3362              :          (vec_duplicate:V4SI (reg:SI 99))
    3363              : 
    3364              :        */
    3365        10306 :       rtx inner_scalar = load->val;
    3366              :       /* Set the source in (vec_duplicate:V4SI (reg:SI 99)).  */
    3367        10306 :       rtx reg = XEXP (src, 0);
    3368        10306 :       machine_mode reg_mode = GET_MODE (reg);
    3369        10306 :       if (reg_mode != GET_MODE (inner_scalar))
    3370              :         {
    3371        10027 :           if (REG_P (inner_scalar) || MEM_P (inner_scalar))
    3372            0 :             inner_scalar = gen_rtx_SUBREG (reg_mode, inner_scalar, 0);
    3373        10027 :           else if (!SCALAR_INT_MODE_P (reg_mode))
    3374              :             {
    3375              :               /* For non-int load with integer constant, generate
    3376              : 
    3377              :                  (set (subreg:SI (reg/v:SF 105 [ f ]) 0)
    3378              :                       (const_int 1313486336 [0x4e4a3600]))
    3379              : 
    3380              :                */
    3381            1 :               gcc_assert (CONST_INT_P (inner_scalar));
    3382            1 :               unsigned int bits = GET_MODE_BITSIZE (reg_mode);
    3383            1 :               machine_mode mode = int_mode_for_size (bits, 0).require ();
    3384            1 :               reg = gen_rtx_SUBREG (mode, reg, 0);
    3385              :             }
    3386              :         }
    3387        10306 :       rtx set = gen_rtx_SET (reg, inner_scalar);
    3388        10306 :       insn = emit_insn_before (set, set_insn);
    3389        10306 :       if (dump_file)
    3390              :         {
    3391            0 :           fprintf (dump_file, "\nAdd:\n\n");
    3392            0 :           print_rtl_single (dump_file, insn);
    3393            0 :           fprintf (dump_file, "\nbefore:\n\n");
    3394            0 :           print_rtl_single (dump_file, set_insn);
    3395            0 :           fprintf (dump_file, "\n");
    3396              :         }
    3397              :     }
    3398        42784 : }
    3399              : 
    3400              : /* At entry of the nearest common dominator for basic blocks with
    3401              :    conversions/rcp/sqrt/rsqrt/round, generate a single
    3402              :         vxorps %xmmN, %xmmN, %xmmN
    3403              :    for all
    3404              :         vcvtss2sd  op, %xmmN, %xmmX
    3405              :         vcvtsd2ss  op, %xmmN, %xmmX
    3406              :         vcvtsi2ss  op, %xmmN, %xmmX
    3407              :         vcvtsi2sd  op, %xmmN, %xmmX
    3408              : 
    3409              :    NB: We want to generate only a single vxorps to cover the whole
    3410              :    function.  The LCM algorithm isn't appropriate here since it may
    3411              :    place a vxorps inside the loop.  */
    3412              : 
    3413              : static unsigned int
    3414        33373 : remove_partial_avx_dependency (void)
    3415              : {
    3416        33373 :   timevar_push (TV_MACH_DEP);
    3417              : 
    3418        33373 :   bitmap_obstack_initialize (NULL);
    3419        33373 :   bitmap convert_bbs = BITMAP_ALLOC (NULL);
    3420              : 
    3421        33373 :   basic_block bb;
    3422        33373 :   rtx_insn *insn, *set_insn;
    3423        33373 :   rtx set;
    3424        33373 :   rtx v4sf_const0 = NULL_RTX;
    3425              : 
    3426        33373 :   auto_vec<rtx_insn *> control_flow_insns;
    3427              : 
    3428              :   /* We create invalid RTL initially so defer rescans.  */
    3429        33373 :   df_set_flags (DF_DEFER_INSN_RESCAN);
    3430              : 
    3431       315942 :   FOR_EACH_BB_FN (bb, cfun)
    3432              :     {
    3433      3518513 :       FOR_BB_INSNS (bb, insn)
    3434              :         {
    3435      3235944 :           if (!NONDEBUG_INSN_P (insn))
    3436      1446432 :             continue;
    3437              : 
    3438      1789512 :           set = single_set (insn);
    3439      1789512 :           if (!set)
    3440        71013 :             continue;
    3441              : 
    3442      1718499 :           if (get_attr_avx_partial_xmm_update (insn)
    3443              :               != AVX_PARTIAL_XMM_UPDATE_TRUE)
    3444      1715318 :             continue;
    3445              : 
    3446              :           /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
    3447              :              SI -> SF, SI -> DF, DI -> SF, DI -> DF, sqrt, rsqrt, rcp,
    3448              :              round, to vec_dup and vec_merge with subreg.  */
    3449         3181 :           rtx src = SET_SRC (set);
    3450         3181 :           rtx dest = SET_DEST (set);
    3451         3181 :           machine_mode dest_mode = GET_MODE (dest);
    3452         3181 :           bool convert_p = false;
    3453         3181 :           switch (GET_CODE (src))
    3454              :             {
    3455         3116 :             case FLOAT:
    3456         3116 :             case FLOAT_EXTEND:
    3457         3116 :             case FLOAT_TRUNCATE:
    3458         3116 :             case UNSIGNED_FLOAT:
    3459         3116 :               convert_p = true;
    3460         3116 :               break;
    3461              :             default:
    3462              :               break;
    3463              :             }
    3464              : 
    3465              :           /* Only handle conversion here.  */
    3466         3116 :           machine_mode src_mode
    3467         3116 :             = convert_p ? GET_MODE (XEXP (src, 0)) : VOIDmode;
    3468         3116 :           switch (src_mode)
    3469              :             {
    3470          155 :             case E_SFmode:
    3471          155 :             case E_DFmode:
    3472          155 :               if (TARGET_USE_VECTOR_FP_CONVERTS
    3473          149 :                   || !TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY)
    3474            8 :                 continue;
    3475              :               break;
    3476         2961 :             case E_SImode:
    3477         2961 :             case E_DImode:
    3478         2961 :               if (TARGET_USE_VECTOR_CONVERTS
    3479         2949 :                   || !TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY)
    3480           14 :                 continue;
    3481              :               break;
    3482           65 :             case E_VOIDmode:
    3483           65 :               gcc_assert (!convert_p);
    3484              :               break;
    3485            0 :             default:
    3486            0 :               gcc_unreachable ();
    3487              :             }
    3488              : 
    3489         3159 :           if (!v4sf_const0)
    3490         1022 :             v4sf_const0 = gen_reg_rtx (V4SFmode);
    3491              : 
    3492         3159 :           rtx zero;
    3493         3159 :           machine_mode dest_vecmode;
    3494         3159 :           switch (dest_mode)
    3495              :             {
    3496           50 :             case E_HFmode:
    3497           50 :               dest_vecmode = V8HFmode;
    3498           50 :               zero = gen_rtx_SUBREG (V8HFmode, v4sf_const0, 0);
    3499           50 :               break;
    3500              :             case E_SFmode:
    3501              :               dest_vecmode = V4SFmode;
    3502              :               zero = v4sf_const0;
    3503              :               break;
    3504         1167 :             case E_DFmode:
    3505         1167 :               dest_vecmode = V2DFmode;
    3506         1167 :               zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0);
    3507         1167 :               break;
    3508            0 :             default:
    3509            0 :               gcc_unreachable ();
    3510              :             }
    3511              : 
    3512              :           /* Change source to vector mode.  */
    3513         3159 :           src = gen_rtx_VEC_DUPLICATE (dest_vecmode, src);
    3514         3159 :           src = gen_rtx_VEC_MERGE (dest_vecmode, src, zero,
    3515              :                                    GEN_INT (HOST_WIDE_INT_1U));
    3516              :           /* Change destination to vector mode.  */
    3517         3159 :           rtx vec = gen_reg_rtx (dest_vecmode);
    3518              :           /* Generate an XMM vector SET.  */
    3519         3159 :           set = gen_rtx_SET (vec, src);
    3520         3159 :           set_insn = emit_insn_before (set, insn);
    3521              : 
    3522         3159 :           if (cfun->can_throw_non_call_exceptions)
    3523              :             {
    3524              :               /* Handle REG_EH_REGION note.  */
    3525            0 :               rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
    3526            0 :               if (note)
    3527              :                 {
    3528            0 :                   control_flow_insns.safe_push (set_insn);
    3529            0 :                   add_reg_note (set_insn, REG_EH_REGION, XEXP (note, 0));
    3530              :                 }
    3531              :             }
    3532              : 
    3533         3159 :           src = gen_rtx_SUBREG (dest_mode, vec, 0);
    3534         3159 :           set = gen_rtx_SET (dest, src);
    3535              : 
    3536              :           /* Drop possible dead definitions.  */
    3537         3159 :           PATTERN (insn) = set;
    3538              : 
    3539         3159 :           INSN_CODE (insn) = -1;
    3540         3159 :           recog_memoized (insn);
    3541         3159 :           df_insn_rescan (insn);
    3542         3159 :           bitmap_set_bit (convert_bbs, bb->index);
    3543              :         }
    3544              :     }
    3545              : 
    3546        33373 :   if (v4sf_const0)
    3547              :     {
    3548              :       /* (Re-)discover loops so that bb->loop_father can be used in the
    3549              :          analysis below.  */
    3550         1022 :       calculate_dominance_info (CDI_DOMINATORS);
    3551         1022 :       loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
    3552              : 
    3553         1022 :       ix86_place_single_vector_set (v4sf_const0,
    3554              :                                     CONST0_RTX (V4SFmode),
    3555              :                                     convert_bbs);
    3556              : 
    3557         1022 :       loop_optimizer_finalize ();
    3558              : 
    3559         1022 :       if (!control_flow_insns.is_empty ())
    3560              :         {
    3561            0 :           free_dominance_info (CDI_DOMINATORS);
    3562              : 
    3563            0 :           unsigned int i;
    3564            0 :           FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
    3565            0 :             if (control_flow_insn_p (insn))
    3566              :               {
    3567              :                 /* Split the block after insn.  There will be a fallthru
    3568              :                    edge, which is OK so we keep it.  We have to create
    3569              :                    the exception edges ourselves.  */
    3570            0 :                 bb = BLOCK_FOR_INSN (insn);
    3571            0 :                 split_block (bb, insn);
    3572            0 :                 rtl_make_eh_edge (NULL, bb, BB_END (bb));
    3573              :               }
    3574              :         }
    3575              :     }
    3576              : 
    3577        33373 :   df_process_deferred_rescans ();
    3578        33373 :   df_clear_flags (DF_DEFER_INSN_RESCAN);
    3579        33373 :   bitmap_obstack_release (NULL);
    3580        33373 :   BITMAP_FREE (convert_bbs);
    3581              : 
    3582        33373 :   timevar_pop (TV_MACH_DEP);
    3583        33373 :   return 0;
    3584        33373 : }
    3585              : 
    3586              : namespace {
    3587              : 
    3588              : const pass_data pass_data_remove_partial_avx_dependency =
    3589              : {
    3590              :   RTL_PASS, /* type */
    3591              :   "rpad", /* name */
    3592              :   OPTGROUP_NONE, /* optinfo_flags */
    3593              :   TV_MACH_DEP, /* tv_id */
    3594              :   0, /* properties_required */
    3595              :   0, /* properties_provided */
    3596              :   0, /* properties_destroyed */
    3597              :   0, /* todo_flags_start */
    3598              :   0, /* todo_flags_finish */
    3599              : };
    3600              : 
    3601              : class pass_remove_partial_avx_dependency : public rtl_opt_pass
    3602              : {
    3603              : public:
    3604       288047 :   pass_remove_partial_avx_dependency (gcc::context *ctxt)
    3605       576094 :     : rtl_opt_pass (pass_data_remove_partial_avx_dependency, ctxt)
    3606              :   {}
    3607              : 
    3608              :   /* opt_pass methods: */
    3609      1474422 :   bool gate (function *) final override
    3610              :     {
    3611      1474422 :       return ix86_rpad_gate ();
    3612              :     }
    3613              : 
    3614        33373 :   unsigned int execute (function *) final override
    3615              :     {
    3616        33373 :       return remove_partial_avx_dependency ();
    3617              :     }
    3618              : }; // class pass_rpad
    3619              : 
    3620              : } // anon namespace
    3621              : 
    3622              : rtl_opt_pass *
    3623       288047 : make_pass_remove_partial_avx_dependency (gcc::context *ctxt)
    3624              : {
    3625       288047 :   return new pass_remove_partial_avx_dependency (ctxt);
    3626              : }
    3627              : 
    3628              : /* Return a machine mode suitable for vector SIZE with SMODE inner
    3629              :    mode.  */
    3630              : 
    3631              : static machine_mode
    3632        63539 : ix86_get_vector_cse_mode (unsigned int size, machine_mode smode)
    3633              : {
    3634              :   /* Use the inner scalar mode of vector broadcast source in:
    3635              : 
    3636              :      (set (reg:V8DF 394)
    3637              :           (vec_duplicate:V8DF (reg:V2DF 190 [ alpha ])))
    3638              : 
    3639              :      to compute the vector mode for broadcast from vector source.
    3640              :    */
    3641        63539 :   if (VECTOR_MODE_P (smode))
    3642        30941 :     smode = GET_MODE_INNER (smode);
    3643        63539 :   scalar_mode s_mode = as_a <scalar_mode> (smode);
    3644       127078 :   poly_uint64 nunits = size / GET_MODE_SIZE (smode);
    3645        63539 :   machine_mode mode = mode_for_vector (s_mode, nunits).require ();
    3646        63539 :   return mode;
    3647              : }
    3648              : 
    3649              : /* Replace the source operand of instructions in VECTOR_INSNS with
    3650              :    VECTOR_CONST in VECTOR_MODE.  */
    3651              : 
    3652              : static void
    3653        63064 : replace_vector_const (machine_mode vector_mode, rtx vector_const,
    3654              :                       auto_bitmap &vector_insns,
    3655              :                       machine_mode scalar_mode)
    3656              : {
    3657        63064 :   bitmap_iterator bi;
    3658        63064 :   unsigned int id;
    3659              : 
    3660       220843 :   EXECUTE_IF_SET_IN_BITMAP (vector_insns, 0, id, bi)
    3661              :     {
    3662       157779 :       rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
    3663              : 
    3664              :       /* Get the single SET instruction.  */
    3665       157779 :       rtx set = single_set (insn);
    3666       157779 :       rtx src = SET_SRC (set);
    3667       157779 :       rtx dest = SET_DEST (set);
    3668       157779 :       machine_mode mode = GET_MODE (dest);
    3669              : 
    3670       157779 :       rtx replace;
    3671              :       /* Replace the source operand with VECTOR_CONST.  */
    3672       157779 :       if (SUBREG_P (src)
    3673       157779 :           || mode == vector_mode
    3674        59265 :           || CONST_INT_P (vector_const))
    3675              :         replace = vector_const;
    3676              :       else
    3677              :         {
    3678        59265 :           unsigned int size = GET_MODE_SIZE (mode);
    3679        59265 :           if (size < ix86_regmode_natural_size (mode))
    3680              :             {
    3681              :               /* If the mode size is smaller than its natural size,
    3682              :                  first insert an extra move with a QI vector SUBREG
    3683              :                  of the same size to avoid validate_subreg failure.  */
    3684          475 :               machine_mode vmode
    3685          475 :                 = ix86_get_vector_cse_mode (size, scalar_mode);
    3686          475 :               rtx vreg;
    3687          475 :               if (mode == vmode)
    3688              :                 vreg = vector_const;
    3689              :               else
    3690              :                 {
    3691           59 :                   vreg = gen_reg_rtx (vmode);
    3692           59 :                   rtx vsubreg = gen_rtx_SUBREG (vmode, vector_const, 0);
    3693           59 :                   rtx pat = gen_rtx_SET (vreg, vsubreg);
    3694           59 :                   rtx_insn *vinsn = emit_insn_before (pat, insn);
    3695           59 :                   if (dump_file)
    3696              :                     {
    3697            0 :                       fprintf (dump_file, "\nInsert an extra move:\n\n");
    3698            0 :                       print_rtl_single (dump_file, vinsn);
    3699            0 :                       fprintf (dump_file, "\nbefore:\n\n");
    3700            0 :                       print_rtl_single (dump_file, insn);
    3701            0 :                       fprintf (dump_file, "\n");
    3702              :                     }
    3703              :                 }
    3704          475 :               replace = gen_rtx_SUBREG (mode, vreg, 0);
    3705              :             }
    3706              :           else
    3707        58790 :             replace = gen_rtx_SUBREG (mode, vector_const, 0);
    3708              :         }
    3709              : 
    3710       157779 :       if (dump_file)
    3711              :         {
    3712            3 :           fprintf (dump_file, "\nReplace:\n\n");
    3713            3 :           print_rtl_single (dump_file, insn);
    3714              :         }
    3715       157779 :       SET_SRC (set) = replace;
    3716       157779 :       if (CONST_INT_P (replace))
    3717              :         {
    3718        23743 :           dest = gen_rtx_SUBREG (scalar_mode, dest, 0);
    3719        23743 :           SET_DEST (set) = dest;
    3720              :         }
    3721              :       /* Drop possible dead definitions.  */
    3722       157779 :       PATTERN (insn) = set;
    3723       157779 :       INSN_CODE (insn) = -1;
    3724       157779 :       recog_memoized (insn);
    3725       157779 :       if (dump_file)
    3726              :         {
    3727            3 :           fprintf (dump_file, "\nwith:\n\n");
    3728            3 :           print_rtl_single (dump_file, insn);
    3729            3 :           fprintf (dump_file, "\n");
    3730              :         }
    3731       157779 :       df_insn_rescan (insn);
    3732              :     }
    3733        63064 : }
    3734              : 
    3735              : /* Return the inner scalar if OP is a broadcast, else return nullptr.  */
    3736              : 
    3737              : static rtx
    3738      2203660 : ix86_broadcast_inner (rtx op, machine_mode mode,
    3739              :                       machine_mode *scalar_mode_p,
    3740              :                       x86_cse_kind *kind_p, rtx_insn **insn_p)
    3741              : {
    3742      2203660 :   switch (standard_sse_constant_p (op, mode))
    3743              :     {
    3744       113918 :     case 1:
    3745       113918 :       *scalar_mode_p = QImode;
    3746       113918 :       *kind_p = X86_CSE_CONST0_VECTOR;
    3747       113918 :       *insn_p = nullptr;
    3748       113918 :       return const0_rtx;
    3749        11355 :     case 2:
    3750        11355 :       *scalar_mode_p = QImode;
    3751        11355 :       *kind_p = X86_CSE_CONSTM1_VECTOR;
    3752        11355 :       *insn_p = nullptr;
    3753        11355 :       return constm1_rtx;
    3754      2078387 :     default:
    3755      2078387 :       break;
    3756              :     }
    3757              : 
    3758      2078387 :   mode = GET_MODE (op);
    3759      2078387 :   int nunits = GET_MODE_NUNITS (mode);
    3760      2078387 :   if (nunits < 2)
    3761              :     return nullptr;
    3762              : 
    3763      1606927 :   bool const_vector_p = CONST_VECTOR_P (op);
    3764      1606927 :   bool duplicated = GET_CODE (op) == VEC_DUPLICATE;
    3765      1606927 :   rtx orig_op = op;
    3766      1606927 :   if (!const_vector_p)
    3767              :     {
    3768              :       /* Check CONST_VECTOR in REG_EQUAL note.  */
    3769      1606907 :       rtx equal = find_reg_equal_equiv_note (*insn_p);
    3770      1606907 :       if (equal)
    3771              :         {
    3772       346491 :           equal = XEXP (equal, 0);
    3773       346491 :           const_vector_p = CONST_VECTOR_P (equal);
    3774              :           /* Use CONST_VECTOR in REG_EQUAL note.  */
    3775       346491 :           if (const_vector_p)
    3776              :             {
    3777              :               /* Handle REG_EQUAL note in:
    3778              : 
    3779              :                  (insn 7 5 12 2 (set (subreg:V8SI (reg:V4DI 100) 0)
    3780              :                         (vec_duplicate:V8SI (reg:SI 102)))
    3781              :                     (expr_list:REG_DEAD (reg:SI 102)
    3782              :                        (expr_list:REG_EQUAL (const_vector:V4DI [
    3783              :                           (const_int -1 [0xffffffffffffffff]) repeated x4]) (nil))))
    3784              : 
    3785              :                  NB: Don't treat it as CONST_VECTOR since EQUAL isn't
    3786              :                  supported by ISAs as in gcc.target/i386/pr40957.c.  */
    3787       262594 :               if (GET_MODE (equal) != mode)
    3788              :                 const_vector_p = false;
    3789              :               else
    3790      1606927 :                 op = equal;
    3791              :             }
    3792              :         }
    3793              :     }
    3794              : 
    3795      1606927 :   machine_mode inner_mode = GET_MODE_INNER (mode);
    3796              : 
    3797      1606927 :   if (const_vector_p)
    3798              :     {
    3799       525160 :       bool int_load_p = GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
    3800       262580 :       *kind_p = X86_CSE_CONST_VECTOR;
    3801       262580 :       if (int_load_p)
    3802              :         {
    3803              :           /* This CONST_VECTOR load can be converted to constant
    3804              :              integer load.  */
    3805        34808 :           *scalar_mode_p = mode;
    3806        34808 :           *insn_p = nullptr;
    3807        34808 :           return op;
    3808              :         }
    3809              : 
    3810              :       /* This CONST_VECTOR is wider than the integer register.  */
    3811       227772 :       rtx first = XVECEXP (op, 0, 0);
    3812              : 
    3813       227772 :       if (duplicated)
    3814              :         {
    3815              :           /* Check if CONST_VECTOR in REG_EQUAL note is duplicated in
    3816              : 
    3817              :              (insn 10 7 12 2 (set (reg:V8SI 128)
    3818              :                 (vec_duplicate:V8SI (vec_select:V2SI (reg:V4SI 180)
    3819              :                         (parallel [(const_int 0 [0])
    3820              :                                    (const_int 1 [0x1])]))))
    3821              :                   (expr_list:REG_EQUAL (const_vector:V8SI [
    3822              :                     (const_int 0 [0])
    3823              :                     (const_int 34 [0x22])
    3824              :                     (const_int 0 [0])
    3825              :                     (const_int 34 [0x22])
    3826              :                     (const_int 0 [0])
    3827              :                     (const_int 34 [0x22])
    3828              :                     (const_int 0 [0])
    3829              :                     (const_int 34 [0x22])])(nil)))
    3830              : 
    3831              :            */
    3832              : 
    3833       210802 :           bool duplicated_const_vector = true;
    3834       210802 :           for (int i = 1; i < nunits; ++i)
    3835              :             {
    3836       137963 :               rtx tmp = XVECEXP (op, 0, i);
    3837       137963 :               if (!rtx_equal_p (tmp, first))
    3838              :                 {
    3839              :                   duplicated_const_vector = false;
    3840              :                   break;
    3841              :                 }
    3842              :             }
    3843              : 
    3844        72855 :           if (duplicated_const_vector)
    3845              :             {
    3846        72839 :               bool const_double_p = CONST_DOUBLE_P (first);
    3847              :               /* Force the floating point constant to memory.  */
    3848        72839 :               if (const_double_p)
    3849         5491 :                 first = validize_mem (force_const_mem (inner_mode, first));
    3850              : 
    3851        72839 :               if (const_double_p || CONST_INT_P (first))
    3852              :                 {
    3853              :                   /* Handle
    3854              : 
    3855              :                      (insn 7 6 8 2 (set (reg:V4SF 99)
    3856              :                           (vec_duplicate:V4SF (mem/u/c:SF (symbol_ref/u:DI ("*.LC2") [flags 0x2]) [0  S4 A32])))
    3857              :                         (expr_list:REG_EQUAL (const_vector:V4SF [
    3858              :                            (const_double:SF 3.4e+1 [0x0.88p+6]) repeated x4]) (nil)))
    3859              : 
    3860              :                      and
    3861              : 
    3862              :                      (insn 14 15 16 3 (set (reg:V4SI 116)
    3863              :                           (vec_duplicate:V4SI (reg:SI 117)))
    3864              :                        (expr_list:REG_EQUAL (const_vector:V4SI [
    3865              :                           (const_int 34 [0x22]) repeated x4]) (nil)))
    3866              : 
    3867              :                    */
    3868        72839 :                   *kind_p = X86_CSE_VEC_DUP;
    3869        72839 :                   *insn_p = nullptr;
    3870        72839 :                   *scalar_mode_p = inner_mode;
    3871        72839 :                   return first;
    3872              :                 }
    3873              :             }
    3874              : 
    3875              :           op = orig_op;
    3876              :         }
    3877              :       else
    3878              :         {
    3879              :           /* Only native CONST_VECTOR is allowed.  */
    3880       154917 :           if (orig_op != op)
    3881              :             return nullptr;
    3882              : 
    3883              :           /* Check if VEC_DUPLICATE can be used.  */
    3884           48 :           for (int i = 1; i < nunits; ++i)
    3885              :             {
    3886           48 :               rtx tmp = XVECEXP (op, 0, i);
    3887              :               /* Vector duplicate value.  */
    3888           48 :               if (!rtx_equal_p (tmp, first))
    3889              :                 return nullptr;
    3890              :             }
    3891              : 
    3892              :           /* Use the inner mode to handle
    3893              :              (const_vector:V2QI [(const_int 0 [0]) repeated x2])
    3894              :            */
    3895            0 :           *scalar_mode_p = inner_mode;
    3896            0 :           *insn_p = nullptr;
    3897            0 :           return first;
    3898              :         }
    3899              :     }
    3900              : 
    3901      1344363 :   if (!duplicated)
    3902              :     return nullptr;
    3903              : 
    3904        22503 :   *kind_p = X86_CSE_VEC_DUP;
    3905              : 
    3906              :   /* Only
    3907              : 
    3908              :      (vec_duplicate:V4SI (reg:SI 99))
    3909              :      (vec_duplicate:V2DF (mem/u/c:DF (symbol_ref/u:DI ("*.LC1") [flags 0x2]) [0 S8 A64]))
    3910              : 
    3911              :      are supported.  Set OP to the broadcast source by default.  */
    3912        22503 :   op = XEXP (op, 0);
    3913        22503 :   rtx reg = op;
    3914        22503 :   if (SUBREG_P (op)
    3915          401 :       && SUBREG_BYTE (op) == 0
    3916        22904 :       && !paradoxical_subreg_p (op))
    3917          401 :     reg = SUBREG_REG (op);
    3918        22503 :   if (!REG_P (reg))
    3919              :     {
    3920         2290 :       if (MEM_P (op)
    3921         2035 :           && SYMBOL_REF_P (XEXP (op, 0))
    3922         2524 :           && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
    3923              :         {
    3924              :           /* Handle constant broadcast from memory.  */
    3925           11 :           *scalar_mode_p = inner_mode;
    3926           11 :           *insn_p = nullptr;
    3927           11 :           return op;
    3928              :         }
    3929              :       return nullptr;
    3930              :     }
    3931              : 
    3932        20213 :   machine_mode orig_mode = mode;
    3933        20213 :   mode = GET_MODE (op);
    3934              : 
    3935              :   /* Only single def chain is supported.  */
    3936        20213 :   df_ref ref = DF_REG_DEF_CHAIN (REGNO (reg));
    3937        20213 :   if (!ref
    3938        20212 :       || DF_REF_IS_ARTIFICIAL (ref)
    3939        20212 :       || DF_REF_NEXT_REG (ref) != nullptr)
    3940              :     return nullptr;
    3941              : 
    3942        14750 :   rtx_insn *insn = DF_REF_INSN (ref);
    3943        14750 :   rtx set = single_set (insn);
    3944        14750 :   if (!set)
    3945              :     return nullptr;
    3946              : 
    3947        14711 :   rtx src = SET_SRC (set);
    3948              : 
    3949        14711 :   if (CONST_INT_P (src))
    3950              :     {
    3951              :       /* Handle sequences like
    3952              : 
    3953              :          (set (subreg:SI (reg/v:SF 105 [ f ]) 0)
    3954              :               (const_int 0 [0]))
    3955              :          (set (reg:V4SF 110)
    3956              :               (vec_duplicate:V4SF (reg/v:SF 105 [ f ])))
    3957              : 
    3958              :          and
    3959              : 
    3960              :          (set (reg:SI 99)
    3961              :                (const_int 34 [0x22]))
    3962              :          (set (reg:V4SI 98)
    3963              :                (vec_duplicate:V4SI (reg:SI 99)))
    3964              : 
    3965              :          Set *INSN_P to nullptr and return SET_SRC if SET_SRC is an
    3966              :          integer constant.  */
    3967          235 :       op = src;
    3968          235 :       if (SCALAR_INT_MODE_P (mode) && mode != GET_MODE (reg))
    3969            0 :         op = gen_int_mode (INTVAL (src), mode);
    3970          235 :       if (op == const0_rtx)
    3971              :         {
    3972            6 :            if (standard_sse_constant_p (CONST0_RTX (orig_mode),
    3973              :                                         orig_mode) == 1)
    3974              :              {
    3975            6 :                *scalar_mode_p = QImode;
    3976            6 :                *kind_p = X86_CSE_CONST0_VECTOR;
    3977            6 :                *insn_p = nullptr;
    3978            6 :                return const0_rtx;
    3979              :              }
    3980            0 :            op = CONST0_RTX (mode);
    3981              :         }
    3982          229 :       else if (op == constm1_rtx
    3983          229 :                && standard_sse_constant_p (CONSTM1_RTX (orig_mode),
    3984              :                                            orig_mode) == 2)
    3985              :         {
    3986            0 :           *scalar_mode_p = QImode;
    3987            0 :           *kind_p = X86_CSE_CONSTM1_VECTOR;
    3988            0 :           *insn_p = nullptr;
    3989            0 :           return constm1_rtx;
    3990              :         }
    3991              : 
    3992              :       /* Check if we can convert:
    3993              : 
    3994              :          (insn 14 465 412 3 (set (reg:SI 507 [ j_lsm.26 ])
    3995              :                 (const_int 2 [0x2])) "foo.c":10:12 discrim 2 100 {*movsi_internal} (nil))
    3996              :          ...
    3997              :          (insn 518 507 434 16 (set (reg:V2SI 493)
    3998              :                 (vec_duplicate:V2SI (reg:SI 507 [ j_lsm.26 ]))) 2395 {*vec_dupv2si} (nil))
    3999              : 
    4000              :          to constant integer load:
    4001              : 
    4002              :          (insn 566 55 56 6 (set (subreg:DI (reg:V2SI 517) 0)
    4003              :                 (const_int 8589934594 [0x200000002])) -1 (nil))
    4004              :          ...
    4005              :          (insn 518 507 434 16 (set (reg:V2SI 493)
    4006              :                 (reg:V2SI 517)) 2066 {*movv2si_internal} (nil))
    4007              : 
    4008              :        */
    4009          458 :       if (GET_MODE_SIZE (orig_mode) <= UNITS_PER_WORD)
    4010            6 :         *kind_p = X86_CSE_CONST_VECTOR;
    4011              : 
    4012          229 :       *insn_p = nullptr;
    4013              :     }
    4014              :   else
    4015              :     {
    4016              :       /* Handle sequences like
    4017              : 
    4018              :          (set (reg:QI 105 [ c ])
    4019              :               (reg:QI 5 di [ c ]))
    4020              :          (set (reg:V64QI 102 [ _1 ])
    4021              :               (vec_duplicate:V64QI (reg:QI 105 [ c ])))
    4022              : 
    4023              :          (set (reg/v:SI 116 [ argc ])
    4024              :               (mem/c:SI (reg:SI 135) [2 argc+0 S4 A32]))
    4025              :          (set (reg:V4SI 119 [ _45 ])
    4026              :               (vec_duplicate:V4SI (reg/v:SI 116 [ argc ])))
    4027              : 
    4028              :          (set (reg:SI 98 [ _1 ])
    4029              :               (sign_extend:SI (reg:QI 106 [ c ])))
    4030              :          (set (reg:V16SI 103 [ _2 ])
    4031              :                (vec_duplicate:V16SI (reg:SI 98 [ _1 ])))
    4032              : 
    4033              :          (set (reg:SI 102 [ cost ])
    4034              :               (mem/c:SI (symbol_ref:DI ("cost") [flags 0x40])))
    4035              :          (set (reg:V4HI 103 [ _16 ])
    4036              :               (vec_duplicate:V4HI (subreg:HI (reg:SI 102 [ cost ]) 0)))
    4037              : 
    4038              :          (set (subreg:SI (reg/v:HI 107 [ cr_val ]) 0)
    4039              :               (ashift:SI (reg:SI 158)
    4040              :                          (subreg:QI (reg:SI 156 [ _2 ]) 0)))
    4041              :          (set (reg:V16HI 183 [ _61 ])
    4042              :               (vec_duplicate:V16HI (reg/v:HI 107 [ cr_val ])))
    4043              : 
    4044              :          Set *INSN_P to INSN and return the broadcast source otherwise.  */
    4045        14476 :       *insn_p = insn;
    4046              :     }
    4047              : 
    4048        14705 :   *scalar_mode_p = mode;
    4049        14705 :   return op;
    4050              : }
    4051              : 
    4052              : /* Replace CALL instruction in TLS_CALL_INSNS with SET from SRC and
    4053              :    put the updated instruction in UPDATED_TLS_INSNS.  */
    4054              : 
    4055              : static void
    4056          313 : replace_tls_call (rtx src, auto_bitmap &tls_call_insns,
    4057              :                   auto_bitmap &updated_tls_insns)
    4058              : {
    4059          313 :   bitmap_iterator bi;
    4060          313 :   unsigned int id;
    4061              : 
    4062         1739 :   EXECUTE_IF_SET_IN_BITMAP (tls_call_insns, 0, id, bi)
    4063              :     {
    4064         1426 :       rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
    4065              : 
    4066              :       /* If this isn't a CALL, only GNU2 TLS implicit CALL patterns are
    4067              :          allowed.  */
    4068         1426 :       if (!CALL_P (insn))
    4069              :         {
    4070           47 :           attr_tls64 tls64 = get_attr_tls64 (insn);
    4071           47 :           if (tls64 != TLS64_CALL && tls64 != TLS64_COMBINE)
    4072            0 :             gcc_unreachable ();
    4073              :         }
    4074              : 
    4075         1426 :       rtx pat = PATTERN (insn);
    4076         1426 :       gcc_assert (GET_CODE (pat) == PARALLEL);
    4077         1426 :       rtx set = XVECEXP (pat, 0, 0);
    4078         1426 :       gcc_assert (GET_CODE (set) == SET);
    4079         1426 :       rtx dest = SET_DEST (set);
    4080              : 
    4081         1426 :       set = gen_rtx_SET (dest, src);
    4082         1426 :       rtx_insn *set_insn = emit_insn_after (set, insn);
    4083         1426 :       if (recog_memoized (set_insn) < 0)
    4084            0 :         gcc_unreachable ();
    4085              : 
    4086              :       /* Put SET_INSN in UPDATED_TLS_INSNS.  */
    4087         1426 :       bitmap_set_bit (updated_tls_insns, INSN_UID (set_insn));
    4088              : 
    4089         1426 :       if (dump_file)
    4090              :         {
    4091            0 :           fprintf (dump_file, "\nReplace:\n\n");
    4092            0 :           print_rtl_single (dump_file, insn);
    4093            0 :           fprintf (dump_file, "\nwith:\n\n");
    4094            0 :           print_rtl_single (dump_file, set_insn);
    4095            0 :           fprintf (dump_file, "\n");
    4096              :         }
    4097              : 
    4098              :       /* Delete the CALL insn.  */
    4099         1426 :       delete_insn (insn);
    4100              : 
    4101         1426 :       df_insn_rescan (set_insn);
    4102              :     }
    4103          313 : }
    4104              : 
    4105              : /* Return the nearest common dominator of all basic blocks which set hard
    4106              :    register REGNO (ignoring artificial and clobber DEFs) and dominate BB.  */
    4107              : 
    4108              : static basic_block
    4109            2 : ix86_get_dominator_for_reg (unsigned int regno, basic_block bb)
    4110              : {
    4111            2 :   basic_block set_bb;
    4112            2 :   auto_bitmap set_bbs;
    4113              : 
    4114              :   /* Walk all DEFs of REGNO and collect the BBs of the real (neither
    4115              :      artificial nor clobber) DEFs which dominate the current BB.  */
    4116            2 :   for (df_ref def = DF_REG_DEF_CHAIN (regno);
    4117           18 :        def;
    4118           16 :        def = DF_REF_NEXT_REG (def))
    4119           16 :     if (!DF_REF_IS_ARTIFICIAL (def)
    4120           16 :         && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER)
    4121            6 :         && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER))
    4122              :       {
    4123            4 :         set_bb = DF_REF_BB (def);
    4124            4 :         if (dominated_by_p (CDI_DOMINATORS, bb, set_bb))
    4125            2 :           bitmap_set_bit (set_bbs, set_bb->index);
    4126              :       }
    4127              :   /* NOTE(review): presumably SET_BBS is never empty here -- confirm.  */
    4128            2 :   bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
    4129            2 :   return bb;
    4130            2 : }
    4131              : 
    4132              : /* Callback for note_stores.  Mark FLAGS register as live in DATA, which
    4133              :    points to an auto_bitmap, if DEST is FLAGS register and X isn't a CLOBBER.  */
    4134              : 
    4135              : static void
    4136          381 : ix86_check_flags_reg (rtx dest, const_rtx x, void *data)
    4137              : {
    4138          381 :   if (GET_CODE (x) == CLOBBER)
    4139              :     return;
    4140              :   /* DATA is the address of the caller's bitmap of live registers.  */
    4141          374 :   auto_bitmap *live_caller_saved_regs = (auto_bitmap *) data;
    4142          374 :   if (REG_P (dest) && REGNO (dest) == FLAGS_REG)
    4143            0 :     bitmap_set_bit (*live_caller_saved_regs, FLAGS_REG);
    4144              : }
    4145              : 
    4146              : /* Emit TLS_SET, a TLS instruction of KIND, in basic block BB or in a
    4147              :    dominator of it, and return the emitted insn.  Store the insertion
    4148              :    point in *BEFORE_P for emit_insn_before or in *AFTER_P for
    4149              :    emit_insn_after.  UPDATED_GNU_TLS_INSNS and UPDATED_GNU2_TLS_INSNS hold
    4150              :    UIDs of instructions which replace the GNU and GNU2 TLS instructions.  */
    4151              : 
    4152              : static rtx_insn *
    4153          313 : ix86_emit_tls_call (rtx tls_set, x86_cse_kind kind, basic_block bb,
    4154              :                     rtx_insn **before_p, rtx_insn **after_p,
    4155              :                     auto_bitmap &updated_gnu_tls_insns,
    4156              :                     auto_bitmap &updated_gnu2_tls_insns)
    4157              : {
    4158          315 :   rtx_insn *tls_insn;
    4159              :   /* Each iteration tries one BB; on failure BB moves to a dominator.  */
    4160          315 :   do
    4161              :     {
    4162          315 :       rtx_insn *insn = BB_HEAD (bb);
    4163         1297 :       while (insn && !NONDEBUG_INSN_P (insn))
    4164              :         {
    4165          986 :           if (insn == BB_END (bb))
    4166              :             {
    4167              :               /* This must be the beginning basic block:
    4168              : 
    4169              :                  (note 4 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
    4170              :                  (note 2 4 26 2 NOTE_INSN_FUNCTION_BEG)
    4171              : 
    4172              :                  or a basic block with only a label:
    4173              : 
    4174              :                  (code_label 78 11 77 3 14 (nil) [1 uses])
    4175              :                  (note 77 78 54 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
    4176              : 
    4177              :                  or a basic block with only a debug marker:
    4178              : 
    4179              :                  (note 3 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
    4180              :                  (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
    4181              :                  (debug_insn 5 2 16 2 (debug_marker) "x.c":6:3 -1 (nil))
    4182              : 
    4183              :                  or a basic block with only deleted instructions:
    4184              : 
    4185              :                  (code_label 348 23 349 45 3 (nil) [0 uses])
    4186              :                  (note 349 348 436 45 [bb 45] NOTE_INSN_BASIC_BLOCK)
    4187              :                  (note 436 349 362 45 NOTE_INSN_DELETED)
    4188              : 
    4189              :                */
    4190            4 :               gcc_assert (DEBUG_INSN_P (insn)
    4191              :                           || (NOTE_P (insn)
    4192              :                               && ((NOTE_KIND (insn)
    4193              :                                    == NOTE_INSN_FUNCTION_BEG)
    4194              :                                   || (NOTE_KIND (insn)
    4195              :                                       == NOTE_INSN_DELETED)
    4196              :                                   || (NOTE_KIND (insn)
    4197              :                                       == NOTE_INSN_BASIC_BLOCK))));
    4198              :               insn = NULL;
    4199              :               break;
    4200              :             }
    4201          982 :           insn = NEXT_INSN (insn);
    4202              :         }
    4203              : 
    4204              :       /* TLS_GD and TLS_LD_BASE instructions are normal functions which
    4205              :          clobber caller-saved registers.  TLSDESC instructions only
    4206              :          clobber FLAGS.  If any registers clobbered by TLS instructions
    4207              :          are live in this basic block, we must insert TLS instructions
    4208              :          after all live registers clobbered are dead.  */
    4209              : 
    4210          315 :       auto_bitmap live_caller_saved_regs;
    4211          630 :       bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb);
    4212              :       /* FLAGS register is tracked for all KINDs.  */
    4213          315 :       if (bitmap_bit_p (in, FLAGS_REG))
    4214            4 :         bitmap_set_bit (live_caller_saved_regs, FLAGS_REG);
    4215              : 
    4216          315 :       unsigned int i;
    4217              : 
    4218              :       /* Get all live caller-saved registers for TLS_GD and TLS_LD_BASE
    4219              :          instructions.  */
    4220          315 :       if (kind != X86_CSE_TLSDESC)
    4221        27249 :         for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    4222        26956 :           if (call_used_regs[i]
    4223        25198 :               && !fixed_regs[i]
    4224        38993 :               && bitmap_bit_p (in, i))
    4225          344 :             bitmap_set_bit (live_caller_saved_regs, i);
    4226              :       /* With nothing live, emit before the first real insn or at BB_END.  */
    4227          315 :       if (bitmap_empty_p (live_caller_saved_regs))
    4228              :         {
    4229           82 :           if (insn == BB_HEAD (bb))
    4230              :             {
    4231            0 :               *before_p = insn;
    4232            0 :               tls_insn = emit_insn_before (tls_set, insn);
    4233              :             }
    4234              :           else
    4235              :             {
    4236              :               /* Emit the TLS call after NOTE_INSN_FUNCTION_BEG in the
    4237              :                  beginning basic block:
    4238              : 
    4239              :                  (note 4 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
    4240              :                  (note 2 4 26 2 NOTE_INSN_FUNCTION_BEG)
    4241              : 
    4242              :                  or after NOTE_INSN_BASIC_BLOCK in a basic block with
    4243              :                  only a label:
    4244              : 
    4245              :                  (code_label 78 11 77 3 14 (nil) [1 uses])
    4246              :                  (note 77 78 54 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
    4247              : 
    4248              :                  or after debug marker in a basic block with only a
    4249              :                  debug marker:
    4250              : 
    4251              :                  (note 3 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
    4252              :                  (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
    4253              :                  (debug_insn 5 2 16 2 (debug_marker) "x.c":6:3 -1 (nil))
    4254              : 
    4255              :                */
    4256           82 :               insn = insn ? PREV_INSN (insn) : BB_END (bb);
    4257           82 :               *after_p = insn;
    4258           82 :               tls_insn = emit_insn_after (tls_set, insn);
    4259              :             }
    4260           82 :           return tls_insn;
    4261              :         }
    4262              : 
    4263          233 :       bool repeat = false;
    4264              : 
    4265              :       /* Search for REG_DEAD notes in this basic block.  */
    4266          661 :       FOR_BB_INSNS (bb, insn)
    4267              :         {
    4268          661 :           if (!NONDEBUG_INSN_P (insn))
    4269          283 :             continue;
    4270              : 
    4271              :           /* NB: Conditional jump is the only instruction which reads
    4272              :              flags register and changes control flow.  We can never
    4273              :              place the TLS call after unconditional jump.  */
    4274          378 :           if (JUMP_P (insn))
    4275              :             {
    4276              :               /* This must be a conditional jump.  */
    4277            2 :               rtx label = JUMP_LABEL (insn);
    4278            2 :               if (label == nullptr
    4279            2 :                   || ANY_RETURN_P (label)
    4280            2 :                   || !(LABEL_P (label) || SYMBOL_REF_P (label)))
    4281            0 :                 gcc_unreachable ();
    4282              : 
    4283              :               /* Place the call before all FLAGS_REG setting BBs since
    4284              :                  we can't place a call before nor after a conditional
    4285              :                  jump.  */
    4286            2 :               bb = ix86_get_dominator_for_reg (FLAGS_REG, bb);
    4287              : 
    4288              :               /* Start over again.  */
    4289            2 :               repeat = true;
    4290            2 :               break;
    4291              :             }
    4292              : 
    4293          376 :           if (bitmap_bit_p (updated_gnu_tls_insns, INSN_UID (insn)))
    4294              :             {
    4295              :               /* Insert the __tls_get_addr call before INSN which
    4296              :                  replaces a __tls_get_addr call.  */
    4297            1 :               *before_p = insn;
    4298            1 :               tls_insn = emit_insn_before (tls_set, insn);
    4299            1 :               return tls_insn;
    4300              :             }
    4301              : 
    4302          375 :           if (bitmap_bit_p (updated_gnu2_tls_insns, INSN_UID (insn)))
    4303              :             {
    4304              :               /* Mark FLAGS register as dead since FLAGS register
    4305              :                  would be clobbered by the GNU2 TLS instruction.  */
    4306            1 :               bitmap_clear_bit (live_caller_saved_regs, FLAGS_REG);
    4307            1 :               continue;
    4308              :             }
    4309              : 
    4310              :           /* Mark FLAGS register as live if INSN sets (not clobbers) it.  */
    4311          374 :           note_stores (insn, ix86_check_flags_reg,
    4312              :                        &live_caller_saved_regs);
    4313              : 
    4314          374 :           rtx link;
    4315          515 :           for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
    4316          371 :             if ((REG_NOTE_KIND (link) == REG_DEAD
    4317            9 :                  || (REG_NOTE_KIND (link) == REG_UNUSED
    4318            7 :                      && REGNO (XEXP (link, 0)) == FLAGS_REG))
    4319          378 :                 && REG_P (XEXP (link, 0)))
    4320              :               {
    4321              :                 /* Mark the live caller-saved register as dead.  */
    4322          743 :                 for (i = REGNO (XEXP (link, 0));
    4323          743 :                      i < END_REGNO (XEXP (link, 0));
    4324              :                      i++)
    4325          374 :                   if (i < FIRST_PSEUDO_REGISTER)
    4326          351 :                     bitmap_clear_bit (live_caller_saved_regs, i);
    4327              : 
    4328          369 :                 if (bitmap_empty_p (live_caller_saved_regs))
    4329              :                   {
    4330          230 :                     *after_p = insn;
    4331          230 :                     tls_insn = emit_insn_after (tls_set, insn);
    4332          230 :                     return tls_insn;
    4333              :                   }
    4334              :               }
    4335              :         }
    4336              : 
    4337              :       /* NB: Start over again for conditional jump.  */
    4338            2 :       if (repeat)
    4339            2 :         continue;
    4340              : 
    4341            0 :       gcc_assert (!bitmap_empty_p (live_caller_saved_regs));
    4342              : 
    4343              :       /* If any live caller-saved registers aren't dead at the end of
    4344              :          this basic block, get the basic block which dominates all
    4345              :          basic blocks which set the remaining live registers.  */
    4346            0 :       auto_bitmap set_bbs;
    4347            0 :       bitmap_iterator bi;
    4348            0 :       unsigned int id;
    4349            0 :       EXECUTE_IF_SET_IN_BITMAP (live_caller_saved_regs, 0, id, bi)
    4350              :         {
    4351            0 :           basic_block set_bb = ix86_get_dominator_for_reg (id, bb);
    4352            0 :           bitmap_set_bit (set_bbs, set_bb->index);
    4353              :         }
    4354            0 :       bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
    4355            2 :     }
    4356              :   while (true);
    4357              : }
    4358              : 
    4359              : /* Generate a TLS call of KIND with VAL and copy the call result to DEST,
    4360              :    at entry of the nearest dominator for basic block map BBS, which is in
    4361              :    the fake loop that contains the whole function, so that there is only
    4362              :    a single TLS CALL of KIND with VAL in the whole function.
    4363              :    UPDATED_GNU_TLS_INSNS contains instructions which replace the GNU TLS
    4364              :    instructions.  UPDATED_GNU2_TLS_INSNS contains instructions which
    4365              :    replace the GNU2 TLS instructions.  If TLSDESC_SET isn't nullptr,
    4366              :    insert it before the TLS call.  */
    4367              : 
    4368              : static void
    4369          313 : ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind,
    4370              :                             auto_bitmap &bbs,
    4371              :                             auto_bitmap &updated_gnu_tls_insns,
    4372              :                             auto_bitmap &updated_gnu2_tls_insns,
    4373              :                             rtx tlsdesc_set = nullptr)
    4374              : {
    4375          313 :   basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
    4376          313 :   while (bb->loop_father->latch
    4377          322 :          != EXIT_BLOCK_PTR_FOR_FN (cfun))
    4378            9 :     bb = get_immediate_dominator (CDI_DOMINATORS,
    4379              :                                   bb->loop_father->header);
    4380              :   /* BB is now in the fake loop which contains the whole function.  */
    4381          313 :   rtx rax = nullptr, rdi;
    4382          313 :   rtx eqv = nullptr;
    4383          313 :   rtx caddr;
    4384          313 :   rtx set;
    4385          313 :   rtx clob;
    4386          313 :   rtx symbol;
    4387          313 :   rtx tls;
    4388              :   /* Build the TLS pattern in TLS according to KIND.  */
    4389          313 :   switch (kind)
    4390              :     {
    4391          262 :     case X86_CSE_TLS_GD:
    4392          262 :       rax = gen_rtx_REG (Pmode, AX_REG);
    4393          262 :       rdi = gen_rtx_REG (Pmode, DI_REG);
    4394          262 :       caddr = ix86_tls_get_addr ();
    4395              : 
    4396          262 :       symbol = XVECEXP (val, 0, 0);
    4397          262 :       tls = gen_tls_global_dynamic_64 (Pmode, rax, symbol, caddr, rdi);
    4398              : 
    4399          262 :       if (GET_MODE (symbol) != Pmode)
    4400            0 :         symbol = gen_rtx_ZERO_EXTEND (Pmode, symbol);
    4401              :       eqv = symbol;
    4402              :       break;
    4403              : 
    4404           30 :     case X86_CSE_TLS_LD_BASE:
    4405           30 :       rax = gen_rtx_REG (Pmode, AX_REG);
    4406           30 :       rdi = gen_rtx_REG (Pmode, DI_REG);
    4407           30 :       caddr = ix86_tls_get_addr ();
    4408              : 
    4409           30 :       tls = gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi);
    4410              : 
    4411              :       /* Attach a unique REG_EQUAL to DEST, to allow the RTL optimizers
    4412              :          to share the LD_BASE result with other LD model accesses.  */
    4413           30 :       eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
    4414              :                             UNSPEC_TLS_LD_BASE);
    4415              : 
    4416           30 :       break;
    4417              : 
    4418           21 :     case X86_CSE_TLSDESC:
    4419           21 :       set = gen_rtx_SET (dest, val);
    4420           21 :       clob = gen_rtx_CLOBBER (VOIDmode,
    4421              :                               gen_rtx_REG (CCmode, FLAGS_REG));
    4422           21 :       tls = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clob));
    4423           21 :       break;
    4424              : 
    4425            0 :     default:
    4426            0 :       gcc_unreachable ();
    4427              :     }
    4428              : 
    4429              :   /* Emit the TLS CALL insn.  */
    4430          313 :   rtx_insn *before = nullptr;
    4431          313 :   rtx_insn *after = nullptr;
    4432          313 :   rtx_insn *tls_insn = ix86_emit_tls_call (tls, kind, bb, &before,
    4433              :                                            &after,
    4434              :                                            updated_gnu_tls_insns,
    4435              :                                            updated_gnu2_tls_insns);
    4436              :   /* Emit a copy of TLSDESC_SET, if any, right before the TLS insn.  */
    4437          313 :   rtx_insn *tlsdesc_insn = nullptr;
    4438          313 :   if (tlsdesc_set)
    4439              :     {
    4440           16 :       rtx dest = copy_rtx (SET_DEST (tlsdesc_set));
    4441           16 :       rtx src = copy_rtx (SET_SRC (tlsdesc_set));
    4442           16 :       tlsdesc_set = gen_rtx_SET (dest, src);
    4443           16 :       tlsdesc_insn = emit_insn_before (tlsdesc_set, tls_insn);
    4444              :     }
    4445              :   /* GD and LD_BASE are __tls_get_addr calls; mark them as const calls.  */
    4446          313 :   if (kind != X86_CSE_TLSDESC)
    4447              :     {
    4448          292 :       RTL_CONST_CALL_P (tls_insn) = 1;
    4449              : 
    4450              :       /* Indicate that this function can't jump to non-local gotos.  */
    4451          292 :       make_reg_eh_region_note_nothrow_nononlocal (tls_insn);
    4452              :     }
    4453              :   /* The emitted TLS insn must match an insn pattern.  */
    4454          313 :   if (recog_memoized (tls_insn) < 0)
    4455            0 :     gcc_unreachable ();
    4456              :   /* Dump the placement relative to AFTER if it is set, else to BEFORE.  */
    4457          313 :   if (dump_file)
    4458              :     {
    4459            0 :       if (after)
    4460              :         {
    4461            0 :           fprintf (dump_file, "\nPlace:\n\n");
    4462            0 :           if (tlsdesc_insn)
    4463            0 :             print_rtl_single (dump_file, tlsdesc_insn);
    4464            0 :           print_rtl_single (dump_file, tls_insn);
    4465            0 :           fprintf (dump_file, "\nafter:\n\n");
    4466            0 :           print_rtl_single (dump_file, after);
    4467            0 :           fprintf (dump_file, "\n");
    4468              :         }
    4469              :       else
    4470              :         {
    4471            0 :           fprintf (dump_file, "\nPlace:\n\n");
    4472            0 :           if (tlsdesc_insn)
    4473            0 :             print_rtl_single (dump_file, tlsdesc_insn);
    4474            0 :           print_rtl_single (dump_file, tls_insn);
    4475            0 :           fprintf (dump_file, "\nbefore:\n\n");
    4476            0 :           print_rtl_single (dump_file, before);
    4477            0 :           fprintf (dump_file, "\n");
    4478              :         }
    4479              :     }
    4480              : 
    4481          313 :   if (kind != X86_CSE_TLSDESC)
    4482              :     {
    4483              :       /* Copy RAX to DEST.  */
    4484          292 :       set = gen_rtx_SET (dest, rax);
    4485          292 :       rtx_insn *set_insn = emit_insn_after (set, tls_insn);
    4486          292 :       set_dst_reg_note (set_insn, REG_EQUAL, copy_rtx (eqv), dest);
    4487          292 :       if (dump_file)
    4488              :         {
    4489            0 :           fprintf (dump_file, "\nPlace:\n\n");
    4490            0 :           print_rtl_single (dump_file, set_insn);
    4491            0 :           fprintf (dump_file, "\nafter:\n\n");
    4492            0 :           print_rtl_single (dump_file, tls_insn);
    4493            0 :           fprintf (dump_file, "\n");
    4494              :         }
    4495              :     }
    4496          313 : }
    4497              : 
    4498              : namespace {
    4499              : /* Static pass description handed to the rtl_opt_pass constructor below.  */
    4500              : const pass_data pass_data_x86_cse =
    4501              : {
    4502              :   RTL_PASS, /* type */
    4503              :   "x86_cse", /* name */
    4504              :   OPTGROUP_NONE, /* optinfo_flags */
    4505              :   TV_MACH_DEP, /* tv_id */
    4506              :   0, /* properties_required */
    4507              :   0, /* properties_provided */
    4508              :   0, /* properties_destroyed */
    4509              :   0, /* todo_flags_start */
    4510              :   0, /* todo_flags_finish */
    4511              : };
    4512              : /* The x86_cse RTL pass; it runs only when optimizing for speed.  */
    4513              : class pass_x86_cse : public rtl_opt_pass
    4514              : {
    4515              : public:
    4516       288047 :   pass_x86_cse (gcc::context *ctxt)
    4517       576094 :     : rtl_opt_pass (pass_data_x86_cse, ctxt)
    4518              :   {}
    4519              : 
    4520              :   /* opt_pass methods: */
    4521      1474422 :   bool gate (function *fun) final override
    4522              :     {
    4523      1474422 :       return optimize && optimize_function_for_speed_p (fun);
    4524              :     }
    4525              :   /* The function argument is unused; x86_cse does the work.  */
    4526       976721 :   unsigned int execute (function *) final override
    4527              :     {
    4528       976721 :       return x86_cse ();
    4529              :     }
    4530              : 
    4531              : private:
    4532              :   /* The redundant source value.  */
    4533              :   rtx val;
    4534              :   /* The actual redundant source value for UNSPEC_TLSDESC.  */
    4535              :   rtx tlsdesc_val;
    4536              :   /* The instruction which defines the redundant value.  */
    4537              :   rtx_insn *def_insn;
    4538              :   /* Mode of the destination of the candidate redundant instruction.  */
    4539              :   machine_mode mode;
    4540              :   /* Mode of the source of the candidate redundant instruction.  */
    4541              :   machine_mode scalar_mode;
    4542              :   /* The classification of the candidate redundant instruction.  */
    4543              :   x86_cse_kind kind;
    4544              :   /* The pass body and its helpers.  */
    4545              :   unsigned int x86_cse (void);
    4546              :   bool candidate_gnu_tls_p (rtx_insn *, attr_tls64);
    4547              :   bool candidate_gnu2_tls_p (rtx, attr_tls64);
    4548              :   bool candidate_vector_p (rtx, rtx_insn *);
    4549              :   rtx_insn *tls_set_insn_from_symbol (const_rtx, const_rtx);
    4550              : }; // class pass_x86_cse
    4551              : 
    4552              : /* Return the instruction which sets REG from TLS_SYMBOL, or nullptr.  */
    4553              : 
    4554              : rtx_insn *
    4555           42 : pass_x86_cse::tls_set_insn_from_symbol (const_rtx reg,
    4556              :                                         const_rtx tls_symbol)
    4557              : {
    4558           42 :   rtx_insn *set_insn = nullptr;
    4559           42 :   for (df_ref ref = DF_REG_DEF_CHAIN (REGNO (reg));
    4560          111 :        ref;
    4561           69 :        ref = DF_REF_NEXT_REG (ref))
    4562              :     {
    4563           69 :       if (DF_REF_IS_ARTIFICIAL (ref))
    4564              :         return nullptr;
    4565              :       /* Every DEF of REG must be a TLS64_LEA instruction ...  */
    4566           69 :       set_insn = DF_REF_INSN (ref);
    4567           69 :       if (get_attr_tls64 (set_insn) != TLS64_LEA)
    4568              :         return nullptr;
    4569              :       /* ... whose source has TLS_SYMBOL as element 0 of its vector.  */
    4570           69 :       rtx tls_set = PATTERN (set_insn);
    4571           69 :       rtx tls_src = XVECEXP (SET_SRC (tls_set), 0, 0);
    4572           69 :       if (!rtx_equal_p (tls_symbol, tls_src))
    4573              :         return nullptr;
    4574              :     }
    4575              :   /* All DEFs matched; return the last one visited (nullptr if none).  */
    4576              :   return set_insn;
    4577              : }
    4578              : 
    4579              : /* Return true and output def_insn, val, mode, scalar_mode and kind if INSN
    4580              :    is UNSPEC_TLS_GD or UNSPEC_TLS_LD_BASE; TLS64 tells which of the two.  */
    4581              : 
    4582              : bool
    4583         2185 : pass_x86_cse::candidate_gnu_tls_p (rtx_insn *insn, attr_tls64 tls64)
    4584              : {
    4585         2185 :   if (!TARGET_64BIT || !cfun->machine->tls_descriptor_call_multiple_p)
    4586              :     return false;
    4587              : 
    4588              :   /* Record the redundant TLS CALLs for 64-bit:
    4589              : 
    4590              :      (parallel [
    4591              :         (set (reg:DI 0 ax)
    4592              :              (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
    4593              :                       (const_int 0 [0])))
    4594              :         (unspec:DI [(symbol_ref:DI ("foo") [flags 0x50])
    4595              :                     (reg/f:DI 7 sp)] UNSPEC_TLS_GD)
    4596              :         (clobber (reg:DI 5 di))])
    4597              : 
    4598              : 
    4599              :      and
    4600              : 
    4601              :      (parallel [
    4602              :         (set (reg:DI 0 ax)
    4603              :              (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
    4604              :                       (const_int 0 [0])))
    4605              :         (unspec:DI [(reg/f:DI 7 sp)] UNSPEC_TLS_LD_BASE)])
    4606              : 
    4607              :    */
    4608              :   /* Element 0 of the PARALLEL is the SET; element 1 is the UNSPEC.  */
    4609         2022 :   rtx pat = PATTERN (insn);
    4610         2022 :   rtx set = XVECEXP (pat, 0, 0);
    4611         2022 :   gcc_assert (GET_CODE (set) == SET);
    4612         2022 :   rtx dest = SET_DEST (set);
    4613         2022 :   scalar_mode = mode = GET_MODE (dest);
    4614         2022 :   val = XVECEXP (pat, 0, 1);
    4615         2022 :   gcc_assert (GET_CODE (val) == UNSPEC);
    4616              :   /* Any TLS64 value other than TLS64_GD is treated as LD_BASE.  */
    4617         2022 :   if (tls64 == TLS64_GD)
    4618         1921 :     kind = X86_CSE_TLS_GD;
    4619              :   else
    4620          101 :     kind = X86_CSE_TLS_LD_BASE;
    4621              :   /* No defining instruction is recorded for these TLS calls.  */
    4622         2022 :   def_insn = nullptr;
    4623         2022 :   return true;
    4624              : }
    4625              : 
    4626              : /* Return true and output def_insn, val, mode, scalar_mode and kind if
    4627              :    SET is UNSPEC_TLSDESC.  */
    4628              : 
    4629              : bool
    4630           56 : pass_x86_cse::candidate_gnu2_tls_p (rtx set, attr_tls64 tls64)
    4631              : {
    4632           56 :   if (!TARGET_64BIT || !cfun->machine->tls_descriptor_call_multiple_p)
    4633              :     return false;
    4634              : 
    4635           54 :   rtx tls_symbol;
    4636           54 :   rtx_insn *set_insn;
    4637           54 :   rtx src = SET_SRC (set);
    4638           54 :   val = src;
    4639           54 :   tlsdesc_val = src;
    4640           54 :   kind = X86_CSE_TLSDESC;
    4641              : 
    4642           54 :   if (tls64 == TLS64_COMBINE)
    4643              :     {
    4644              :       /* Record 64-bit TLS64_COMBINE:
    4645              : 
    4646              :          (set (reg/f:DI 104)
    4647              :               (plus:DI (unspec:DI [
    4648              :                           (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
    4649              :                           (reg:DI 114)
    4650              :                           (reg/f:DI 7 sp)] UNSPEC_TLSDESC)
    4651              :                        (const:DI (unspec:DI [
    4652              :                                     (symbol_ref:DI ("e") [flags 0x1a])
    4653              :                                   ] UNSPEC_DTPOFF))))
    4654              : 
    4655              :          (set (reg/f:DI 104)
    4656              :               (plus:DI (unspec:DI [
    4657              :                           (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
    4658              :                           (unspec:DI [
    4659              :                              (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
    4660              :                           ] UNSPEC_TLSDESC)
    4661              :                           (reg/f:DI 7 sp)] UNSPEC_TLSDESC)
    4662              :                        (const:DI (unspec:DI [
    4663              :                                     (symbol_ref:DI ("e") [flags 0x1a])
    4664              :                                  ] UNSPEC_DTPOFF))))
    4665              :      */
    4666              : 
    4667           12 :       scalar_mode = mode = GET_MODE (src);
    4668              : 
    4669              :       /* Since the first operand of PLUS in the source TLS_COMBINE
    4670              :          pattern is unused, use the second operand of PLUS:
    4671              : 
    4672              :          (const:DI (unspec:DI [
    4673              :                       (symbol_ref:DI ("e") [flags 0x1a])
    4674              :                    ] UNSPEC_DTPOFF))
    4675              : 
    4676              :          as VAL to check if 2 TLS_COMBINE patterns have the same
    4677              :          source.  */
    4678           12 :       val = XEXP (src, 1);
    4679           12 :       gcc_assert (GET_CODE (val) == CONST
    4680              :                   && GET_CODE (XEXP (val, 0)) == UNSPEC
    4681              :                       && XINT (XEXP (val, 0), 1) == UNSPEC_DTPOFF
    4682              :                       && SYMBOL_REF_P (XVECEXP (XEXP (val, 0), 0, 0)));
    4683           12 :       def_insn = nullptr;
    4684           12 :       return true;
    4685              :     }
    4686              : 
    4687              :   /* Record 64-bit TLS_CALL:
    4688              : 
    4689              :      (set (reg:DI 101)
    4690              :           (unspec:DI [(symbol_ref:DI ("foo") [flags 0x50])
    4691              :                       (reg:DI 112)
    4692              :                       (reg/f:DI 7 sp)] UNSPEC_TLSDESC))
    4693              : 
    4694              :    */
    4695              : 
    4696           42 :   gcc_assert (GET_CODE (src) == UNSPEC);
    4697           42 :   tls_symbol = XVECEXP (src, 0, 0);
    4698           42 :   src = XVECEXP (src, 0, 1);
    4699           42 :   scalar_mode = mode = GET_MODE (src);
    4700           42 :   gcc_assert (REG_P (src));
    4701              : 
    4702              :   /* All definitions of reg:DI 129 in
    4703              : 
    4704              :      (set (reg:DI 110)
    4705              :           (unspec:DI [(symbol_ref:DI ("foo"))
    4706              :                       (reg:DI 129)
    4707              :                       (reg/f:DI 7 sp)] UNSPEC_TLSDESC))
    4708              : 
    4709              :      should have the same source as in
    4710              : 
    4711              :      (set (reg:DI 129)
    4712              :           (unspec:DI [(symbol_ref:DI ("foo"))] UNSPEC_TLSDESC))
    4713              : 
    4714              :    */
    4715              : 
    4716           42 :   set_insn = tls_set_insn_from_symbol (src, tls_symbol);
    4717           42 :   if (!set_insn)
    4718              :     return false;
    4719              : 
    4720              :   /* Use TLS_SYMBOL as VAL to check if 2 patterns have the same source.  */
    4721           42 :   val = tls_symbol;
    4722           42 :   def_insn = set_insn;
    4723           42 :   return true;
    4724              : }
    4725              : 
    4726              : /* Return true and set def_insn, val, mode, scalar_mode and kind if
    4727              :    SET, taken from INSN, is a vector broadcast recognized by
                           ix86_broadcast_inner and its destination is a REG or SUBREG.
                           NB: mode is overwritten even when false is returned.  */
    4728              : 
    4729              : bool
    4730     49796522 : pass_x86_cse::candidate_vector_p (rtx set, rtx_insn *insn)
    4731              : {
    4732     49796522 :   rtx src = SET_SRC (set);
    4733     49796522 :   rtx dest = SET_DEST (set);
    4734     49796522 :   mode = GET_MODE (dest);
    4735              :   /* Skip non-vector instruction.  */
    4736     49796522 :   if (!VECTOR_MODE_P (mode))
    4737              :     return false;
    4738              : 
    4739              :   /* Skip if the destination is neither a REG nor a SUBREG.  */
    4740      3715758 :   if (!REG_P (dest) && !SUBREG_P (dest))
    4741              :     return false;
    4742              : 
                          /* Start with INSN as the defining insn; ix86_broadcast_inner gets
                             a pointer to def_insn and may update it.  A null result means
                             SRC is not a candidate.  */
    4743      2203660 :   def_insn = insn;
    4744      2203660 :   val = ix86_broadcast_inner (src, mode, &scalar_mode, &kind,
    4745              :                               &def_insn);
    4746      2203660 :   return val ? true : false;
    4747              : }
    4748              : 
    4749              : /* At entry of the nearest common dominator for basic blocks with
    4750              : 
    4751              :    1. Vector CONST0_RTX patterns.
    4752              :    2. Vector CONSTM1_RTX patterns.
    4753              :    3. Vector broadcast patterns.
    4754              :    4. UNSPEC_TLS_GD patterns.
    4755              :    5. UNSPEC_TLS_LD_BASE patterns.
    4756              :    6. UNSPEC_TLSDESC patterns.
    4757              : 
    4758              :    generate a single pattern whose destination is used to replace the
    4759              :    source in all identical patterns.
    4760              : 
    4761              :    NB: We want to generate a pattern, which is executed only once, to
    4762              :    cover the whole function.  The LCM algorithm isn't appropriate here
    4763              :    since it may place a pattern inside the loop.  */
    4764              : 
    4765              : unsigned int
    4766       976721 : pass_x86_cse::x86_cse (void)
    4767              : {
    4768       976721 :   timevar_push (TV_MACH_DEP);
    4769              : 
    4770       976721 :   auto_vec<redundant_pattern *> loads;
    4771       976721 :   redundant_pattern *load;
    4772       976721 :   basic_block bb;
    4773       976721 :   rtx_insn *insn;
    4774       976721 :   unsigned int i;
    4775       976721 :   auto_bitmap updated_gnu_tls_insns;
    4776       976721 :   auto_bitmap updated_gnu2_tls_insns;
    4777       976721 :   auto_bitmap call_bbs;
    4778              : 
    4779       976721 :   df_set_flags (DF_DEFER_INSN_RESCAN);
    4780              : 
    4781       976721 :   bool recursive_call_p = cfun->machine->recursive_function;
    4782              : 
                          /* Phase 1: scan every insn and record each candidate pattern in
                             LOADS, merging it into an existing entry when one matches.  */
    4783     10901283 :   FOR_EACH_BB_FN (bb, cfun)
    4784              :     {
    4785    130959437 :       FOR_BB_INSNS (bb, insn)
    4786              :         {
    4787    121034875 :           if (!NONDEBUG_INSN_P (insn))
    4788     67586072 :             continue;
    4789              : 
    4790     53448803 :           bool matched = false;
    4791              :           /* Remove redundant patterns if there are at least 2 of
    4792              :              them.  */
    4793     53448803 :           unsigned int threshold = 2;
    4794              : 
    4795     53448803 :           bool call_p = CALL_P (insn);
    4796     53448803 :           rtx set = single_set (insn);
    4797     53448803 :           if (!set && !call_p)
    4798      1101789 :             continue;
    4799              : 
    4800     52347014 :           tlsdesc_val = nullptr;
    4801              : 
    4802     52347014 :           attr_tls64 tls64 = get_attr_tls64 (insn);
    4803              : 
    4804              :           /* NB: TLS calls preserve all registers.  */
    4805     52347014 :           if (call_p && tls64 == TLS64_NONE)
    4806      4414050 :             bitmap_set_bit (call_bbs, BLOCK_FOR_INSN (insn)->index);
    4807              : 
    4808     52347014 :           switch (tls64)
    4809              :             {
    4810         2185 :             case TLS64_GD:
    4811         2185 :             case TLS64_LD_BASE:
    4812              :               /* Verify UNSPEC_TLS_GD and UNSPEC_TLS_LD_BASE.  */
    4813         2185 :               if (candidate_gnu_tls_p (insn, tls64))
    4814              :                 break;
    4815          163 :               continue;
    4816              : 
    4817           56 :             case TLS64_CALL:
    4818           56 :             case TLS64_COMBINE:
    4819              :               /* Verify UNSPEC_TLSDESC.  */
    4820           56 :               if (candidate_gnu2_tls_p (set, tls64))
    4821              :                 break;
    4822            2 :               continue;
    4823              : 
    4824           38 :             case TLS64_LEA:
    4825              :               /* Skip TLS64_LEA.  */
    4826           38 :               continue;
    4827              : 
    4828     52344735 :             case TLS64_NONE:
    4829     52344735 :               if (!set)
    4830      2548213 :                 continue;
    4831              : 
    4832              :               /* Check for vector broadcast.  */
    4833     49796522 :               if (candidate_vector_p (set, insn))
    4834              :                 break;
    4835     49548880 :               continue;
    4836              :             }
    4837              : 
    4838              :           /* Check if there is a matching redundant load.  */
    4839       590397 :           FOR_EACH_VEC_ELT (loads, i, load)
    4840       436507 :             if (load->val
    4841       436507 :                 && load->kind == kind
    4842       291507 :                 && load->mode == scalar_mode
    4843       255907 :                 && (load->bb == bb
    4844       196767 :                     || (kind != X86_CSE_VEC_DUP
    4845       196767 :                         && kind != X86_CSE_CONST_VECTOR)
    4846              :                     /* Non all 0s/1s vector load must be in the same
    4847              :                        basic block if it is in a recursive call.  */
    4848       137647 :                     || !recursive_call_p)
    4849       690305 :                 && rtx_equal_p (load->val, val))
    4850              :               {
    4851              :                 /* Record instruction.  */
    4852        95828 :                 bitmap_set_bit (load->insns, INSN_UID (insn));
    4853              : 
    4854              :                 /* Record the maximum vector size.  */
    4855        95828 :                 if (kind <= X86_CSE_VEC_DUP
    4856       190543 :                     && load->size < GET_MODE_SIZE (mode))
    4857         1016 :                   load->size = GET_MODE_SIZE (mode);
    4858              : 
    4859              :                 /* Record the basic block.  */
    4860        95828 :                 bitmap_set_bit (load->bbs, bb->index);
    4861              : 
    4862              :                 /* Increment the count.  */
    4863        95828 :                 load->count++;
    4864              : 
    4865        95828 :                 matched = true;
    4866        95828 :                 break;
    4867              :               }
    4868              : 
    4869       249718 :           if (matched)
    4870        95828 :             continue;
    4871              : 
    4872              :           /* We see this instruction the first time.  Record the
    4873              :              redundant source value, its mode, the destination size,
    4874              :              instruction which defines the redundant source value,
    4875              :              instruction basic block and the instruction kind.  */
    4876       153890 :           load = new redundant_pattern;
    4877              : 
    4878              :           /* Convert CONST_VECTOR load no larger than integer register
    4879              :              to constant integer load even if there is no redundant
    4880              :              CONST_VECTOR load.  */
    4881       153890 :           if (CONST_VECTOR_P (val))
    4882        30940 :             threshold = 1;
    4883              : 
    4884       153890 :           load->val = copy_rtx (val);
    4885       153890 :           if (tlsdesc_val)
    4886           28 :             load->tlsdesc_val = copy_rtx (tlsdesc_val);
    4887              :           else
    4888       153862 :             load->tlsdesc_val = nullptr;
    4889       153890 :           load->mode = scalar_mode;
    4890       153890 :           load->dest_mode = mode;
    4891       153890 :           load->size = GET_MODE_SIZE (mode);
    4892       153890 :           load->def_insn = def_insn;
    4893       153890 :           load->count = 1;
    4894       153890 :           load->threshold = threshold;
    4895       153890 :           load->bb = BLOCK_FOR_INSN (insn);
    4896       153890 :           load->kind = kind;
    4897              : 
    4898       153890 :           bitmap_set_bit (load->insns, INSN_UID (insn));
    4899       153890 :           bitmap_set_bit (load->bbs, bb->index);
    4900              : 
    4901       153890 :           loads.safe_push (load);
    4902              :         }
    4903              :     }
    4904              : 
                          /* Phase 2: for each pattern that reached its threshold, pick the
                             replacement (a new pseudo or, for small constants, an integer
                             constant) and pass the recorded insns to replace_tls_call or
                             replace_vector_const.  */
    4905              :   bool replaced = false;
    4906      1130611 :   FOR_EACH_VEC_ELT (loads, i, load)
    4907       153890 :     if (load->count >= load->threshold)
    4908              :       {
    4909        63377 :         machine_mode mode;
    4910        63377 :         rtx reg, broadcast_reg;
    4911        63377 :         rtx broadcast_source = nullptr;
    4912        63377 :         replaced = true;
    4913        63377 :         switch (load->kind)
    4914              :           {
    4915          313 :           case X86_CSE_TLS_GD:
    4916          313 :           case X86_CSE_TLS_LD_BASE:
    4917          313 :           case X86_CSE_TLSDESC:
    4918          313 :             broadcast_reg = gen_reg_rtx (load->mode);
    4919          313 :             replace_tls_call (broadcast_reg, load->insns,
    4920          313 :                               (load->kind == X86_CSE_TLSDESC
    4921              :                                ? updated_gnu2_tls_insns
    4922              :                                : updated_gnu_tls_insns));
    4923          313 :             load->broadcast_reg = broadcast_reg;
    4924          313 :             break;
    4925              : 
    4926        11119 :           case X86_CSE_VEC_DUP:
    4927        11119 :             if (CONST_INT_P (load->val)
    4928        10027 :                 && (load->val == CONST0_RTX (load->mode)
    4929        10051 :                     || load->size <= UNITS_PER_WORD))
    4930              :               {
    4931              :                 /* Generate CONST_VECTOR load.  NB: The case label below
                                           jumps into this block, so X86_CSE_CONST_VECTOR loads
                                           share this code.  */
    4932        30941 :               case X86_CSE_CONST_VECTOR:
    4933        30941 :                 mode = ix86_get_vector_cse_mode (load->size,
    4934              :                                                  load->mode);
    4935              : 
    4936        30941 :                 if (CONST_VECTOR_P (load->val))
    4937              :                   broadcast_source = load->val;
    4938            1 :                 else if (load->val == CONST0_RTX (load->mode))
    4939            0 :                   broadcast_source = CONST0_RTX (mode);
    4940            1 :                 else if (load->val == CONSTM1_RTX (load->mode))
    4941            0 :                   broadcast_source = CONSTM1_RTX (mode);
    4942              :                 else
    4943              :                   {
    4944            1 :                     int nunits = GET_MODE_NUNITS (mode);
    4945            1 :                     rtvec v = rtvec_alloc (nunits);
    4946            3 :                     for (int j = 0; j < nunits ; j++)
    4947            2 :                       RTVEC_ELT (v, j) = load->val;
    4948            1 :                     broadcast_source = gen_rtx_CONST_VECTOR (mode, v);
    4949              :                   }
    4950              : 
    4951              :                 /* NB: Zero CONST_VECTOR load works for MMX and XMM
    4952              :                    registers.  */
    4953        32352 :                 if (load->size <= UNITS_PER_WORD)
    4954              :                   {
    4955              :                     /* Convert CONST_VECTOR load no larger than integer
    4956              :                        register:
    4957              : 
    4958              :                        (set (reg:V2SI 106)
    4959              :                             (const_vector:V2SI [(const_int 1 [1]) repeated x2]))
    4960              : 
    4961              :                        to constant integer load:
    4962              : 
    4963              :                        (set (subreg:DI (reg:V2SI 106 [ _20 ]) 0)
    4964              :                             (const_int 4294967297 [0x100000001]))
    4965              :                        */
    4966        30941 :                     machine_mode int_mode
    4967        30941 :                       = int_mode_for_mode (mode).require ();
    4968        30941 :                     load->dest_mode = int_mode;
    4969        30941 :                     broadcast_source = simplify_subreg (int_mode,
    4970              :                                                         broadcast_source,
    4971              :                                                         mode, 0);
    4972        30941 :                     gcc_assert (broadcast_source != nullptr);
    4973              : 
    4974        30941 :                     bool keep_const_int_load = false;
    4975        30941 :                     if (!bitmap_empty_p (call_bbs))
    4976              :                       {
    4977        27734 :                         bitmap_iterator bi;
    4978        27734 :                         unsigned int id;
    4979        36148 :                         EXECUTE_IF_SET_IN_BITMAP (load->bbs, 0, id, bi)
    4980        28903 :                           if (bitmap_bit_p (call_bbs, id))
    4981              :                             {
    4982              :                               /* NB: Constant integer load is faster
    4983              :                                  than saving and restoring an integer
    4984              :                                  register when crossing a function call.
    4985              :                                */
    4986              :                               keep_const_int_load = true;
    4987              :                               break;
    4988              :                             }
    4989              :                       }
    4990              : 
    4991        27734 :                     if (keep_const_int_load)
    4992              :                       {
    4993              :                         /* Keep constant integer load.  */
    4994        20489 :                         replace_vector_const (mode, broadcast_source,
    4995        20489 :                                               load->insns, int_mode);
    4996        20489 :                         load->broadcast_source = nullptr;
    4997        20489 :                         load->broadcast_reg = nullptr;
    4998              :                       }
    4999              :                     else
    5000              :                       {
    5001        10452 :                         broadcast_reg = gen_reg_rtx (mode);
                                                /* NOTE(review): REG is not read again before the
                                                   break below; confirm whether this pseudo is
                                                   needed.  */
    5002        10452 :                         reg = gen_reg_rtx (load->mode);
    5003        10452 :                         replace_vector_const (mode, broadcast_reg,
    5004        10452 :                                               load->insns, load->mode);
    5005        10452 :                         load->broadcast_source = broadcast_source;
    5006        10452 :                         load->broadcast_reg = broadcast_reg;
    5007              :                       }
    5008              :                     break;
    5009              :                   }
    5010              :               }
    5011              :             /* FALLTHRU */
    5012              : 
    5013        32123 :           case X86_CSE_CONST0_VECTOR:
    5014        32123 :           case X86_CSE_CONSTM1_VECTOR:
    5015        32123 :             mode = ix86_get_vector_cse_mode (load->size, load->mode);
    5016        32123 :             broadcast_reg = gen_reg_rtx (mode);
    5017        32123 :             if (load->def_insn)
    5018              :               {
    5019              :                 /* Replace redundant vector loads with a single vector
    5020              :                    load in the same basic block.  */
    5021          813 :                 reg = load->val;
    5022          813 :                 if (load->mode != GET_MODE (reg))
    5023            0 :                   reg = gen_rtx_SUBREG (load->mode, reg, 0);
    5024          813 :                 broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg);
    5025              :               }
    5026              :             else
    5027              :               /* This is a constant integer/double vector.  If the
    5028              :                  inner scalar is 0 or -1, set vector to CONST0_RTX
    5029              :                  or CONSTM1_RTX directly.  */
    5030        31310 :               switch (load->kind)
    5031              :                 {
    5032        19769 :                 case X86_CSE_CONST0_VECTOR:
    5033        19769 :                   broadcast_source = CONST0_RTX (mode);
    5034        19769 :                   break;
    5035         1235 :                 case X86_CSE_CONSTM1_VECTOR:
    5036         1235 :                   broadcast_source = CONSTM1_RTX (mode);
    5037         1235 :                   break;
    5038        10306 :                 case X86_CSE_CONST_VECTOR:
    5039        10306 :                 case X86_CSE_VEC_DUP:
    5040        10306 :                   if (!broadcast_source)
    5041              :                     {
    5042        10306 :                       reg = gen_reg_rtx (load->mode);
    5043        10306 :                       broadcast_source = gen_rtx_VEC_DUPLICATE (mode,
    5044              :                                                                 reg);
    5045              :                     }
    5046              :                   break;
    5047            0 :                 default:
    5048            0 :                   gcc_unreachable ();
    5049              :                 }
    5050        32123 :             replace_vector_const (mode, broadcast_reg, load->insns,
    5051              :                                   load->mode);
    5052        32123 :             load->broadcast_source = broadcast_source;
    5053        32123 :             load->broadcast_reg = broadcast_reg;
    5054        32123 :             break;
    5055              :           }
    5056              :       }
    5057              : 
                          /* Phase 3: place the one definition of each shared register,
                             either right after DEF_INSN or through the ix86_place_single_*
                             helpers.  */
    5058       976721 :   if (replaced)
    5059              :     {
    5060        41352 :       auto_vec<rtx_insn *> control_flow_insns;
    5061              : 
    5062              :       /* (Re-)discover loops so that bb->loop_father can be used in the
    5063              :          analysis below.  */
    5064        41352 :       calculate_dominance_info (CDI_DOMINATORS);
    5065        41352 :       loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
    5066              : 
    5067       125313 :       FOR_EACH_VEC_ELT (loads, i, load)
    5068        83961 :         if (load->count >= load->threshold)
    5069              :           {
    5070        63377 :             rtx set;
    5071        63377 :             if (load->def_insn)
    5072          829 :               switch (load->kind)
    5073              :                 {
    5074           16 :                 case X86_CSE_TLSDESC:
    5075           16 :                   ix86_place_single_tls_call (load->broadcast_reg,
    5076              :                                               load->tlsdesc_val,
    5077              :                                               load->kind,
    5078           16 :                                               load->bbs,
    5079              :                                               updated_gnu_tls_insns,
    5080              :                                               updated_gnu2_tls_insns,
    5081           16 :                                               PATTERN (load->def_insn));
    5082           16 :                   break;
    5083          813 :                 case X86_CSE_VEC_DUP:
    5084              :                   /* Insert a broadcast after the original scalar
    5085              :                      definition.  */
    5086          813 :                   set = gen_rtx_SET (load->broadcast_reg,
    5087              :                                      load->broadcast_source);
    5088          813 :                   insn = emit_insn_after (set, load->def_insn);
    5089              : 
    5090          813 :                   if (cfun->can_throw_non_call_exceptions)
    5091              :                     {
    5092              :                       /* Handle REG_EH_REGION note in DEF_INSN.  */
    5093            4 :                       rtx note = find_reg_note (load->def_insn,
    5094              :                                                 REG_EH_REGION, nullptr);
    5095            4 :                       if (note)
    5096              :                         {
    5097            1 :                           control_flow_insns.safe_push (load->def_insn);
    5098            1 :                           add_reg_note (insn, REG_EH_REGION,
    5099              :                                         XEXP (note, 0));
    5100              :                         }
    5101              :                     }
    5102              : 
    5103          813 :                   if (dump_file)
    5104              :                     {
    5105            0 :                       fprintf (dump_file, "\nAdd:\n\n");
    5106            0 :                       print_rtl_single (dump_file, insn);
    5107            0 :                       fprintf (dump_file, "\nafter:\n\n");
    5108            0 :                       print_rtl_single (dump_file, load->def_insn);
    5109            0 :                       fprintf (dump_file, "\n");
    5110              :                     }
    5111              :                   break;
    5112            0 :                 default:
    5113            0 :                   gcc_unreachable ();
    5114              :                 }
    5115              :             else
    5116        62548 :               switch (load->kind)
    5117              :                 {
    5118          297 :                 case X86_CSE_TLS_GD:
    5119          297 :                 case X86_CSE_TLS_LD_BASE:
    5120          297 :                 case X86_CSE_TLSDESC:
    5121          297 :                   ix86_place_single_tls_call (load->broadcast_reg,
    5122              :                                               (load->kind == X86_CSE_TLSDESC
    5123              :                                                ? load->tlsdesc_val
    5124              :                                                : load->val),
    5125              :                                               load->kind,
    5126          297 :                                               load->bbs,
    5127              :                                               updated_gnu_tls_insns,
    5128              :                                               updated_gnu2_tls_insns);
    5129          297 :                   break;
    5130        41247 :                 case X86_CSE_CONST_VECTOR:
    5131        41247 :                 case X86_CSE_VEC_DUP:
    5132              :                   /* Keep redundant constant integer load.  */
    5133        41247 :                   if (!load->broadcast_reg)
    5134              :                     break;
    5135              :                   /* FALLTHRU */
    5136        41762 :                 case X86_CSE_CONST0_VECTOR:
    5137        41762 :                 case X86_CSE_CONSTM1_VECTOR:
    5138        41762 :                   ix86_place_single_vector_set (load->broadcast_reg,
    5139              :                                                 load->broadcast_source,
    5140              :                                                 load->bbs,
    5141              :                                                 load);
    5142        41762 :                   break;
    5143              :                 }
    5144              :           }
    5145              : 
    5146        41352 :       loop_optimizer_finalize ();
    5147              : 
    5148        41352 :       if (!control_flow_insns.is_empty ())
    5149              :         {
    5150            1 :           free_dominance_info (CDI_DOMINATORS);
    5151              : 
    5152            3 :           FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
    5153            1 :             if (control_flow_insn_p (insn))
    5154              :               {
    5155              :                 /* Split the block after insn.  There will be a fallthru
    5156              :                    edge, which is OK so we keep it.  We have to create
    5157              :                    the exception edges ourselves.  */
    5158            1 :                 bb = BLOCK_FOR_INSN (insn);
    5159            1 :                 split_block (bb, insn);
    5160            1 :                 rtl_make_eh_edge (NULL, bb, BB_END (bb));
    5161              :               }
    5162              :         }
    5163              : 
    5164        41352 :       df_process_deferred_rescans ();
    5165        41352 :     }
    5166              : 
                          /* Free the redundant_pattern records allocated during the scan.  */
    5167      1130611 :   FOR_EACH_VEC_ELT (loads, i, load)
    5168       307780 :     delete load;
    5169              : 
    5170       976721 :   df_clear_flags (DF_DEFER_INSN_RESCAN);
    5171              : 
    5172       976721 :   timevar_pop (TV_MACH_DEP);
    5173       976721 :   return 0;
    5174       976721 : }
    5175              : 
    5176              : } // anon namespace
    5177              : 
                        /* Create a new pass_x86_cse instance for context CTXT.  */
    5178              : rtl_opt_pass *
    5179       288047 : make_pass_x86_cse (gcc::context *ctxt)
    5180              : {
    5181       288047 :   return new pass_x86_cse (ctxt);
    5182              : }
    5183              : 
    5184              : /* Convert legacy instructions that clobber EFLAGS to APX_NF
    5185              :    instructions when there is no flag set between a flag
    5186              :    producer and its user.  */
    5187              : 
    5188              : static unsigned int
    5189          369 : ix86_apx_nf_convert (void)
    5190              : {
    5191          369 :   timevar_push (TV_MACH_DEP);
    5192              : 
    5193          369 :   basic_block bb;
    5194          369 :   rtx_insn *insn;
    5195          369 :   hash_map <rtx_insn *, rtx> converting_map;
    5196          369 :   auto_vec <rtx_insn *> current_convert_list;
    5197              : 
    5198          369 :   bool converting_seq = false;
    5199          369 :   rtx cc = gen_rtx_REG (CCmode, FLAGS_REG);
    5200              : 
    5201          790 :   FOR_EACH_BB_FN (bb, cfun)
    5202              :     {
    5203              :       /* Reset conversion for each bb.  */
    5204          421 :       converting_seq = false;
    5205         5049 :       FOR_BB_INSNS (bb, insn)
    5206              :         {
    5207         4628 :           if (!NONDEBUG_INSN_P (insn))
    5208         4965 :             continue;
    5209              : 
    5210         3688 :           if (recog_memoized (insn) < 0)
    5211          336 :             continue;
    5212              : 
    5213              :           /* Convert candidate insns after cstore, which should
    5214              :              satisfy the two conditions:
    5215              :              1. Is not a flag user or producer, only clobbers
    5216              :              FLAGS_REG.
    5217              :              2. Has a corresponding nf pattern.  */
    5218              : 
    5219         3352 :           rtx pat = PATTERN (insn);
    5220              : 
    5221              :           /* Start conversion at the first cstorecc.  */
    5222         3352 :           rtx set = NULL_RTX;
    5223         3352 :           if (!converting_seq
    5224         2771 :               && (set = single_set (insn))
    5225         2695 :               && ix86_comparison_operator (SET_SRC (set), VOIDmode)
    5226          126 :               && reg_overlap_mentioned_p (cc, SET_SRC (set))
    5227         3475 :               && !reg_overlap_mentioned_p (cc, SET_DEST (set)))
    5228              :             {
    5229          123 :               converting_seq = true;
    5230          123 :               current_convert_list.truncate (0);
    5231              :             }
    5232              :           /* Terminate at the next explicit flag set.  */
    5233         3229 :           else if (reg_set_p (cc, pat)
    5234         3229 :                    && GET_CODE (set_of (cc, pat)) != CLOBBER)
    5235              :             converting_seq = false;
    5236              : 
    5237         3132 :           if (!converting_seq)
    5238         2749 :             continue;
    5239              : 
    5240          603 :           if (get_attr_has_nf (insn)
    5241          603 :               && GET_CODE (pat) == PARALLEL)
    5242              :             {
    5243              :               /* Record the insn to candidate map.  */
    5244           72 :               current_convert_list.safe_push (insn);
    5245           72 :               converting_map.put (insn, pat);
    5246              :             }
    5247              :           /* If the insn clobbers flags but has no nf_attr,
    5248              :              revoke all previous candidates.  */
    5249          531 :           else if (!get_attr_has_nf (insn)
    5250          530 :                    && reg_set_p (cc, pat)
    5251          534 :                    && GET_CODE (set_of (cc, pat)) == CLOBBER)
    5252              :             {
    5253            3 :               for (auto item : current_convert_list)
    5254            0 :                 converting_map.remove (item);
    5255            3 :               converting_seq = false;
    5256              :             }
    5257              :         }
    5258              :     }
    5259              : 
    5260          369 :   if (!converting_map.is_empty ())
    5261              :     {
    5262           85 :       for (auto iter = converting_map.begin ();
    5263          170 :            iter != converting_map.end (); ++iter)
    5264              :         {
    5265           72 :           rtx_insn *replace = (*iter).first;
    5266           72 :           rtx pat = (*iter).second;
    5267           72 :           int i, n = 0, len = XVECLEN (pat, 0);
    5268           72 :           rtx *new_elems = XALLOCAVEC (rtx, len);
    5269           72 :           rtx new_pat;
    5270          216 :           for (i = 0; i < len; i++)
    5271              :             {
    5272          144 :               rtx temp = XVECEXP (pat, 0, i);
    5273          216 :               if (! (GET_CODE (temp) == CLOBBER
    5274           72 :                      && reg_overlap_mentioned_p (cc,
    5275           72 :                                                  XEXP (temp, 0))))
    5276              :                 {
    5277           72 :                   new_elems[n] = temp;
    5278           72 :                   n++;
    5279              :                 }
    5280              :             }
    5281              : 
    5282           72 :           if (n == 1)
    5283           72 :             new_pat = new_elems[0];
    5284              :           else
    5285            0 :             new_pat =
    5286            0 :               gen_rtx_PARALLEL (VOIDmode,
    5287              :                                 gen_rtvec_v (n,
    5288              :                                              new_elems));
    5289              : 
    5290           72 :           PATTERN (replace) = new_pat;
    5291           72 :           INSN_CODE (replace) = -1;
    5292           72 :           recog_memoized (replace);
    5293           72 :           df_insn_rescan (replace);
    5294              :         }
    5295              :     }
    5296              : 
    5297          369 :   timevar_pop (TV_MACH_DEP);
    5298          369 :   return 0;
    5299          369 : }
    5300              : 
    5301              : 
    5302              : namespace {
    5303              : 
    5304              : const pass_data pass_data_apx_nf_convert =
    5305              : {
    5306              :   RTL_PASS, /* type */
    5307              :   "apx_nfcvt", /* name */
    5308              :   OPTGROUP_NONE, /* optinfo_flags */
    5309              :   TV_MACH_DEP, /* tv_id */
    5310              :   0, /* properties_required */
    5311              :   0, /* properties_provided */
    5312              :   0, /* properties_destroyed */
    5313              :   0, /* todo_flags_start */
    5314              :   0, /* todo_flags_finish */
    5315              : };
    5316              : 
    5317              : class pass_apx_nf_convert : public rtl_opt_pass
    5318              : {
    5319              : public:
    5320       288047 :   pass_apx_nf_convert (gcc::context *ctxt)
    5321       576094 :     : rtl_opt_pass (pass_data_apx_nf_convert, ctxt)
    5322              :   {}
    5323              : 
    5324              :   /* opt_pass methods: */
    5325      1474422 :   bool gate (function *) final override
    5326              :     {
    5327      1474422 :       return (TARGET_APX_NF
    5328          461 :               && optimize
    5329      1474875 :               && optimize_function_for_speed_p (cfun));
    5330              :     }
    5331              : 
    5332          369 :   unsigned int execute (function *) final override
    5333              :     {
    5334          369 :       return ix86_apx_nf_convert ();
    5335              :     }
    5336              : }; // class pass_apx_nf_convert
    5337              : 
    5338              : } // anon namespace
    5339              : 
    5340              : rtl_opt_pass *
    5341       288047 : make_pass_apx_nf_convert (gcc::context *ctxt)
    5342              : {
    5343       288047 :   return new pass_apx_nf_convert (ctxt);
    5344              : }
    5345              : 
    5346              : /* When a hot loop can fit into one cache line,
    5347              :    force align the loop without considering the max skip.  */
    5348              : static void
    5349       976242 : ix86_align_loops ()
    5350              : {
    5351       976242 :   basic_block bb;
    5352              : 
    5353              :   /* Don't do this when we don't know cache line size.  */
    5354       976242 :   if (ix86_cost->prefetch_block == 0)
    5355            9 :     return;
    5356              : 
    5357       976233 :   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
    5358       976233 :   profile_count count_threshold = cfun->cfg->count_max / param_align_threshold;
    5359     11361065 :   FOR_EACH_BB_FN (bb, cfun)
    5360              :     {
    5361     10384832 :       rtx_insn *label = BB_HEAD (bb);
    5362     10384832 :       bool has_fallthru = 0;
    5363     10384832 :       edge e;
    5364     10384832 :       edge_iterator ei;
    5365              : 
    5366     10384832 :       if (!LABEL_P (label))
    5367      5286369 :         continue;
    5368              : 
    5369      5103274 :       profile_count fallthru_count = profile_count::zero ();
    5370      5103274 :       profile_count branch_count = profile_count::zero ();
    5371              : 
    5372     14837079 :       FOR_EACH_EDGE (e, ei, bb->preds)
    5373              :         {
    5374      9733805 :           if (e->flags & EDGE_FALLTHRU)
    5375      2482408 :             has_fallthru = 1, fallthru_count += e->count ();
    5376              :           else
    5377      7251397 :             branch_count += e->count ();
    5378              :         }
    5379              : 
    5380      5103274 :       if (!fallthru_count.initialized_p () || !branch_count.initialized_p ())
    5381         4811 :         continue;
    5382              : 
    5383      5098463 :       if (bb->loop_father
    5384      5098463 :           && bb->loop_father->latch != EXIT_BLOCK_PTR_FOR_FN (cfun)
    5385      6438084 :           && (has_fallthru
    5386      1339621 :               ? (!(single_succ_p (bb)
    5387       146732 :                    && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun))
    5388       932723 :                  && optimize_bb_for_speed_p (bb)
    5389       852243 :                  && branch_count + fallthru_count > count_threshold
    5390       728300 :                  && (branch_count > fallthru_count * param_align_loop_iterations))
    5391              :               /* In case there's no fallthru for the loop.
    5392              :                  Nops inserted won't be executed.  */
    5393       406898 :               : (branch_count > count_threshold
    5394       137369 :                  || (bb->count > bb->prev_bb->count * 10
    5395        12434 :                      && (bb->prev_bb->count
    5396      4567705 :                          <= ENTRY_BLOCK_PTR_FOR_FN (cfun)->count / 2)))))
    5397              :         {
    5398       543192 :           rtx_insn* insn, *end_insn;
    5399       543192 :           HOST_WIDE_INT size = 0;
    5400       543192 :           bool padding_p = true;
    5401       543192 :           basic_block tbb = bb;
    5402       543192 :           unsigned cond_branch_num = 0;
    5403       543192 :           bool detect_tight_loop_p = false;
    5404              : 
    5405       857766 :           for (unsigned int i = 0; i != bb->loop_father->num_nodes;
    5406       314574 :                i++, tbb = tbb->next_bb)
    5407              :             {
    5408              :               /* Only handle continuous cfg layout. */
    5409       857766 :               if (bb->loop_father != tbb->loop_father)
    5410              :                 {
    5411              :                   padding_p = false;
    5412              :                   break;
    5413              :                 }
    5414              : 
    5415     10082020 :               FOR_BB_INSNS (tbb, insn)
    5416              :                 {
    5417      9421869 :                   if (!NONDEBUG_INSN_P (insn))
    5418      5398086 :                     continue;
    5419      4023783 :                   size += ix86_min_insn_size (insn);
    5420              : 
    5421              :                   /* We don't know size of inline asm.
    5422              :                      Don't align loop for call.  */
    5423      4023783 :                   if (asm_noperands (PATTERN (insn)) >= 0
    5424      4023783 :                       || CALL_P (insn))
    5425              :                     {
    5426              :                       size = -1;
    5427              :                       break;
    5428              :                     }
    5429              :                 }
    5430              : 
    5431       817235 :               if (size == -1 || size > ix86_cost->prefetch_block)
    5432              :                 {
    5433              :                   padding_p = false;
    5434              :                   break;
    5435              :                 }
    5436              : 
    5437      1454095 :               FOR_EACH_EDGE (e, ei, tbb->succs)
    5438              :                 {
    5439              :                   /* It could be part of the loop.  */
    5440      1002877 :                   if (e->dest == bb)
    5441              :                     {
    5442              :                       detect_tight_loop_p = true;
    5443              :                       break;
    5444              :                     }
    5445              :                 }
    5446              : 
    5447       634828 :               if (detect_tight_loop_p)
    5448              :                 break;
    5449              : 
    5450       451218 :               end_insn = BB_END (tbb);
    5451       451218 :               if (JUMP_P (end_insn))
    5452              :                 {
    5453              :                   /* For decoded icache:
    5454              :                      1. Up to two branches are allowed per Way.
    5455              :                      2. A non-conditional branch is the last micro-op in a Way.
    5456              :                   */
    5457       363540 :                   if (onlyjump_p (end_insn)
    5458       363540 :                       && (any_uncondjump_p (end_insn)
    5459       307820 :                           || single_succ_p (tbb)))
    5460              :                     {
    5461              :                       padding_p = false;
    5462              :                       break;
    5463              :                     }
    5464       307820 :                   else if (++cond_branch_num >= 2)
    5465              :                     {
    5466              :                       padding_p = false;
    5467              :                       break;
    5468              :                     }
    5469              :                 }
    5470              : 
    5471              :             }
    5472              : 
    5473       543192 :           if (padding_p && detect_tight_loop_p)
    5474              :             {
    5475       367220 :               emit_insn_before (gen_max_skip_align (GEN_INT (ceil_log2 (size)),
    5476              :                                                     GEN_INT (0)), label);
    5477              :               /* End of function.  */
    5478       183610 :               if (!tbb || tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
    5479              :                 break;
    5480              :               /* Skip bb which already fits into one cacheline.  */
    5481              :               bb = tbb;
    5482              :             }
    5483              :         }
    5484              :     }
    5485              : 
    5486       976233 :   loop_optimizer_finalize ();
    5487       976233 :   free_dominance_info (CDI_DOMINATORS);
    5488              : }
    5489              : 
    5490              : namespace {
    5491              : 
    5492              : const pass_data pass_data_align_tight_loops =
    5493              : {
    5494              :   RTL_PASS, /* type */
    5495              :   "align_tight_loops", /* name */
    5496              :   OPTGROUP_NONE, /* optinfo_flags */
    5497              :   TV_MACH_DEP, /* tv_id */
    5498              :   0, /* properties_required */
    5499              :   0, /* properties_provided */
    5500              :   0, /* properties_destroyed */
    5501              :   0, /* todo_flags_start */
    5502              :   0, /* todo_flags_finish */
    5503              : };
    5504              : 
    5505              : class pass_align_tight_loops : public rtl_opt_pass
    5506              : {
    5507              : public:
    5508       288047 :   pass_align_tight_loops (gcc::context *ctxt)
    5509       576094 :     : rtl_opt_pass (pass_data_align_tight_loops, ctxt)
    5510              :   {}
    5511              : 
    5512              :   /* opt_pass methods: */
    5513      1474422 :   bool gate (function *) final override
    5514              :     {
    5515      1474422 :       return TARGET_ALIGN_TIGHT_LOOPS
    5516      1473936 :              && optimize
    5517      2515432 :              && optimize_function_for_speed_p (cfun);
    5518              :     }
    5519              : 
    5520       976242 :   unsigned int execute (function *) final override
    5521              :     {
    5522       976242 :       timevar_push (TV_MACH_DEP);
    5523              : #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
    5524       976242 :       ix86_align_loops ();
    5525              : #endif
    5526       976242 :       timevar_pop (TV_MACH_DEP);
    5527       976242 :       return 0;
    5528              :     }
    5529              : }; // class pass_align_tight_loops
    5530              : 
    5531              : } // anon namespace
    5532              : 
    5533              : rtl_opt_pass *
    5534       288047 : make_pass_align_tight_loops (gcc::context *ctxt)
    5535              : {
    5536       288047 :   return new pass_align_tight_loops (ctxt);
    5537              : }
    5538              : 
    5539              : /* This compares the priority of target features in function DECL1
    5540              :    and DECL2.  It returns positive value if DECL1 is higher priority,
    5541              :    negative value if DECL2 is higher priority and 0 if they are the
    5542              :    same.  */
    5543              : 
    5544              : int
    5545         5772 : ix86_compare_version_priority (tree decl1, tree decl2)
    5546              : {
    5547         5772 :   unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
    5548         5772 :   unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
    5549              : 
    5550         5772 :   return (int)priority1 - (int)priority2;
    5551              : }
    5552              : 
    5553              : /* V1 and V2 point to function versions with different priorities
    5554              :    based on the target ISA.  This function compares their priorities.  */
    5555              : 
    5556              : static int
    5557         6860 : feature_compare (const void *v1, const void *v2)
    5558              : {
    5559         6860 :   typedef struct _function_version_info
    5560              :     {
    5561              :       tree version_decl;
    5562              :       tree predicate_chain;
    5563              :       unsigned int dispatch_priority;
    5564              :     } function_version_info;
    5565              : 
    5566         6860 :   const function_version_info c1 = *(const function_version_info *)v1;
    5567         6860 :   const function_version_info c2 = *(const function_version_info *)v2;
    5568         6860 :   return (c2.dispatch_priority - c1.dispatch_priority);
    5569              : }
    5570              : 
    5571              : /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
    5572              :    to return a pointer to VERSION_DECL if the outcome of the expression
    5573              :    formed by PREDICATE_CHAIN is true.  This function will be called during
    5574              :    version dispatch to decide which function version to execute.  It returns
    5575              :    the basic block at the end, to which more conditions can be added.  */
    5576              : 
    5577              : static basic_block
    5578          834 : add_condition_to_bb (tree function_decl, tree version_decl,
    5579              :                      tree predicate_chain, basic_block new_bb)
    5580              : {
    5581          834 :   gimple *return_stmt;
    5582          834 :   tree convert_expr, result_var;
    5583          834 :   gimple *convert_stmt;
    5584          834 :   gimple *call_cond_stmt;
    5585          834 :   gimple *if_else_stmt;
    5586              : 
    5587          834 :   basic_block bb1, bb2, bb3;
    5588          834 :   edge e12, e23;
    5589              : 
    5590          834 :   tree cond_var, and_expr_var = NULL_TREE;
    5591          834 :   gimple_seq gseq;
    5592              : 
    5593          834 :   tree predicate_decl, predicate_arg;
    5594              : 
    5595          834 :   push_cfun (DECL_STRUCT_FUNCTION (function_decl));
    5596              : 
    5597          834 :   gcc_assert (new_bb != NULL);
    5598          834 :   gseq = bb_seq (new_bb);
    5599              : 
    5600              : 
    5601          834 :   convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
    5602              :                          build_fold_addr_expr (version_decl));
    5603          834 :   result_var = create_tmp_var (ptr_type_node);
    5604          834 :   convert_stmt = gimple_build_assign (result_var, convert_expr);
    5605          834 :   return_stmt = gimple_build_return (result_var);
    5606              : 
    5607          834 :   if (predicate_chain == NULL_TREE)
    5608              :     {
    5609          200 :       gimple_seq_add_stmt (&gseq, convert_stmt);
    5610          200 :       gimple_seq_add_stmt (&gseq, return_stmt);
    5611          200 :       set_bb_seq (new_bb, gseq);
    5612          200 :       gimple_set_bb (convert_stmt, new_bb);
    5613          200 :       gimple_set_bb (return_stmt, new_bb);
    5614          200 :       pop_cfun ();
    5615          200 :       return new_bb;
    5616              :     }
    5617              : 
    5618         1307 :   while (predicate_chain != NULL)
    5619              :     {
    5620          673 :       cond_var = create_tmp_var (integer_type_node);
    5621          673 :       predicate_decl = TREE_PURPOSE (predicate_chain);
    5622          673 :       predicate_arg = TREE_VALUE (predicate_chain);
    5623          673 :       call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
    5624          673 :       gimple_call_set_lhs (call_cond_stmt, cond_var);
    5625              : 
    5626          673 :       gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
    5627          673 :       gimple_set_bb (call_cond_stmt, new_bb);
    5628          673 :       gimple_seq_add_stmt (&gseq, call_cond_stmt);
    5629              : 
    5630          673 :       predicate_chain = TREE_CHAIN (predicate_chain);
    5631              : 
    5632          673 :       if (and_expr_var == NULL)
    5633              :         and_expr_var = cond_var;
    5634              :       else
    5635              :         {
    5636           39 :           gimple *assign_stmt;
    5637              :           /* Use MIN_EXPR to check if any integer is zero.
    5638              :              and_expr_var = min_expr <cond_var, and_expr_var>  */
    5639           39 :           assign_stmt = gimple_build_assign (and_expr_var,
    5640              :                           build2 (MIN_EXPR, integer_type_node,
    5641              :                                   cond_var, and_expr_var));
    5642              : 
    5643           39 :           gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
    5644           39 :           gimple_set_bb (assign_stmt, new_bb);
    5645           39 :           gimple_seq_add_stmt (&gseq, assign_stmt);
    5646              :         }
    5647              :     }
    5648              : 
    5649          634 :   if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
    5650              :                                     integer_zero_node,
    5651              :                                     NULL_TREE, NULL_TREE);
    5652          634 :   gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
    5653          634 :   gimple_set_bb (if_else_stmt, new_bb);
    5654          634 :   gimple_seq_add_stmt (&gseq, if_else_stmt);
    5655              : 
    5656          634 :   gimple_seq_add_stmt (&gseq, convert_stmt);
    5657          634 :   gimple_seq_add_stmt (&gseq, return_stmt);
    5658          634 :   set_bb_seq (new_bb, gseq);
    5659              : 
    5660          634 :   bb1 = new_bb;
    5661          634 :   e12 = split_block (bb1, if_else_stmt);
    5662          634 :   bb2 = e12->dest;
    5663          634 :   e12->flags &= ~EDGE_FALLTHRU;
    5664          634 :   e12->flags |= EDGE_TRUE_VALUE;
    5665              : 
    5666          634 :   e23 = split_block (bb2, return_stmt);
    5667              : 
    5668          634 :   gimple_set_bb (convert_stmt, bb2);
    5669          634 :   gimple_set_bb (return_stmt, bb2);
    5670              : 
    5671          634 :   bb3 = e23->dest;
    5672          634 :   make_edge (bb1, bb3, EDGE_FALSE_VALUE);
    5673              : 
    5674          634 :   remove_edge (e23);
    5675          634 :   make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
    5676              : 
    5677          634 :   pop_cfun ();
    5678              : 
    5679          634 :   return bb3;
    5680              : }
    5681              : 
    5682              : /* This function generates the dispatch function for
    5683              :    multi-versioned functions.  DISPATCH_DECL is the function which will
    5684              :    contain the dispatch logic.  FNDECLS are the function choices for
    5685              :    dispatch, and is a tree chain.  EMPTY_BB is the basic block pointer
    5686              :    in DISPATCH_DECL in which the dispatch code is generated.  */
    5687              : 
    5688              : static int
    5689          200 : dispatch_function_versions (tree dispatch_decl,
    5690              :                             void *fndecls_p,
    5691              :                             basic_block *empty_bb)
    5692              : {
    5693          200 :   tree default_decl;
    5694          200 :   gimple *ifunc_cpu_init_stmt;
    5695          200 :   gimple_seq gseq;
    5696          200 :   int ix;
    5697          200 :   tree ele;
    5698          200 :   vec<tree> *fndecls;
    5699          200 :   unsigned int num_versions = 0;
    5700          200 :   unsigned int actual_versions = 0;
    5701          200 :   unsigned int i;
    5702              : 
    5703          200 :   struct _function_version_info
    5704              :     {
    5705              :       tree version_decl;
    5706              :       tree predicate_chain;
    5707              :       unsigned int dispatch_priority;
    5708              :     }*function_version_info;
    5709              : 
    5710          200 :   gcc_assert (dispatch_decl != NULL
    5711              :               && fndecls_p != NULL
    5712              :               && empty_bb != NULL);
    5713              : 
    5714              :   /* fndecls_p is actually a vector.  */
    5715          200 :   fndecls = static_cast<vec<tree> *> (fndecls_p);
    5716              : 
    5717              :   /* At least one more version other than the default.  */
    5718          200 :   num_versions = fndecls->length ();
    5719          200 :   gcc_assert (num_versions >= 2);
    5720              : 
    5721          200 :   function_version_info = (struct _function_version_info *)
    5722          200 :     XNEWVEC (struct _function_version_info, (num_versions - 1));
    5723              : 
    5724              :   /* The first version in the vector is the default decl.  */
    5725          200 :   default_decl = (*fndecls)[0];
    5726              : 
    5727          200 :   push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
    5728              : 
    5729          200 :   gseq = bb_seq (*empty_bb);
    5730              :   /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
    5731              :      constructors, so explicitly call __builtin_cpu_init here.  */
    5732          200 :   ifunc_cpu_init_stmt
    5733          200 :     = gimple_build_call_vec (get_ix86_builtin (IX86_BUILTIN_CPU_INIT), vNULL);
    5734          200 :   gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
    5735          200 :   gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
    5736          200 :   set_bb_seq (*empty_bb, gseq);
    5737              : 
    5738          200 :   pop_cfun ();
    5739              : 
    5740              : 
    5741          991 :   for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    5742              :     {
    5743          791 :       tree version_decl = ele;
    5744          791 :       tree predicate_chain = NULL_TREE;
    5745          791 :       unsigned int priority;
    5746              :       /* Get attribute string, parse it and find the right predicate decl.
    5747              :          The predicate function could be a lengthy combination of many
    5748              :          features, like arch-type and various isa-variants.  */
    5749          791 :       priority = get_builtin_code_for_version (version_decl,
    5750              :                                                &predicate_chain);
    5751              : 
    5752          791 :       if (predicate_chain == NULL_TREE)
    5753          157 :         continue;
    5754              : 
    5755          634 :       function_version_info [actual_versions].version_decl = version_decl;
    5756          634 :       function_version_info [actual_versions].predicate_chain
    5757          634 :          = predicate_chain;
    5758          634 :       function_version_info [actual_versions].dispatch_priority = priority;
    5759          634 :       actual_versions++;
    5760              :     }
    5761              : 
    5762              :   /* Sort the versions according to descending order of dispatch priority.  The
    5763              :      priority is based on the ISA.  This is not a perfect solution.  There
    5764              :      could still be ambiguity.  If more than one function version is suitable
    5765              :      to execute, which one should be dispatched?  In future, allow the user
    5766              :      to specify a dispatch priority next to the version.  */
    5767          200 :   qsort (function_version_info, actual_versions,
    5768              :          sizeof (struct _function_version_info), feature_compare);
    5769              : 
    5770         1034 :   for  (i = 0; i < actual_versions; ++i)
    5771          634 :     *empty_bb = add_condition_to_bb (dispatch_decl,
    5772              :                                      function_version_info[i].version_decl,
    5773          634 :                                      function_version_info[i].predicate_chain,
    5774              :                                      *empty_bb);
    5775              : 
    5776              :   /* dispatch default version at the end.  */
    5777          200 :   *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
    5778              :                                    NULL, *empty_bb);
    5779              : 
    5780          200 :   free (function_version_info);
    5781          200 :   return 0;
    5782              : }
    5783              : 
    5784              : /* This function changes the assembler name for functions that are
    5785              :    versions.  If DECL is a function version and has a "target"
    5786              :    attribute, it appends the attribute string to its assembler name.
                      :
                      :    ID is the identifier the new name is built from; the value returned
                      :    is the result of clone_identifier on ID and the sorted attribute
                      :    string.  DECL must carry a "target" attribute (asserted below).
                      :
                      :    A gnu_inline version gets an error and a virtual function gets a
                      :    sorry, but there is no early return after either diagnostic, so
                      :    the new name is still computed and returned.  */
    5787              : 
    5788              : static tree
    5789         1113 : ix86_mangle_function_version_assembler_name (tree decl, tree id)
    5790              : {
    5791         1113 :   tree version_attr;
    5792         1113 :   char *attr_str;
    5793              : 
    5794         1113 :   if (DECL_DECLARED_INLINE_P (decl)
    5795         1162 :       && lookup_attribute ("gnu_inline",
    5796           49 :                            DECL_ATTRIBUTES (decl)))
    5797            0 :     error_at (DECL_SOURCE_LOCATION (decl),
    5798              :               "function versions cannot be marked as %<gnu_inline%>,"
    5799              :               " bodies have to be generated");
    5800              : 
    5801         1113 :   if (DECL_VIRTUAL_P (decl)
    5802         2226 :       || DECL_VINDEX (decl))
    5803            0 :     sorry ("virtual function multiversioning not supported");
    5804              : 
    5805         1113 :   version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
    5806              : 
    5807              :   /* A function version must carry a target attribute, so the lookup
                      :      above cannot have returned NULL_TREE.  */
    5808         1113 :   gcc_assert (version_attr != NULL_TREE);
    5809              : 
    5810         1113 :   attr_str = sorted_attr_string (TREE_VALUE (version_attr));
    5811              : 
    5812              :   /* Allow assembler name to be modified if already set, by clearing
                      :      the RTL recorded for DECL.  */
    5813         1113 :   if (DECL_ASSEMBLER_NAME_SET_P (decl))
    5814         1098 :     SET_DECL_RTL (decl, NULL);
    5815              : 
    5816         1113 :   tree ret = clone_identifier (id, attr_str, true);
    5817              :   /* ATTR_STR is not needed once the new identifier exists.  */
    5818         1113 :   XDELETEVEC (attr_str);
    5819              : 
    5820         1113 :   return ret;
    5821              : }
    5822              : /* Return the assembler name to use for DECL, starting from the
                      :    identifier ID.  For a FUNCTION_DECL one of three adjustments may
                      :    apply, as selected by the tests below: a function version gets its
                      :    target suffix, a dispatcher that is not a target clone gets a name
                      :    cloned from ID with "ifunc", and a dispatcher resolver gets a name
                      :    cloned from ID with "resolver".  Any other decl keeps ID.  When
                      :    SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME is defined, the result is
                      :    passed through that macro as well before being returned.  */
    5823              : tree
    5824    482940034 : ix86_mangle_decl_assembler_name (tree decl, tree id)
    5825              : {
    5826              :   /* For function version, add the target suffix to the assembler name.  */
    5827    482940034 :   if (TREE_CODE (decl) == FUNCTION_DECL)
    5828              :     {
    5829    448611536 :       cgraph_node *node = cgraph_node::get (decl);
    5830              :       /* Mangle all versions when annotated with target_clones, but only
    5831              :          non-default versions when annotated with target attributes.
                      :          NOTE(review): NODE is dereferenced in this test without the
                      :          null check that the two branches below perform; presumably
                      :          a versioned decl always has a cgraph node -- confirm.  */
    5832    448611536 :       if (DECL_FUNCTION_VERSIONED (decl)
    5833    448611536 :           && (node->is_target_clone
    5834         1089 :               || !is_function_default_version (node->decl)))
    5835         1113 :         id = ix86_mangle_function_version_assembler_name (decl, id);
    5836              :       /* Mangle the dispatcher symbol, but not in the case of target clones.  */
    5837    448610423 :       else if (node && node->dispatcher_function && !node->is_target_clone)
    5838          117 :         id = clone_identifier (id, "ifunc");
    5839     63723989 :       else if (node && node->dispatcher_resolver_function)
    5840          200 :         id = clone_identifier (id, "resolver");
    5841              :     }
    5842              : #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
    5843              :   id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
    5844              : #endif
    5845              : 
    5846    482940034 :   return id;
    5847              : }
    5848              : 
    5849              : /* Make a dispatcher declaration for the multi-versioned function DECL.
    5850              :    Calls to DECL function will be replaced with calls to the dispatcher
    5851              :    by the front-end.  Returns the decl of the dispatcher function.
                      :
                      :    DECL must be a versioned function that has a cgraph node and
                      :    version info (all three are asserted).  A dispatcher already
                      :    recorded for DECL is returned as is.  NULL is returned when the
                      :    head of the version chain is not the default version, and also
                      :    after the error issued when ifunc is not available.  */
    5852              : 
    5853              : tree
    5854          326 : ix86_get_function_versions_dispatcher (void *decl)
    5855              : {
    5856          326 :   tree fn = (tree) decl;
    5857          326 :   struct cgraph_node *node = NULL;
    5858          326 :   struct cgraph_node *default_node = NULL;
    5859          326 :   struct cgraph_function_version_info *node_v = NULL;
    5860              : 
    5861          326 :   tree dispatch_decl = NULL;
    5862              : 
    5863          326 :   struct cgraph_function_version_info *default_version_info = NULL;
    5864              : 
    5865          652 :   gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
    5866              : 
    5867          326 :   node = cgraph_node::get (fn);
    5868          326 :   gcc_assert (node != NULL);
    5869              : 
    5870          326 :   node_v = node->function_version ();
    5871          326 :   gcc_assert (node_v != NULL);
    5872              :   /* Reuse the dispatcher if one was already recorded for this version.  */
    5873          326 :   if (node_v->dispatcher_resolver != NULL)
    5874              :     return node_v->dispatcher_resolver;
    5875              : 
    5876              :   /* The default node is always the beginning of the chain, so follow
                      :      the prev links back to the head.  */
    5877              :   default_version_info = node_v;
    5878          674 :   while (default_version_info->prev != NULL)
    5879              :     default_version_info = default_version_info->prev;
    5880          212 :   default_node = default_version_info->this_node;
    5881              : 
    5882              :   /* If there is no default node, just return NULL.  */
    5883          212 :   if (!is_function_default_version (default_node->decl))
    5884              :     return NULL;
    5885              :   /* Without ASM_OUTPUT_TYPE_DIRECTIVE only the error block is compiled.  */
    5886              : #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
    5887          203 :   if (targetm.has_ifunc_p ())
    5888              :     {
    5889          203 :       struct cgraph_function_version_info *it_v = NULL;
    5890              : 
    5891              :       /* Right now, the dispatching is done via ifunc.  */
    5892          203 :       dispatch_decl = make_dispatcher_decl (default_node->decl);
    5893              : 
    5894              :       /* Set the dispatcher for all the versions, walking the next links
                      :          from the default version to the end of the chain.  */
    5895          203 :       it_v = default_version_info;
    5896         1403 :       while (it_v != NULL)
    5897              :         {
    5898          997 :           it_v->dispatcher_resolver = dispatch_decl;
    5899          997 :           it_v = it_v->next;
    5900              :         }
    5901              :     }
    5902              :   else
    5903              : #endif
    5904              :     {
    5905            0 :       error_at (DECL_SOURCE_LOCATION (default_node->decl),
    5906              :                 "multiversioning needs %<ifunc%> which is not supported "
    5907              :                 "on this target");
    5908              :     }
    5909              : 
    5910              :   return dispatch_decl;
    5911              : }
    5912              : 
    5913              : /* Make the resolver function decl to dispatch the versions of
    5914              :    a multi-versioned function, DEFAULT_DECL.  IFUNC_ALIAS_DECL is the
    5915              :    ifunc alias that will point to the created resolver.  Create an
    5916              :    empty basic block in the resolver and store the pointer in
    5917              :    EMPTY_BB.  Return the decl of the resolver function.
                      :
                      :    Besides building the resolver, this modifies IFUNC_ALIAS_DECL: it
                      :    is made non-external, it is made non-public when DEFAULT_DECL is
                      :    neither public nor in a comdat group, it receives an "ifunc"
                      :    attribute naming the resolver, and it is registered as a
                      :    same-body alias of the resolver.  */
    5918              : 
    5919              : static tree
    5920          200 : make_resolver_func (const tree default_decl,
    5921              :                     const tree ifunc_alias_decl,
    5922              :                     basic_block *empty_bb)
    5923              : {
    5924          200 :   tree decl, type, t;
    5925              : 
    5926              :   /* The resolver function should return a (void *). */
    5927          200 :   type = build_function_type_list (ptr_type_node, NULL_TREE);
    5928              : 
    5929          200 :   cgraph_node *node = cgraph_node::get (default_decl);
    5930          200 :   gcc_assert (node && node->function_version ());
    5931              : 
    5932          200 :   decl = build_fn_decl (IDENTIFIER_POINTER (DECL_NAME (default_decl)), type);
    5933              : 
    5934              :   /* Set the assembler name to prevent cgraph_node attempting to mangle.  */
    5935          200 :   SET_DECL_ASSEMBLER_NAME (decl, DECL_ASSEMBLER_NAME (default_decl));
    5936              : 
    5937          200 :   cgraph_node *resolver_node = cgraph_node::get_create (decl);
    5938          200 :   resolver_node->dispatcher_resolver_function = true;
    5939              : 
    5940          200 :   if (node->is_target_clone)
    5941           86 :     resolver_node->is_target_clone = true;
    5942              :   /* Now give the resolver its final assembler name, derived from the
                      :      assembler name recorded in the version info of DEFAULT_DECL.  */
    5943          200 :   tree id = ix86_mangle_decl_assembler_name
    5944          200 :     (decl, node->function_version ()->assembler_name);
    5945          200 :   symtab->change_decl_assembler_name (decl, id);
    5946              : 
    5947          200 :   DECL_NAME (decl) = DECL_NAME (default_decl);
    5948          200 :   TREE_USED (decl) = 1;
    5949          200 :   DECL_ARTIFICIAL (decl) = 1;
    5950          200 :   DECL_IGNORED_P (decl) = 1;
    5951          200 :   TREE_PUBLIC (decl) = 0;
    5952          200 :   DECL_UNINLINABLE (decl) = 1;
    5953              : 
    5954              :   /* Resolver is not external, body is generated.  The ifunc alias is
                      :      marked as not external as well.  */
    5955          200 :   DECL_EXTERNAL (decl) = 0;
    5956          200 :   DECL_EXTERNAL (ifunc_alias_decl) = 0;
    5957              : 
    5958          200 :   DECL_CONTEXT (decl) = NULL_TREE;
    5959          200 :   DECL_INITIAL (decl) = make_node (BLOCK);
    5960          200 :   DECL_STATIC_CONSTRUCTOR (decl) = 0;
    5961              : 
    5962          200 :   if (DECL_COMDAT_GROUP (default_decl)
    5963          200 :       || TREE_PUBLIC (default_decl))
    5964              :     {
    5965              :       /* In this case, each translation unit with a call to this
    5966              :          versioned function will put out a resolver.  Ensure it
    5967              :          is comdat to keep just one copy.  */
    5968          176 :       DECL_COMDAT (decl) = 1;
    5969          176 :       make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    5970              :     }
    5971              :   else
    5972           24 :     TREE_PUBLIC (ifunc_alias_decl) = 0;
    5973              : 
    5974              :   /* Build result decl and add to function_decl. */
    5975          200 :   t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
    5976          200 :   DECL_CONTEXT (t) = decl;
    5977          200 :   DECL_ARTIFICIAL (t) = 1;
    5978          200 :   DECL_IGNORED_P (t) = 1;
    5979          200 :   DECL_RESULT (decl) = t;
    5980              : 
    5981          200 :   gimplify_function_tree (decl);
    5982          200 :   push_cfun (DECL_STRUCT_FUNCTION (decl));
    5983          200 :   *empty_bb = init_lowered_empty_function (decl, false,
    5984              :                                            profile_count::uninitialized ());
    5985              : 
    5986          200 :   cgraph_node::add_new_function (decl, true);
    5987          200 :   symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
    5988              : 
    5989          200 :   pop_cfun ();
    5990              :   /* NOTE(review): this assert comes after IFUNC_ALIAS_DECL was already
                      :      used above (DECL_EXTERNAL, TREE_PUBLIC), so it cannot guard
                      :      those uses -- consider checking at function entry instead.  */
    5991          200 :   gcc_assert (ifunc_alias_decl != NULL);
    5992              :   /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name.  */
    5993          200 :   DECL_ATTRIBUTES (ifunc_alias_decl)
    5994          200 :     = make_attribute ("ifunc", IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)),
    5995          200 :                       DECL_ATTRIBUTES (ifunc_alias_decl));
    5996              : 
    5997              :   /* Create the alias for dispatch to resolver here.  */
    5998          200 :   cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
    5999          200 :   return decl;
    6000              : }
    6001              : 
    6002              : /* Generate the dispatching code body to dispatch multi-versioned function
    6003              :    DECL.  The target hook is called to process the "target" attributes and
    6004              :    provide the code to dispatch the right function at run-time.  NODE_P
    6005              :    points to the cgraph_node of the dispatcher whose body will be created.
                      :
                      :    Returns the decl of the resolver function.  If the version info of
                      :    the dispatcher already records a resolver, that decl is returned
                      :    and nothing is rebuilt.  Otherwise the dispatcher node loses its
                      :    definition flag, make_resolver_func builds the resolver from the
                      :    version that follows the dispatcher in the chain, and
                      :    dispatch_function_versions is given the decls of all versions
                      :    that follow the dispatcher.  */
    6006              : 
    6007              : tree
    6008          200 : ix86_generate_version_dispatcher_body (void *node_p)
    6009              : {
    6010          200 :   tree resolver_decl;
    6011          200 :   basic_block empty_bb;
    6012          200 :   tree default_ver_decl;
    6013          200 :   struct cgraph_node *versn;
    6014          200 :   struct cgraph_node *node;
    6015              : 
    6016          200 :   struct cgraph_function_version_info *node_version_info = NULL;
    6017          200 :   struct cgraph_function_version_info *versn_info = NULL;
    6018              : 
    6019          200 :   node = (cgraph_node *)node_p;
    6020              : 
    6021          200 :   node_version_info = node->function_version ();
    6022          200 :   gcc_assert (node->dispatcher_function
    6023              :               && node_version_info != NULL);
    6024              :   /* The resolver was already generated; hand it back unchanged.  */
    6025          200 :   if (node_version_info->dispatcher_resolver)
    6026              :     return node_version_info->dispatcher_resolver;
    6027              : 
    6028              :   /* The first version in the chain corresponds to the default version.
                      :      NOTE(review): next is dereferenced without a null test;
                      :      presumably a dispatcher always has at least the default
                      :      version after it in the chain -- confirm.  */
    6029          200 :   default_ver_decl = node_version_info->next->this_node->decl;
    6030              : 
    6031              :   /* node is going to be an alias, so remove the finalized bit.  */
    6032          200 :   node->definition = false;
    6033              : 
    6034          200 :   resolver_decl = make_resolver_func (default_ver_decl,
    6035              :                                       node->decl, &empty_bb);
    6036              : 
    6037          200 :   node_version_info->dispatcher_resolver = resolver_decl;
    6038              : 
    6039          200 :   push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
    6040              : 
    6041          200 :   auto_vec<tree, 2> fn_ver_vec;
    6042              : 
    6043         1191 :   for (versn_info = node_version_info->next; versn_info;
    6044          991 :        versn_info = versn_info->next)
    6045              :     {
    6046          991 :       versn = versn_info->this_node;
    6047              :       /* Check for virtual functions here again, as by this time it should
    6048              :          have been determined if this function needs a vtable index or
    6049              :          not.  This happens for methods in derived classes that override
    6050              :          virtual methods in base classes but are not explicitly marked as
    6051              :          virtual.  The sorry does not skip the version: its decl is
                      :          still pushed onto the vector below.  */
    6052          991 :       if (DECL_VIRTUAL_P (versn->decl))
    6053            0 :         sorry ("virtual function multiversioning not supported");
    6054              : 
    6055          991 :       fn_ver_vec.safe_push (versn->decl);
    6056              :     }
    6057              : 
    6058          200 :   dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
    6059          200 :   cgraph_edge::rebuild_edges ();
    6060          200 :   pop_cfun ();
    6061          200 :   return resolver_decl;
    6062          200 : }
    6063              : 
    6064              : 
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite is run with the built compiler.