LCOV - code coverage report
Current view: top level - gcc/config/i386 - i386-features.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 88.3 % 2634 2325
Test Date: 2026-02-28 14:20:25 Functions: 98.9 % 95 94
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Copyright (C) 1988-2026 Free Software Foundation, Inc.
       2              : 
       3              : This file is part of GCC.
       4              : 
       5              : GCC is free software; you can redistribute it and/or modify
       6              : it under the terms of the GNU General Public License as published by
       7              : the Free Software Foundation; either version 3, or (at your option)
       8              : any later version.
       9              : 
      10              : GCC is distributed in the hope that it will be useful,
      11              : but WITHOUT ANY WARRANTY; without even the implied warranty of
      12              : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13              : GNU General Public License for more details.
      14              : 
      15              : You should have received a copy of the GNU General Public License
      16              : along with GCC; see the file COPYING3.  If not see
      17              : <http://www.gnu.org/licenses/>.  */
      18              : 
      19              : #define IN_TARGET_CODE 1
      20              : 
      21              : #include "config.h"
      22              : #include "system.h"
      23              : #include "coretypes.h"
      24              : #include "backend.h"
      25              : #include "rtl.h"
      26              : #include "tree.h"
      27              : #include "memmodel.h"
      28              : #include "gimple.h"
      29              : #include "cfghooks.h"
      30              : #include "cfgloop.h"
      31              : #include "df.h"
      32              : #include "tm_p.h"
      33              : #include "stringpool.h"
      34              : #include "expmed.h"
      35              : #include "optabs.h"
      36              : #include "regs.h"
      37              : #include "emit-rtl.h"
      38              : #include "recog.h"
      39              : #include "cgraph.h"
      40              : #include "diagnostic.h"
      41              : #include "cfgbuild.h"
      42              : #include "alias.h"
      43              : #include "fold-const.h"
      44              : #include "attribs.h"
      45              : #include "calls.h"
      46              : #include "stor-layout.h"
      47              : #include "varasm.h"
      48              : #include "output.h"
      49              : #include "insn-attr.h"
      50              : #include "flags.h"
      51              : #include "except.h"
      52              : #include "explow.h"
      53              : #include "expr.h"
      54              : #include "cfgrtl.h"
      55              : #include "common/common-target.h"
      56              : #include "langhooks.h"
      57              : #include "reload.h"
      58              : #include "gimplify.h"
      59              : #include "dwarf2.h"
      60              : #include "tm-constrs.h"
      61              : #include "cselib.h"
      62              : #include "sched-int.h"
      63              : #include "opts.h"
      64              : #include "tree-pass.h"
      65              : #include "context.h"
      66              : #include "pass_manager.h"
      67              : #include "target-globals.h"
      68              : #include "gimple-iterator.h"
      69              : #include "shrink-wrap.h"
      70              : #include "builtins.h"
      71              : #include "rtl-iter.h"
      72              : #include "tree-iterator.h"
      73              : #include "dbgcnt.h"
      74              : #include "case-cfn-macros.h"
      75              : #include "dojump.h"
      76              : #include "fold-const-call.h"
      77              : #include "tree-vrp.h"
      78              : #include "tree-ssanames.h"
      79              : #include "selftest.h"
      80              : #include "selftest-rtl.h"
      81              : #include "print-rtl.h"
      82              : #include "intl.h"
      83              : #include "ifcvt.h"
      84              : #include "symbol-summary.h"
      85              : #include "sreal.h"
      86              : #include "ipa-cp.h"
      87              : #include "ipa-prop.h"
      88              : #include "ipa-fnsummary.h"
      89              : #include "wide-int-bitmask.h"
      90              : #include "tree-vector-builder.h"
      91              : #include "debug.h"
      92              : #include "dwarf2out.h"
      93              : #include "i386-builtins.h"
      94              : #include "i386-features.h"
      95              : #include "i386-expand.h"
      96              : 
      97              : const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
                          /* Base name of each stub, indexed by enum xlogue_stub.  get_stub_name
                             formats an entry as "__<avx|sse>_<base name>_<register count>".  */
      98              :   "savms64",
      99              :   "resms64",
     100              :   "resms64x",
     101              :   "savms64f",
     102              :   "resms64f",
     103              :   "resms64fx"
     104              : };
     105              : 
     106              : const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
                        /* Registers in the order the layout stores them.  The xlogue_layout
                           constructor walks this table to assign offsets, and
                           count_stub_managed_regs / is_stub_managed_reg walk it to decide
                           which registers a stub handles.  */
     107              : /* The below offset values are where each register is stored for the layout
     108              :    relative to incoming stack pointer.  The value of each m_regs[].offset will
     109              :    be relative to the incoming base pointer (rax or rsi) used by the stub.
     110              : 
     111              :     s_instances:   0            1               2               3
     112              :     Offset:                                     realigned or    aligned + 8
     113              :     Register       aligned      aligned + 8     aligned w/HFP   w/HFP   */
     114              :     XMM15_REG,  /* 0x10         0x18            0x10            0x18    */
     115              :     XMM14_REG,  /* 0x20         0x28            0x20            0x28    */
     116              :     XMM13_REG,  /* 0x30         0x38            0x30            0x38    */
     117              :     XMM12_REG,  /* 0x40         0x48            0x40            0x48    */
     118              :     XMM11_REG,  /* 0x50         0x58            0x50            0x58    */
     119              :     XMM10_REG,  /* 0x60         0x68            0x60            0x68    */
     120              :     XMM9_REG,   /* 0x70         0x78            0x70            0x78    */
     121              :     XMM8_REG,   /* 0x80         0x88            0x80            0x88    */
     122              :     XMM7_REG,   /* 0x90         0x98            0x90            0x98    */
     123              :     XMM6_REG,   /* 0xa0         0xa8            0xa0            0xa8    */
     124              :     SI_REG,     /* 0xa8         0xb0            0xa8            0xb0    */
     125              :     DI_REG,     /* 0xb0         0xb8            0xb0            0xb8    */
     126              :     BX_REG,     /* 0xb8         0xc0            0xb8            0xc0    */
     127              :     BP_REG,     /* 0xc0         0xc8            N/A             N/A     */
     128              :     R12_REG,    /* 0xc8         0xd0            0xc0            0xc8    */
     129              :     R13_REG,    /* 0xd0         0xd8            0xc8            0xd0    */
     130              :     R14_REG,    /* 0xd8         0xe0            0xd0            0xd8    */
     131              :     R15_REG,    /* 0xe0         0xe8            0xd8            0xe0    */
     132              : };
     133              : 
     134              : /* Instantiate static const values.  */
                        /* These are out-of-class definitions without initializers; the values
                           themselves presumably come from the class declaration in
                           i386-features.h (not visible here).  */
     135              : const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
     136              : const unsigned xlogue_layout::MIN_REGS;
     137              : const unsigned xlogue_layout::MAX_REGS;
     138              : const unsigned xlogue_layout::MAX_EXTRA_REGS;
     139              : const unsigned xlogue_layout::VARIANT_COUNT;
     140              : const unsigned xlogue_layout::STUB_NAME_MAX_LEN;
     141              : 
     142              : /* Initialize xlogue_layout::s_stub_names to zero.  */
                        /* There is no explicit initializer: as a static array it starts out
                           zero-filled, and get_stub_name treats an empty first character as
                           "name not generated yet".  The first index is non-AVX (0) / AVX (1),
                           then the stub, then the number of extra registers.  */
     143              : char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
     144              :                                 [STUB_NAME_MAX_LEN];
     145              : 
     146              : /* Instantiates all xlogue_layout instances.  */
                        /* Each entry is built from (stack_align_off_in, hfp).  get_instance
                           indexes this array with an enum xlogue_stub_sets value, so the
                           entry order has to match that enum.  */
     147              : const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
     148              :   xlogue_layout (0, false),
     149              :   xlogue_layout (8, false),
     150              :   xlogue_layout (0, true),
     151              :   xlogue_layout (8, true)
     152              : };
     153              : 
     154              : /* Return an appropriate const instance of xlogue_layout based upon values
     155              :    in cfun->machine and crtl.  */
                        /* The result is an element of s_instances, selected by
                           stack_realign_fp, frame_pointer_needed and the current function's
                           call_ms2sysv_pad_in flag.  */
     156              : const class xlogue_layout &
     157        49891 : xlogue_layout::get_instance ()
     158              : {
     159        49891 :   enum xlogue_stub_sets stub_set;
     160        49891 :   bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;
     161              :
                          /* When stack_realign_fp is set, aligned_plus_8 is not consulted.  */
     162        49891 :   if (stack_realign_fp)
     163              :     stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
     164        40910 :   else if (frame_pointer_needed)
     165        25246 :     stub_set = aligned_plus_8
     166        31552 :               ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
     167              :               : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
     168              :   else
     169         9358 :     stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;
     170              :
     171        49891 :   return s_instances[stub_set];
     172              : }
     173              : 
     174              : /* Determine how many clobbered registers can be saved by the stub.
     175              :    Returns the count of registers the stub will save and restore.  */
                        /* The count starts at MIN_REGS and is incremented for each further
                           REG_ORDER entry for which ix86_save_reg returns true, stopping at
                           the first entry for which it returns false.  */
     176              : unsigned
     177        35225 : xlogue_layout::count_stub_managed_regs ()
     178              : {
     179        35225 :   bool hfp = frame_pointer_needed || stack_realign_fp;
     180        35225 :   unsigned i, count;
     181        35225 :   unsigned regno;
     182              :
     183        94890 :   for (count = i = MIN_REGS; i < MAX_REGS; ++i)
     184              :     {
     185        93670 :       regno = REG_ORDER[i];
                              /* With a hard frame pointer, BP_REG is passed over without being
                                 counted and without ending the scan.  */
     186        93670 :       if (regno == BP_REG && hfp)
     187        18200 :         continue;
     188        75470 :       if (!ix86_save_reg (regno, false, false))
     189              :         break;
     190        41465 :       ++count;
     191              :     }
     192        35225 :   return count;
     193              : }
     194              : 
     195              : /* Determine if register REGNO is a stub managed register given the
     196              :    total COUNT of stub managed registers.  */
                        /* Returns true if REGNO is found among the leading REG_ORDER entries
                           covered by COUNT, false otherwise.  */
     197              : bool
     198      2650688 : xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
     199              : {
     200      2650688 :   bool hfp = frame_pointer_needed || stack_realign_fp;
     201      2650688 :   unsigned i;
     202              :
     203     34587805 :   for (i = 0; i < count; ++i)
     204              :     {
     205     32436986 :       gcc_assert (i < MAX_REGS);
                              /* With a hard frame pointer the BP_REG slot does not use up one
                                 of the COUNT registers, so the scanned window is widened by
                                 one entry instead of comparing against it.  */
     206     32436986 :       if (REG_ORDER[i] == BP_REG && hfp)
     207       522627 :         ++count;
     208     31914359 :       else if (REG_ORDER[i] == regno)
     209              :         return true;
     210              :     }
     211              :   return false;
     212              : }
     213              : 
     214              : /* Constructor for xlogue_layout.  */
                        /* STACK_ALIGN_OFF_IN is the starting offset for the layout and HFP
                           says whether BP_REG is left out of it.  Fills m_regs[] with each
                           remaining REG_ORDER register and its offset, stored relative to
                           STUB_INDEX_OFFSET.  */
     215      1138364 : xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
     216      1138364 :   : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),
     217      1138364 :     m_stack_align_off_in (stack_align_off_in)
     218              : {
     219      1138364 :   HOST_WIDE_INT offset = stack_align_off_in;
     220      1138364 :   unsigned i, j;
     221              :
                          /* I indexes REG_ORDER, J indexes m_regs; they diverge once BP_REG
                             has been skipped.  */
     222     21628916 :   for (i = j = 0; i < MAX_REGS; ++i)
     223              :     {
     224     20490552 :       unsigned regno = REG_ORDER[i];
     225              :
     226     20490552 :       if (regno == BP_REG && hfp)
     227       569182 :         continue;
                              /* SSE registers advance the offset by 16 bytes, all other
                                 registers by 8.  */
     228     19921370 :       if (SSE_REGNO_P (regno))
     229              :         {
     230     11383640 :           offset += 16;
     231              :           /* Verify that SSE regs are always aligned.  */
                                  /* NOTE(review): OFFSET already starts from
                                     stack_align_off_in, so the asserted sum includes that
                                     value twice -- confirm this is intended.  */
     232     11383640 :           gcc_assert (!((stack_align_off_in + offset) & 15));
     233              :         }
     234              :       else
     235      8537730 :         offset += 8;
     236              :
     237     19921370 :       m_regs[j].regno    = regno;
     238     19921370 :       m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
     239              :     }
                          /* The number of registers stored must match the 17/18 chosen for
                             m_nregs above.  */
     240      1138364 :   gcc_assert (j == m_nregs);
     241      1138364 : }
     242              : 
     243              : const char *
     244        14666 : xlogue_layout::get_stub_name (enum xlogue_stub stub,
     245              :                               unsigned n_extra_regs)
     246              : {
                          /* Return the symbol name of stub STUB for N_EXTRA_REGS extra
                             registers.  Names are cached in s_stub_names, indexed by
                             non-AVX/AVX, stub and N_EXTRA_REGS; the returned pointer refers
                             to that static storage.  */
     247        14666 :   const int have_avx = TARGET_AVX;
     248        14666 :   char *name = s_stub_names[!!have_avx][stub][n_extra_regs];
     249              :
     250              :   /* Lazy init */
                          /* An empty first character means this name has not been formatted
                             yet.  */
     251        14666 :   if (!*name)
     252              :     {
     253          362 :       int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
     254              :                           (have_avx ? "avx" : "sse"),
     255          181 :                           STUB_BASE_NAMES[stub],
     256              :                           MIN_REGS + n_extra_regs);
                              /* Checks that the formatted name was not truncated.  */
     257          181 :       gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN);
     258              :     }
     259              :
     260        14666 :   return name;
     261              : }
     262              : 
     263              : /* Return rtx of a symbol ref for the entry point (based upon
     264              :    cfun->machine->call_ms2sysv_extra_regs) of the specified stub.  */
                        /* The symbol name comes from get_stub_name.  Asserts that STUB is a
                           valid stub index and that crtl->stack_realign_finalized is set.  */
     265              : rtx
     266        14666 : xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
     267              : {
     268        14666 :   const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
     269        14666 :   gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
     270        14666 :   gcc_assert (stub < XLOGUE_STUB_COUNT);
     271        14666 :   gcc_assert (crtl->stack_realign_finalized);
     272              :
     273        14666 :   return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
     274              : }
     275              : 
     276              : unsigned scalar_chain::max_id = 0;
                        /* max_id is pre-incremented by the scalar_chain constructor to obtain
                           each new chain's chain_id.  */
     277              : 
     278              : namespace {
     279              : 
     280              : /* Initialize new chain.  */
                        /* SMODE_ and VMODE_ are stored as the chain's smode and vmode.  The
                           chain gets a fresh chain_id, empty insns/defs/defs_conv/insns_conv
                           bitmaps, zeroed SSE<->integer costs and a visit budget taken from
                           x86_stv_max_visits.  */
     281              :
     282      6377907 : scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
     283              : {
     284      6377907 :   smode = smode_;
     285      6377907 :   vmode = vmode_;
     286              :
     287      6377907 :   chain_id = ++max_id;
     288              :
     289      6377907 :    if (dump_file)
     290          136 :     fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
     291              :
                          /* Paired with bitmap_obstack_release (NULL) in the destructor.  */
     292      6377907 :   bitmap_obstack_initialize (NULL);
     293      6377907 :   insns = BITMAP_ALLOC (NULL);
     294      6377907 :   defs = BITMAP_ALLOC (NULL);
     295      6377907 :   defs_conv = BITMAP_ALLOC (NULL);
     296      6377907 :   insns_conv = BITMAP_ALLOC (NULL);
                          /* The queue is only allocated for the duration of build ().  */
     297      6377907 :   queue = NULL;
     298              :
     299      6377907 :   cost_sse_integer = 0;
     300      6377907 :   weighted_cost_sse_integer = 0 ;
     301      6377907 :   max_visits = x86_stv_max_visits;
     302      6377907 : }
     303              : 
     304              : /* Free chain's data.  */
                        /* Frees the four bitmaps allocated by the constructor.  The queue is
                           not freed here; build () frees it on both of its exit paths.  */
     305              :
     306      6377907 : scalar_chain::~scalar_chain ()
     307              : {
     308      6377907 :   BITMAP_FREE (insns);
     309      6377907 :   BITMAP_FREE (defs);
     310      6377907 :   BITMAP_FREE (defs_conv);
     311      6377907 :   BITMAP_FREE (insns_conv);
     312      6377907 :   bitmap_obstack_release (NULL);
     313      6377907 : }
     314              : 
     315              : /* Add instruction into the chain's queue.  */
                        /* INSN_UID is set in the queue bitmap.  The dump message is only
                           printed when bitmap_set_bit returns true, i.e. (per GCC's bitmap
                           API) when the insn was not queued already.  */
     316              :
     317              : void
     318      8280191 : scalar_chain::add_to_queue (unsigned insn_uid)
     319              : {
     320      8280191 :   if (!bitmap_set_bit (queue, insn_uid))
     321              :     return;
     322              :
     323      6246698 :   if (dump_file)
     324          141 :     fprintf (dump_file, "  Adding insn %d into chain's #%d queue\n",
     325              :              insn_uid, chain_id);
     326              : }
     327              : 
     328              : /* For DImode conversion, mark register defined by DEF as requiring
     329              :    conversion.  */
                        /* The register is recorded in defs_conv.  If the defining insn is not
                           in the chain (not in INSNS) it is also recorded in insns_conv and
                           an integer-to-SSE move is costed; if it is in the chain an
                           SSE-to-integer move is costed.  Nothing is costed when everything
                           involved was already recorded.  The cost is added to
                           cost_sse_integer and, when the block is optimized for speed, also
                           to weighted_cost_sse_integer after scaling by
                           bb->count.to_sreal_scale (entry_count).  */
     330              :
     331              : void
     332      9401198 : scalar_chain::mark_dual_mode_def (df_ref def)
     333              : {
     334      9401198 :   gcc_assert (DF_REF_REG_DEF_P (def));
     335              :
     336              :   /* Record the def/insn pair so we can later efficiently iterate over
     337              :      the defs to convert on insns not in the chain.  */
     338      9401198 :   bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
     339      9401198 :   basic_block bb = BLOCK_FOR_INSN (DF_REF_INSN (def));
     340      9401198 :   profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
     341      9401198 :   bool speed_p = optimize_bb_for_speed_p (bb);
     342      9401198 :   int cost = 0;
     343              :
     344      9401198 :   if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def)))
     345              :     {
                              /* Return only if both the insn and the register were already
                                 recorded; bitmap_set_bit is evaluated first, so the insn is
                                 added to insns_conv either way.  */
     346      2718677 :       if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def))
     347      2718677 :           && !reg_new)
     348      1414282 :         return;
     349              :
     350              :       /* Cost integer to sse moves.  */
     351      2470566 :       if (speed_p)
     352      2191452 :         cost = COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
     353       279114 :       else if (TARGET_64BIT || smode == SImode)
     354              :         cost = COSTS_N_BYTES (4);
     355              :       /* vmovd (4 bytes) + vpinsrd (6 bytes).  */
     356        18683 :       else if (TARGET_SSE4_1)
     357              :         cost = COSTS_N_BYTES (10);
     358              :       /* movd (4 bytes) + movd (4 bytes) + unpckldq (4 bytes).  */
     359              :       else
     360      7986916 :         cost = COSTS_N_BYTES (12);
     361              :     }
     362              :   else
     363              :     {
                              /* The defining insn is in the chain: only a newly recorded
                                 register is costed.  */
     364      6682521 :       if (!reg_new)
     365              :         return;
     366              :
     367              :       /* Cost sse to integer moves.  */
     368      5516350 :       if (speed_p)
     369      4958840 :         cost = COSTS_N_INSNS (ix86_cost->sse_to_integer) / 2;
     370       557510 :       else if (TARGET_64BIT || smode == SImode)
     371              :         cost = COSTS_N_BYTES (4);
     372              :       /* vmovd (4 bytes) + vpextrd (6 bytes).  */
     373         3015 :       else if (TARGET_SSE4_1)
     374              :         cost = COSTS_N_BYTES (10);
     375              :       /* movd (4 bytes) + psrlq (5 bytes) + movd (4 bytes).  */
     376              :       else
     377      7986916 :         cost = COSTS_N_BYTES (13);
     378              :     }
     379              :
                          /* The weighted cost is only accumulated for blocks optimized for
                             speed; the unweighted cost is accumulated in every case.  */
     380      7986916 :   if (speed_p)
     381      7150292 :     weighted_cost_sse_integer += bb->count.to_sreal_scale (entry_count) * cost;
     382              :
     383      7986916 :   cost_sse_integer += cost;
     384              :
     385      7986916 :   if (dump_file)
     386          240 :     fprintf (dump_file,
     387              :              "  Mark r%d def in insn %d as requiring both modes in chain #%d\n",
     388          240 :              DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
     389              : }
     390              : 
     391              : /* Check REF's chain to add new insns into a queue
     392              :    and find registers requiring conversion.  Return true if OK, false
     393              :    if the analysis was aborted.  */
                        /* CANDIDATES holds the insns that may still be added to a chain and
                           DISALLOWED the insns of previously aborted chains.  The analysis is
                           aborted when the max_visits budget is used up or when a linked insn
                           is in DISALLOWED.  REF's insn must already be in the chain.  */
     394              :
     395              : bool
     396     17811632 : scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref,
     397              :                                       bitmap disallowed)
     398              : {
     399     17811632 :   df_link *chain;
     400     17811632 :   bool mark_def = false;
     401              :
     402     17811632 :   gcc_checking_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)));
     403              :
     404     62349069 :   for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
     405              :     {
     406     44544326 :       unsigned uid = DF_REF_INSN_UID (chain->ref);
     407              :
                              /* Insns that are not NONDEBUG_INSN_P are skipped and do not
                                 consume the visit budget.  */
     408     44544326 :       if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
     409      8222393 :         continue;
     410              :
     411     36321933 :       if (--max_visits == 0)
     412              :         return false;
     413              :
                              /* Refs for which DF_REF_REG_MEM_P holds bypass the checks below
                                 and are always treated as not convertible.  */
     414     36321347 :       if (!DF_REF_REG_MEM_P (chain->ref))
     415              :         {
                                  /* Already part of this chain: nothing to do.  */
     416     30233453 :           if (bitmap_bit_p (insns, uid))
     417      9533741 :             continue;
     418              :
                                  /* Still a candidate: queue it for this chain.  */
     419     20699712 :           if (bitmap_bit_p (candidates, uid))
     420              :             {
     421      8280191 :               add_to_queue (uid);
     422      8280191 :               continue;
     423              :             }
     424              :
     425              :           /* If we run into parts of an aborted chain discovery abort.  */
     426     12419521 :           if (bitmap_bit_p (disallowed, uid))
     427              :             return false;
     428              :         }
     429              :
                              /* The linked ref is not convertible.  A def is marked as
                                 dual-mode right away; a use only sets MARK_DEF so that REF
                                 itself is marked once, after the loop.  */
     430     18501112 :       if (DF_REF_REG_DEF_P (chain->ref))
     431              :         {
     432      2718677 :           if (dump_file)
     433          125 :             fprintf (dump_file, "  r%d def in insn %d isn't convertible\n",
     434              :                      DF_REF_REGNO (chain->ref), uid);
     435      2718677 :           mark_dual_mode_def (chain->ref);
     436              :         }
     437              :       else
     438              :         {
     439     15782435 :           if (dump_file)
     440          524 :             fprintf (dump_file, "  r%d use in insn %d isn't convertible\n",
     441              :                      DF_REF_REGNO (chain->ref), uid);
     442              :           mark_def = true;
     443              :         }
     444              :     }
     445              :
     446     17804743 :   if (mark_def)
     447      6682521 :     mark_dual_mode_def (ref);
     448              :
     449              :   return true;
     450              : }
     451              : 
     452              : /* Check whether X is a convertible *concatditi_? variant.  X is known
     453              :    to be any_or_plus:TI, i.e. PLUS:TI, IOR:TI or XOR:TI.  */
                        /* Returns true when one operand is (ashift (zero_extend reg) 64) and
                           the other is (zero_extend reg), with both inner operands being
                           DImode registers.  The ASHIFT may be either operand.  */
     454              :
     455              : static bool
     456        26876 : timode_concatdi_p (rtx x)
     457              : {
     458        26876 :   rtx op0 = XEXP (x, 0);
     459        26876 :   rtx op1 = XEXP (x, 1);
     460              :
                          /* Canonicalize so that an ASHIFT found as the second operand is
                             examined as OP0.  */
     461        26876 :   if (GET_CODE (op1) == ASHIFT)
     462          957 :     std::swap (op0, op1);
     463              :
     464        26876 :   return GET_CODE (op0) == ASHIFT
     465        18050 :          && GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
     466        18050 :          && GET_MODE (XEXP (XEXP (op0, 0), 0)) == DImode
     467        18050 :          && REG_P (XEXP (XEXP (op0, 0), 0))
     468        17915 :          && CONST_INT_P (XEXP (op0, 1))
     469        17915 :          && INTVAL (XEXP (op0, 1)) == 64
     470        17915 :          && GET_CODE (op1) == ZERO_EXTEND
     471        16958 :          && GET_MODE (XEXP (op1, 0)) == DImode
     472        43834 :          && REG_P (XEXP (op1, 0));
     473              : }
     474              : 
     475              : 
     476              : /* Add instruction into a chain.  Return true if OK, false if the search
     477              :    was aborted.  */
                        /* INSN_UID is added to INSNS, the pseudo register it sets (if any) is
                           added to DEFS, and the def-use chains of its defs and uses are
                           analyzed via analyze_register_chain.  CANDIDATES and DISALLOWED are
                           passed through to that analysis.  */
     478              :
     479              : bool
     480     12610686 : scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid,
     481              :                         bitmap disallowed)
     482              : {
                          /* Nothing to do if the insn already is in the chain.  */
     483     12610686 :   if (!bitmap_set_bit (insns, insn_uid))
     484              :     return true;
     485              :
     486     12610686 :   if (dump_file)
     487          277 :     fprintf (dump_file, "  Adding insn %d to chain #%d\n", insn_uid, chain_id);
     488              :
     489     12610686 :   rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
     490     12610686 :   rtx def_set = single_set (insn);
     491     12610686 :   if (def_set && REG_P (SET_DEST (def_set))
     492     22349260 :       && !HARD_REGISTER_P (SET_DEST (def_set)))
     493      9738562 :     bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
     494              :
     495              :   /* ???  The following is quadratic since analyze_register_chain
     496              :      iterates over all refs to look for dual-mode regs.  Instead this
     497              :      should be done separately for all regs mentioned in the chain once.  */
     498     12610686 :   df_ref ref;
     499     25773262 :   for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
     500     13165186 :     if (!HARD_REGISTER_P (DF_REF_REG (ref)))
     501      9738562 :       if (!analyze_register_chain (candidates, ref, disallowed))
     502              :         return false;
     503              :
     504              :   /* The operand(s) of VEC_SELECT, ZERO_EXTEND and similar ops don't need
     505              :      to be converted/convertible.  */
                          /* For these sources the use chains are not analyzed at all; the
                             function returns right after the defs have been handled.  */
     506     12608076 :   if (def_set)
     507     12608076 :     switch (GET_CODE (SET_SRC (def_set)))
     508              :       {
     509              :       case VEC_SELECT:
     510              :         return true;
     511          122 :       case ZERO_EXTEND:
     512          122 :         if (GET_MODE (XEXP (SET_SRC (def_set), 0)) == DImode)
     513              :           return true;
     514              :         break;
     515      2379743 :       case PLUS:
     516      2379743 :       case IOR:
     517      2379743 :       case XOR:
     518      2379743 :         if (smode == TImode && timode_concatdi_p (SET_SRC (def_set)))
     519              :           return true;
     520              :         break;
     521              :       default:
     522              :         break;
     523              :       }
     524              :
                          /* Uses for which DF_REF_REG_MEM_P holds are not analyzed.  */
     525     27605123 :   for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
     526     15038033 :     if (!DF_REF_REG_MEM_P (ref))
     527      8073070 :       if (!analyze_register_chain (candidates, ref, disallowed))
     528              :         return false;
     529              :
     530              :   return true;
     531              : }
     532              : 
     533              : /* Build new chain starting from insn INSN_UID recursively
     534              :    adding all dependent uses and definitions.  Return true if OK, false
     535              :    if the chain discovery was aborted.  */
                        /* Insns added to the chain are removed from CANDIDATES.  On abort
                           the insns collected so far are added to DISALLOWED.  The work queue
                           is allocated here and freed again before returning.  */
     536              :
     537              : bool
     538      6377907 : scalar_chain::build (bitmap candidates, unsigned insn_uid, bitmap disallowed)
     539              : {
     540      6377907 :   queue = BITMAP_ALLOC (NULL);
     541      6377907 :   bitmap_set_bit (queue, insn_uid);
     542              :
     543      6377907 :   if (dump_file)
     544          136 :     fprintf (dump_file, "Building chain #%d...\n", chain_id);
     545              :
                          /* Worklist loop: add_insn may queue further insns through
                             analyze_register_chain / add_to_queue.  */
     546     18981704 :   while (!bitmap_empty_p (queue))
     547              :     {
     548     12610686 :       insn_uid = bitmap_first_set_bit (queue);
     549     12610686 :       bitmap_clear_bit (queue, insn_uid);
     550     12610686 :       bitmap_clear_bit (candidates, insn_uid);
     551     12610686 :       if (!add_insn (candidates, insn_uid, disallowed))
     552              :         {
     553              :           /* If we aborted the search, put the insns found so far into the set
     554              :              of disallowed insns so that further searches reaching them also
     555              :              abort and thus we abort the whole but yet undiscovered chain.  */
     556         6889 :           bitmap_ior_into (disallowed, insns);
     557         6889 :           if (dump_file)
     558            0 :             fprintf (dump_file, "Aborted chain #%d discovery\n", chain_id);
     559         6889 :           BITMAP_FREE (queue);
     560         6889 :           return false;
     561              :         }
     562              :     }
     563              :
                          /* Dump the collected insns and, if there are any, the registers
                             that need both modes.  */
     564      6371018 :   if (dump_file)
     565              :     {
     566          136 :       fprintf (dump_file, "Collected chain #%d...\n", chain_id);
     567          136 :       fprintf (dump_file, "  insns: ");
     568          136 :       dump_bitmap (dump_file, insns);
     569          136 :       if (!bitmap_empty_p (defs_conv))
     570              :         {
     571          136 :           bitmap_iterator bi;
     572          136 :           unsigned id;
     573          136 :           const char *comma = "";
     574          136 :           fprintf (dump_file, "  defs to convert: ");
     575          366 :           EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
     576              :             {
     577          230 :               fprintf (dump_file, "%sr%d", comma, id);
     578          230 :               comma = ", ";
     579              :             }
     580          136 :           fprintf (dump_file, "\n");
     581              :         }
     582              :     }
     583              :
     584      6371018 :   BITMAP_FREE (queue);
     585              :
     586      6371018 :   return true;
     587              : }
     588              : 
     589              : /* Return a cost of building a vector constant
     590              :    instead of using a scalar one.  */
                        /* EXP must be a CONST_INT; BB is the block whose size/speed setting
                           selects the cost.  A constant accepted by standard_sse_constant_p
                           for the chain's vmode costs one sse_op; otherwise the cost is
                           COSTS_N_BYTES (8) when optimizing BB for size and half an SSE load
                           in every other case.  */
     591              :
     592              : int
     593      2686558 : general_scalar_chain::vector_const_cost (rtx exp, basic_block bb)
     594              : {
     595      2686558 :   gcc_assert (CONST_INT_P (exp));
     596              :
     597      2686558 :   if (standard_sse_constant_p (exp, vmode))
     598       620050 :     return ix86_cost->sse_op;
     599      2066508 :   if (optimize_bb_for_size_p (bb))
     600              :     return COSTS_N_BYTES (8);
     601              :   /* We have separate costs for SImode and DImode, use SImode costs
     602              :      for smaller modes.  */
     603      2458852 :   return COSTS_N_INSNS (ix86_cost->sse_load[smode == DImode ? 1 : 0]) / 2;
     604              : }
     605              : 
     606              : /* Return true if converting the chain is profitable.  For every insn in
                           the chain compute IGAIN, the estimated cost of the scalar form minus
                           the cost of the vector form.  GAIN is the plain sum of IGAIN;
                           WEIGHTED_GAIN sums IGAIN scaled by the frequency of the insn's block
                           relative to the function entry, for blocks optimized for speed only.
                           The weighted gain is compared with weighted_cost_sse_integer; when
                           the two are equal, GAIN is compared with cost_sse_integer instead.  */
     607              : 
     608              : bool
     609      5897243 : general_scalar_chain::compute_convert_gain ()
     610              : {
     611      5897243 :   bitmap_iterator bi;
     612      5897243 :   unsigned insn_uid;
     613      5897243 :   int gain = 0;
     614      5897243 :   sreal weighted_gain = 0;
     615              : 
     616      5897243 :   if (dump_file)
     617          136 :     fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
     618              : 
     619              :   /* SSE costs distinguish between SImode and DImode loads/stores, for
     620              :      int costs factor in the number of GPRs involved.  When supporting
     621              :      smaller modes than SImode the int load/store costs need to be
     622              :      adjusted as well.  */
     623      5897243 :   unsigned sse_cost_idx = smode == DImode ? 1 : 0;
     624      5897243 :   int m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;  /* 2 only for DImode when !TARGET_64BIT.  */
     625              : 
     626     17552104 :   EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
     627              :     {
     628     11654861 :       rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
     629     11654861 :       rtx def_set = single_set (insn);
     630     11654861 :       rtx src = SET_SRC (def_set);
     631     11654861 :       rtx dst = SET_DEST (def_set);
     632     11654861 :       basic_block bb = BLOCK_FOR_INSN (insn);
     633     11654861 :       int igain = 0;
     634     11654861 :       profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
     635     11654861 :       bool speed_p = optimize_bb_for_speed_p (bb);
     636     11654861 :       sreal bb_freq = bb->count.to_sreal_scale (entry_count);
     637              : 
     638     11654861 :       if (REG_P (src) && REG_P (dst))
     639              :         {
     640       937970 :           if (!speed_p)
     641              :             /* reg-reg move is 2 bytes, while SSE 3.  */
     642       188178 :             igain += COSTS_N_BYTES (2 * m - 3);
     643              :           else
     644              :             /* Move costs are normalized to reg-reg move having cost 2.  */
     645       749792 :             igain += COSTS_N_INSNS (2 * m - ix86_cost->xmm_move) / 2;
     646              :         }
     647     10716891 :       else if (REG_P (src) && MEM_P (dst))
     648              :         {
     649      2301068 :           if (!speed_p)
     650              :             /* Integer load/store is 3+ bytes and SSE 4+.  */
     651       191672 :             igain += COSTS_N_BYTES (3 * m - 4);
     652              :           else
     653      2109396 :             igain
     654      2109396 :               += COSTS_N_INSNS (m * ix86_cost->int_store[2]
     655              :                                 - ix86_cost->sse_store[sse_cost_idx]) / 2;
     656              :         }
     657      8415823 :       else if (MEM_P (src) && REG_P (dst))
     658              :         {
     659      3780986 :           if (!speed_p)
     660       358324 :             igain += COSTS_N_BYTES (3 * m - 4);
     661              :           else
     662      3422662 :             igain += COSTS_N_INSNS (m * ix86_cost->int_load[2]
     663              :                                     - ix86_cost->sse_load[sse_cost_idx]) / 2;
     664              :         }
     665              :       else
     666              :         {
     667              :           /* For operations on memory operands, include the overhead
     668              :              of explicit load and store instructions.  */
     669      4634837 :           if (MEM_P (dst))
     670              :             {
     671        67157 :               if (!speed_p)
     672              :                 /* ??? This probably should account for the size difference
     673              :                    of SSE and integer load rather than the full SSE load.  */
     674              :                 igain -= COSTS_N_BYTES (8);
     675              :               else
     676              :                 {
     677        58045 :                   int cost = (m * (ix86_cost->int_load[2]
     678        58045 :                                    + ix86_cost->int_store[2])
     679        58045 :                              - (ix86_cost->sse_load[sse_cost_idx] +
     680        58045 :                                 ix86_cost->sse_store[sse_cost_idx]));
     681        58045 :                   igain += COSTS_N_INSNS (cost) / 2;
     682              :                 }
     683              :             }
     684              : 
     685      4634837 :           switch (GET_CODE (src))
     686              :             {
     687       474904 :             case ASHIFT:
     688       474904 :             case ASHIFTRT:
     689       474904 :             case LSHIFTRT:
     690       474904 :               if (m == 2)
     691              :                 {
     692        16941 :                   if (INTVAL (XEXP (src, 1)) >= 32)
     693        11523 :                     igain += ix86_cost->add;
     694              :                   /* Gain for extend highpart case.  */
     695         5418 :                   else if (GET_CODE (XEXP (src, 0)) == ASHIFT)
     696            0 :                     igain += ix86_cost->shift_const - ix86_cost->sse_op;
     697              :                   else
     698         5418 :                     igain += ix86_cost->shift_const;
     699              :                 }
     700              : 
     701       474904 :               igain += ix86_cost->shift_const - ix86_cost->sse_op;
     702              : 
     703       474904 :               if (CONST_INT_P (XEXP (src, 0)))
     704            0 :                 igain -= vector_const_cost (XEXP (src, 0), bb);
     705              :               break;
     706              : 
     707         3819 :             case ROTATE:
     708         3819 :             case ROTATERT:
     709         3819 :               igain += m * ix86_cost->shift_const;
     710         3819 :               if (TARGET_AVX512VL)
     711          204 :                 igain -= ix86_cost->sse_op;
     712         3615 :               else if (smode == DImode)
     713              :                 {
     714          612 :                   int bits = INTVAL (XEXP (src, 1));
     715          612 :                   if ((bits & 0x0f) == 0)
     716          128 :                     igain -= ix86_cost->sse_op;
     717          484 :                   else if ((bits & 0x07) == 0)
     718           27 :                     igain -= 2 * ix86_cost->sse_op;
     719              :                   else
     720          457 :                     igain -= 3 * ix86_cost->sse_op;
     721              :                 }
     722         3003 :               else if (INTVAL (XEXP (src, 1)) == 16)
     723          242 :                 igain -= ix86_cost->sse_op;
     724              :               else
     725         2761 :                 igain -= 2 * ix86_cost->sse_op;
     726              :               break;
     727              : 
     728      2858422 :             case AND:
     729      2858422 :             case IOR:
     730      2858422 :             case XOR:
     731      2858422 :             case PLUS:
     732      2858422 :             case MINUS:
     733      2858422 :               igain += m * ix86_cost->add - ix86_cost->sse_op;
     734              :               /* Additional gain for andnot for targets without BMI.  */
     735      2858422 :               if (GET_CODE (XEXP (src, 0)) == NOT
     736         3599 :                   && !TARGET_BMI)
     737         3590 :                 igain += m * ix86_cost->add;
     738              : 
     739      2858422 :               if (CONST_INT_P (XEXP (src, 0)))
     740            0 :                 igain -= vector_const_cost (XEXP (src, 0), bb);
     741      2858422 :               if (CONST_INT_P (XEXP (src, 1)))
     742      1702852 :                 igain -= vector_const_cost (XEXP (src, 1), bb);
     743      2858422 :               if (MEM_P (XEXP (src, 1)))
     744              :                 {
     745        89267 :                   if (!speed_p)
     746        21291 :                     igain -= COSTS_N_BYTES (m == 2 ? 3 : 5);
     747              :                   else
     748        78617 :                     igain += COSTS_N_INSNS
     749              :                                (m * ix86_cost->int_load[2]
     750              :                                  - ix86_cost->sse_load[sse_cost_idx]) / 2;
     751              :                 }
     752              :               break;
     753              : 
     754        49831 :             case NEG:
     755        49831 :             case NOT:
     756        49831 :               igain -= ix86_cost->sse_op + COSTS_N_INSNS (1);
     757              : 
     758        49831 :               if (GET_CODE (XEXP (src, 0)) != ABS)
     759              :                 {
     760        49831 :                   igain += m * ix86_cost->add;
     761        49831 :                   break;
     762              :                 }
     763              :               /* FALLTHRU */
     764              : 
     765         1004 :             case ABS:
     766         1004 :             case SMAX:
     767         1004 :             case SMIN:
     768         1004 :             case UMAX:
     769         1004 :             case UMIN:
     770              :               /* We do not have any conditional move cost, estimate it as a
     771              :                  reg-reg move.  Comparisons are costed as adds.  */
     772         1004 :               igain += m * (COSTS_N_INSNS (2) + ix86_cost->add);
     773              :               /* Integer SSE ops are all costed the same.  */
     774         1004 :               igain -= ix86_cost->sse_op;
     775         1004 :               break;
     776              : 
     777            0 :             case COMPARE:
     778            0 :               if (XEXP (src, 1) != const0_rtx)
     779              :                 {
     780              :                   /* cmp vs. pxor;pshufd;ptest.  */
     781            0 :                   igain += COSTS_N_INSNS (m - 3);
     782              :                 }
     783            0 :               else if (GET_CODE (XEXP (src, 0)) != AND)
     784              :                 {
     785              :                   /* test vs. pshufd;ptest.  */
     786            0 :                   igain += COSTS_N_INSNS (m - 2);
     787              :                 }
     788            0 :               else if (GET_CODE (XEXP (XEXP (src, 0), 0)) != NOT)
     789              :                 {
     790              :                   /* and;test vs. pshufd;ptest.  */
     791            0 :                   igain += COSTS_N_INSNS (2 * m - 2);
     792              :                 }
     793            0 :               else if (TARGET_BMI)
     794              :                 {
     795              :                   /* andn;test vs. pandn;pshufd;ptest.  */
     796            0 :                   igain += COSTS_N_INSNS (2 * m - 3);
     797              :                 }
     798              :               else
     799              :                 {
     800              :                   /* not;and;test vs. pandn;pshufd;ptest.  */
     801            0 :                   igain += COSTS_N_INSNS (3 * m - 3);
     802              :                 }
     803              :               break;
     804              : 
     805      1212913 :             case CONST_INT:
     806      1212913 :               if (REG_P (dst))
     807              :                 {
     808      1212913 :                   if (!speed_p)
     809              :                     {
     810              :                       /* xor (2 bytes) vs. xorps (3 bytes).  */
     811       229207 :                       if (src == const0_rtx)
     812       121775 :                         igain -= COSTS_N_BYTES (1);
     813              :                       /* movdi_internal vs. movv2di_internal.  */
     814              :                       /* => mov (5 bytes) vs. movaps (7 bytes).  */
     815       107432 :                       else if (x86_64_immediate_operand (src, SImode))
     816        94755 :                         igain -= COSTS_N_BYTES (2);
     817              :                       else
     818              :                         /* ??? Larger immediate constants are placed in the
     819              :                            constant pool, where the size benefit/impact of
     820              :                            STV conversion is affected by whether and how
     821              :                            often each constant pool entry is shared/reused.
     822              :                            The value below is empirically derived from the
     823              :                            CSiBE benchmark (and the optimal value may drift
     824              :                            over time).  */
     825              :                         igain += COSTS_N_BYTES (0);
     826              :                     }
     827              :                   else
     828              :                     {
     829              :                       /* DImode can be immediate for TARGET_64BIT
     830              :                          and SImode always.  */
     831       983706 :                       igain += m * COSTS_N_INSNS (1);
     832       983706 :                       igain -= vector_const_cost (src, bb);
     833              :                     }
     834              :                 }
     835            0 :               else if (MEM_P (dst))
     836              :                 {
     837            0 :                   igain += (m * ix86_cost->int_store[2]
     838            0 :                             - ix86_cost->sse_store[sse_cost_idx]);
     839            0 :                   igain -= vector_const_cost (src, bb);
     840              :                 }
     841              :               break;
     842              : 
     843        33944 :             case VEC_SELECT:
     844        33944 :               if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx)
     845              :                 {
     846              :                   // movd (4 bytes) replaced with movdqa (4 bytes).
     847        25589 :                   if (!!speed_p)  /* Same as testing speed_p.  */
     848        23829 :                     igain += COSTS_N_INSNS (ix86_cost->sse_to_integer
     849              :                                             - ix86_cost->xmm_move) / 2;
     850              :                 }
     851              :               else
     852              :                 {
     853              :                   // pshufd; movd replaced with pshufd.
     854         8355 :                   if (!speed_p)
     855          624 :                     igain += COSTS_N_BYTES (4);
     856              :                   else
     857         7731 :                     igain += ix86_cost->sse_to_integer;
     858              :                 }
     859              :               break;
     860              : 
     861            0 :             default:
     862            0 :               gcc_unreachable ();
     863              :             }
     864              :         }
     865              : 
                              /* Only insns in blocks optimized for speed contribute to the
                                 frequency-weighted sum; every insn contributes to GAIN.  */
     866     11653101 :       if (speed_p)
     867     10384812 :         weighted_gain += bb_freq * igain;
     868     11654861 :       gain += igain;
     869              : 
     870     11654861 :       if (igain != 0 && dump_file)
     871              :         {
     872           93 :           fprintf (dump_file, "  Instruction gain %d with bb_freq %.2f for",
     873              :                    igain, bb_freq.to_double ());
     874           93 :           dump_insn_slim (dump_file, insn);
     875              :         }
     876              :     }
     877              : 
     878      5897243 :   if (dump_file)
     879              :     {
     880          136 :       fprintf (dump_file, "  Instruction conversion gain: %d, \n",
     881              :                gain);
     882          136 :       fprintf (dump_file, "  Registers conversion cost: %d\n",
     883              :                cost_sse_integer);
     884          136 :       fprintf (dump_file, "  Weighted instruction conversion gain: %.2f, \n",
     885              :                weighted_gain.to_double ());
     886          136 :       fprintf (dump_file, "  Weighted registers conversion cost: %.2f\n",
     887              :                weighted_cost_sse_integer.to_double ());
     888              :     }
     889              : 
                          /* Decide on the weighted values; fall back to the unweighted ones
                             only when the weighted gain and cost are equal.  */
     890      5897243 :   if (weighted_gain != weighted_cost_sse_integer)
     891      4768084 :     return weighted_gain > weighted_cost_sse_integer;
     892              :   else
     893      1129159 :     return gain > cost_sse_integer;;
     894              : }
     895              : 
     896              : /* Insert the generated conversion instruction sequence INSNS
     897              :    after instruction AFTER.  If AFTER is a control flow insn (e.g. it
     898              :    has an EH region attached), the fallthru successor edge of its
                           block, which must exist, is split and INSNS are emitted after the
                           head of the new block instead.  */
     899              : 
     900              : void
     901        30192 : scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
     902              : {
     903        30192 :   if (!control_flow_insn_p (after))
     904              :     {
     905        29979 :       emit_insn_after (insns, after);
     906        29979 :       return;
     907              :     }
     908              : 
     909          213 :   basic_block bb = BLOCK_FOR_INSN (after);
     910          213 :   edge e = find_fallthru_edge (bb->succs);
     911          213 :   gcc_assert (e);
     912              : 
     913          213 :   basic_block new_bb = split_edge (e);
     914          213 :   emit_insn_after (insns, BB_HEAD (new_bb));
     915              : }
     916              : 
     917              : } // anon namespace
     918              : 
     919              : /* Generate the canonical SET_SRC to move GPR to a VMODE vector register,
     920              :    zeroing the upper parts.  A one-unit VMODE yields a SUBREG of GPR, a
                           two-unit VMODE a VEC_CONCAT of GPR and a zero of the element mode,
                           and any other VMODE a VEC_MERGE, with mask 1, of a VEC_DUPLICATE of
                           GPR and the zero vector.  */
     921              : 
     922              : static rtx
     923       173048 : gen_gpr_to_xmm_move_src (enum machine_mode vmode, rtx gpr)
     924              : {
     925       346096 :   switch (GET_MODE_NUNITS (vmode))
     926              :     {
     927           25 :     case 1:
     928           25 :       return gen_rtx_SUBREG (vmode, gpr, 0);
     929       172466 :     case 2:
     930       344932 :       return gen_rtx_VEC_CONCAT (vmode, gpr,
     931              :                                  CONST0_RTX (GET_MODE_INNER (vmode)));
     932          557 :     default:
     933          557 :       return gen_rtx_VEC_MERGE (vmode, gen_rtx_VEC_DUPLICATE (vmode, gpr),
     934              :                                 CONST0_RTX (vmode), GEN_INT (HOST_WIDE_INT_1U));
     935              :     }
     936              : }
     937              : 
     938              : /* Emit, through emit_conversion_insns for INSN, a sequence copying scalar
     939              :    register REG into the vector register that defs_map records for it.
                           The copy goes through a stack temporary when inter-unit moves to
                           vector registers are disabled, is assembled from the two SImode
                           halves of REG for DImode on 32-bit targets, and is a single
                           GPR-to-vector move otherwise.  */
     940              : 
     941              : void
     942         8060 : scalar_chain::make_vector_copies (rtx_insn *insn, rtx reg)
     943              : {
     944         8060 :   rtx vreg = *defs_map.get (reg);
     945              : 
     946         8060 :   start_sequence ();
     947         8060 :   if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
     948              :     {
     949            0 :       rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
     950            0 :       if (smode == DImode && !TARGET_64BIT)
     951              :         {
     952            0 :           emit_move_insn (adjust_address (tmp, SImode, 0),
     953              :                           gen_rtx_SUBREG (SImode, reg, 0));
     954            0 :           emit_move_insn (adjust_address (tmp, SImode, 4),
     955              :                           gen_rtx_SUBREG (SImode, reg, 4));
     956              :         }
     957              :       else
     958            0 :         emit_move_insn (copy_rtx (tmp), reg);
     959            0 :       emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
     960              :                               gen_gpr_to_xmm_move_src (vmode, tmp)));
     961              :     }
     962         8060 :   else if (!TARGET_64BIT && smode == DImode)
     963              :     {
     964         7949 :       if (TARGET_SSE4_1)
     965              :         {
     966          356 :           emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
     967              :                                       CONST0_RTX (V4SImode),
     968              :                                       gen_rtx_SUBREG (SImode, reg, 0)));
     969          356 :           emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
     970              :                                         gen_rtx_SUBREG (V4SImode, vreg, 0),
     971              :                                         gen_rtx_SUBREG (SImode, reg, 4),
     972              :                                         GEN_INT (2)));
     973              :         }
     974              :       else
     975              :         {
     976         7593 :           rtx tmp = gen_reg_rtx (DImode);
     977         7593 :           emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
     978              :                                       CONST0_RTX (V4SImode),
     979              :                                       gen_rtx_SUBREG (SImode, reg, 0)));
     980         7593 :           emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
     981              :                                       CONST0_RTX (V4SImode),
     982              :                                       gen_rtx_SUBREG (SImode, reg, 4)));
     983         7593 :           emit_insn (gen_vec_interleave_lowv4si
     984              :                      (gen_rtx_SUBREG (V4SImode, vreg, 0),
     985              :                       gen_rtx_SUBREG (V4SImode, vreg, 0),
     986              :                       gen_rtx_SUBREG (V4SImode, tmp, 0)));
     987              :         }
     988              :     }
     989              :   else
     990          111 :     emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
     991              :                             gen_gpr_to_xmm_move_src (vmode, reg)));
     992         8060 :   rtx_insn *seq = end_sequence ();
     993         8060 :   emit_conversion_insns (seq, insn);
     994              : 
     995         8060 :   if (dump_file)
     996            0 :     fprintf (dump_file,
     997              :              "  Copied r%d to a vector register r%d for insn %d\n",
     998            0 :              REGNO (reg), REGNO (vreg), INSN_UID (insn));
     999         8060 : }
    1000              : 
    1001              : /* Copy the definition SRC of INSN inside the chain to DST for
    1002              :    scalar uses outside of the chain.  The copy sequence is placed with
                           emit_conversion_insns relative to INSN.  It goes through a stack
                           temporary when inter-unit moves from vector registers are disabled;
                           for DImode on 32-bit targets the two SImode halves of DST are set
                           separately (VEC_SELECT of elements 0 and 1 with SSE4.1, otherwise
                           the low SImode part of a V2DImode copy taken before and after
                           shifting that copy right by 32 bits); otherwise it is a plain
                           move.  */
    1003              : 
    1004              : void
    1005        21370 : scalar_chain::convert_reg (rtx_insn *insn, rtx dst, rtx src)
    1006              : {
    1007        21370 :   start_sequence ();
    1008        21370 :   if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
    1009              :     {
    1010            0 :       rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
    1011            0 :       emit_move_insn (tmp, src);
    1012            0 :       if (!TARGET_64BIT && smode == DImode)
    1013              :         {
    1014            0 :           emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
    1015              :                           adjust_address (tmp, SImode, 0));
    1016            0 :           emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
    1017              :                           adjust_address (tmp, SImode, 4));
    1018              :         }
    1019              :       else
    1020            0 :         emit_move_insn (dst, copy_rtx (tmp));
    1021              :     }
    1022        21370 :   else if (!TARGET_64BIT && smode == DImode)
    1023              :     {
    1024        21002 :       if (TARGET_SSE4_1)
    1025              :         {
    1026            0 :           rtx tmp = gen_rtx_PARALLEL (VOIDmode,
    1027              :                                       gen_rtvec (1, const0_rtx));
    1028            0 :           emit_insn
    1029            0 :               (gen_rtx_SET
    1030              :                (gen_rtx_SUBREG (SImode, dst, 0),
    1031              :                 gen_rtx_VEC_SELECT (SImode,
    1032              :                                     gen_rtx_SUBREG (V4SImode, src, 0),
    1033              :                                     tmp)));
    1034              : 
    1035            0 :           tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
    1036            0 :           emit_insn
    1037            0 :               (gen_rtx_SET
    1038              :                (gen_rtx_SUBREG (SImode, dst, 4),
    1039              :                 gen_rtx_VEC_SELECT (SImode,
    1040              :                                     gen_rtx_SUBREG (V4SImode, src, 0),
    1041              :                                     tmp)));
    1042              :         }
    1043              :       else
    1044              :         {
    1045        21002 :           rtx vcopy = gen_reg_rtx (V2DImode);
    1046        21002 :           emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, src, 0));
    1047        21002 :           emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
    1048              :                           gen_rtx_SUBREG (SImode, vcopy, 0));
    1049        21002 :           emit_move_insn (vcopy,
    1050              :                           gen_rtx_LSHIFTRT (V2DImode,
    1051              :                                             vcopy, GEN_INT (32)));
    1052        21002 :           emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
    1053              :                           gen_rtx_SUBREG (SImode, vcopy, 0));
    1054              :         }
    1055              :     }
    1056              :   else
    1057          368 :     emit_move_insn (dst, src);
    1058              : 
    1059        21370 :   rtx_insn *seq = end_sequence ();
    1060        21370 :   emit_conversion_insns (seq, insn);
    1061              : 
    1062        21370 :   if (dump_file)
    1063            0 :     fprintf (dump_file,
    1064              :              "  Copied r%d to a scalar register r%d for insn %d\n",
    1065            0 :              REGNO (src), REGNO (dst), INSN_UID (insn));
    1066        21370 : }
    1067              : 
    1068              : /* Helper function to convert immediate constant X to VMODE.  Return the
                           all-ones VMODE vector when X is -1; otherwise return a CONST_VECTOR
                           with X in element 0 and zero in every remaining element.  */
    1069              : static rtx
    1070        39460 : smode_convert_cst (rtx x, enum machine_mode vmode)
    1071              : {
    1072              :   /* Prefer all ones vector in case of -1.  */
    1073        39460 :   if (constm1_operand (x, GET_MODE (x)))
    1074          894 :     return CONSTM1_RTX (vmode);
    1075              : 
    1076        38566 :   unsigned n = GET_MODE_NUNITS (vmode);
    1077        38566 :   rtx *v = XALLOCAVEC (rtx, n);
    1078        38566 :   v[0] = x;
    1079        44350 :   for (unsigned i = 1; i < n; ++i)
    1080         5784 :     v[i] = const0_rtx;
    1081        38566 :   return gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
    1082              : }
    1083              : 
    1084              : /* Convert operand OP in INSN.  We should handle
    1085              :    memory operands and uninitialized registers.
    1086              :    All other register uses are converted during
    1087              :    registers conversion.  V1TImode operands are left untouched.  NOT and
                           ASHIFT are converted recursively through their first operand.
                           Memory operands and scalar integer constants are loaded into a new
                           pseudo by insns emitted before INSN, and OP is replaced by a VMODE
                           SUBREG of that pseudo; a plain REG is wrapped in a VMODE SUBREG.
                           Anything else must already be a VMODE SUBREG.  */
    1088              : 
    1089              : void
    1090       246449 : scalar_chain::convert_op (rtx *op, rtx_insn *insn)
    1091              : {
    1092       246449 :   rtx tmp;
    1093              : 
    1094       246449 :   if (GET_MODE (*op) == V1TImode)
    1095              :     return;
    1096              : 
    1097       246298 :   *op = copy_rtx_if_shared (*op);
    1098              : 
    1099       246298 :   if (GET_CODE (*op) == NOT
    1100       246298 :       || GET_CODE (*op) == ASHIFT)
    1101              :     {
    1102         3490 :       convert_op (&XEXP (*op, 0), insn);
    1103         3490 :       PUT_MODE (*op, vmode);
    1104              :     }
    1105              :   else if (MEM_P (*op))
    1106              :     {
    1107       172937 :       rtx_insn *movabs = NULL;
    1108              : 
    1109              :       /* Emit MOVABS to load from a 64-bit absolute address to a GPR.  */
    1110       172937 :       if (!memory_operand (*op, GET_MODE (*op)))
    1111              :         {
    1112            0 :           tmp = gen_reg_rtx (GET_MODE (*op));
    1113            0 :           movabs = emit_insn_before (gen_rtx_SET (tmp, *op), insn);
    1114              : 
    1115            0 :           *op = tmp;
    1116              :         }
    1117              : 
    1118       172937 :       tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (GET_MODE (*op)), 0);
    1119              : 
    1120       172937 :       rtx_insn *eh_insn
    1121       172937 :         = emit_insn_before (gen_rtx_SET (copy_rtx (tmp),
    1122              :                                          gen_gpr_to_xmm_move_src (vmode, *op)),
    1123       172937 :                             insn);
    1124              : 
    1125       172937 :       if (cfun->can_throw_non_call_exceptions)
    1126              :         {
    1127              :           /* Handle REG_EH_REGION note.  */
    1128       168856 :           rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
    1129       168856 :           if (note)
    1130              :             {
    1131         3597 :               if (movabs)
    1132            0 :                 eh_insn = movabs;  /* The MOVABS is the insn reading memory.  */
    1133         3597 :               control_flow_insns.safe_push (eh_insn);
    1134         3597 :               add_reg_note (eh_insn, REG_EH_REGION, XEXP (note, 0));
    1135              :             }
    1136              :         }
    1137              : 
    1138       172937 :       *op = tmp;
    1139              : 
    1140       172937 :       if (dump_file)
    1141            0 :         fprintf (dump_file, "  Preloading operand for insn %d into r%d\n",
    1142            0 :                  INSN_UID (insn), reg_or_subregno (tmp));
    1143              :     }
    1144              :   else if (REG_P (*op))
    1145        63459 :     *op = gen_rtx_SUBREG (vmode, *op, 0);
    1146              :   else if (CONST_SCALAR_INT_P (*op))
    1147              :     {
    1148         6412 :       rtx vec_cst = smode_convert_cst (*op, vmode);
    1149              : 
                              /* Constants that are not standard SSE constants are placed in
                                 the constant pool and loaded from there.  */
    1150         6412 :       if (!standard_sse_constant_p (vec_cst, vmode))
    1151              :         {
    1152         2698 :           start_sequence ();
    1153         2698 :           vec_cst = validize_mem (force_const_mem (vmode, vec_cst));
    1154         2698 :           rtx_insn *seq = end_sequence ();
    1155         2698 :           emit_insn_before (seq, insn);
    1156              :         }
    1157              : 
    1158         6412 :       tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (smode), 0);
    1159              : 
    1160         6412 :       emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
    1161         6412 :       *op = tmp;
    1162              :     }
    1163              :   else
    1164              :     {
    1165            0 :       gcc_assert (SUBREG_P (*op));
    1166            0 :       gcc_assert (GET_MODE (*op) == vmode);
    1167              :     }
    1168              : }
    1169              : 
    1170              : /* Convert CCZmode COMPARE to vector mode.  OP1 and OP2 are the operands
                      :    of the scalar COMPARE in INSN.  Any helper instructions are emitted
                      :    before INSN; the value returned is a CCZmode UNSPEC_PTEST rtx.  */
    1171              : 
    1172              : rtx
    1173           10 : scalar_chain::convert_compare (rtx op1, rtx op2, rtx_insn *insn)
    1174              : {
    1175           10 :   rtx src, tmp;
    1176              : 
    1177              :   /* Handle any REG_EQUAL notes.  A note whose value is a CCZmode COMPARE
                      :      of a REG with a scalar integer constant has that constant rewritten
                      :      in place as a VMODE constant; a CCZmode COMPARE of two REGs is left
                      :      alone; any other note is removed from INSN.  */
    1178           10 :   tmp = find_reg_equal_equiv_note (insn);
    1179           10 :   if (tmp)
    1180              :     {
    1181            1 :       if (GET_CODE (XEXP (tmp, 0)) == COMPARE
    1182            1 :           && GET_MODE (XEXP (tmp, 0)) == CCZmode
    1183            1 :           && REG_P (XEXP (XEXP (tmp, 0), 0)))
    1184              :         {
    1185            1 :           rtx *op = &XEXP (XEXP (tmp, 0), 1);
    1186            1 :           if (CONST_SCALAR_INT_P (*op))
    1187              :             {
    1188            1 :               if (constm1_operand (*op, GET_MODE (*op)))
    1189            0 :                 *op = CONSTM1_RTX (vmode);
    1190              :               else
    1191              :                 {
    1192            1 :                   unsigned n = GET_MODE_NUNITS (vmode);
    1193            1 :                   rtx *v = XALLOCAVEC (rtx, n);
    1194            1 :                   v[0] = *op;  /* Constant in element 0, zeros elsewhere.  */
    1195            1 :                   for (unsigned i = 1; i < n; ++i)
    1196            0 :                     v[i] = const0_rtx;
    1197            1 :                   *op = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
    1198              :                 }
    1199              :               tmp = NULL_RTX;  /* Keep the rewritten note.  */
    1200              :             }
    1201            0 :           else if (REG_P (*op))
    1202              :             tmp = NULL_RTX;  /* Keep the note unchanged.  */
    1203              :         }
    1204              : 
    1205              :       if (tmp)
    1206            0 :         remove_note (insn, tmp);
    1207              :     }
    1208              : 
    1209              :   /* A comparison against anything other than zero requires an XOR.  */
    1210           10 :   if (op2 != const0_rtx)
    1211              :     {
    1212            4 :       convert_op (&op1, insn);
    1213            4 :       convert_op (&op2, insn);
    1214              :       /* If both operands are MEMs, explicitly load the OP1 into TMP.  */
    1215            4 :       if (MEM_P (op1) && MEM_P (op2))
    1216              :         {
    1217            0 :           tmp = gen_reg_rtx (vmode);
    1218            0 :           emit_insn_before (gen_rtx_SET (tmp, op1), insn);
    1219            0 :           src = tmp;
    1220              :         }
    1221              :       else
    1222              :         src = op1;
    1223            4 :       src = gen_rtx_XOR (vmode, src, op2);
    1224              :     }
    1225            6 :   else if (GET_CODE (op1) == AND
    1226            0 :            && GET_CODE (XEXP (op1, 0)) == NOT)
    1227              :     {
    1228            0 :       rtx op11 = XEXP (XEXP (op1, 0), 0);
    1229            0 :       rtx op12 = XEXP (op1, 1);
    1230            0 :       convert_op (&op11, insn);
    1231            0 :       convert_op (&op12, insn);
    1232            0 :       if (!REG_P (op11))  /* Force OP11 into a register.  */
    1233              :         {
    1234            0 :           tmp = gen_reg_rtx (vmode);
    1235            0 :           emit_insn_before (gen_rtx_SET (tmp, op11), insn);
    1236            0 :           op11 = tmp;
    1237              :         }
    1238            0 :       src = gen_rtx_AND (vmode, gen_rtx_NOT (vmode, op11), op12);
    1239            0 :     }
    1240            6 :   else if (GET_CODE (op1) == AND)
    1241              :     {
    1242            0 :       rtx op11 = XEXP (op1, 0);
    1243            0 :       rtx op12 = XEXP (op1, 1);
    1244            0 :       convert_op (&op11, insn);
    1245            0 :       convert_op (&op12, insn);
    1246            0 :       if (!REG_P (op11))  /* Force OP11 into a register.  */
    1247              :         {
    1248            0 :           tmp = gen_reg_rtx (vmode);
    1249            0 :           emit_insn_before (gen_rtx_SET (tmp, op11), insn);
    1250            0 :           op11 = tmp;
    1251              :         }
    1252            0 :       return gen_rtx_UNSPEC (CCZmode, gen_rtvec (2, op11, op12),
    1253              :                              UNSPEC_PTEST);  /* No AND insn is emitted.  */
    1254              :     }
    1255              :   else
    1256              :     {
    1257            6 :       convert_op (&op1, insn);
    1258            6 :       src = op1;
    1259              :     }
    1260              : 
    1261           10 :   if (!REG_P (src))
    1262              :     {
    1263            6 :       tmp = gen_reg_rtx (vmode);
    1264            6 :       emit_insn_before (gen_rtx_SET (tmp, src), insn);
    1265            6 :       src = tmp;
    1266              :     }
    1267              : 
    1268           10 :   if (vmode == V2DImode)  /* NOTE(review): both arms below appear to
                      :                              replicate the low element of SRC across
                      :                              the vector before the PTEST -- confirm.  */
    1269              :     {
    1270            0 :       tmp = gen_reg_rtx (vmode);
    1271            0 :       emit_insn_before (gen_vec_interleave_lowv2di (tmp, src, src), insn);
    1272            0 :       src = tmp;
    1273              :     }
    1274           10 :   else if (vmode == V4SImode)
    1275              :     {
    1276            0 :       tmp = gen_reg_rtx (vmode);
    1277            0 :       emit_insn_before (gen_sse2_pshufd (tmp, src, const0_rtx), insn);
    1278            0 :       src = tmp;
    1279              :     }
    1280              : 
    1281           10 :   return gen_rtx_UNSPEC (CCZmode, gen_rtvec (2, src, src), UNSPEC_PTEST);
    1282              : }
    1283              : 
    1284              : /* Helper function for converting INSN to vector mode.  For every def of
                      :    INSN whose register is in DEFS_CONV, either call convert_reg (when
                      :    some non-debug use is flagged DF_REF_REG_MEM_P or lies outside the
                      :    chain) or, failing that and if debug bind insns may exist, fix up
                      :    the debug uses.  Then redirect the uses of INSN not flagged
                      :    DF_REF_REG_MEM_P to their DEFS_MAP replacements.  */
    1285              : 
    1286              : void
    1287      1336784 : scalar_chain::convert_insn_common (rtx_insn *insn)
    1288              : {
    1289              :   /* Generate copies for out-of-chain uses of defs and adjust debug uses.  */
    1290      2043450 :   for (df_ref ref = DF_INSN_DEFS (insn); ref; ref = DF_REF_NEXT_LOC (ref))
    1291       706666 :     if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
    1292              :       {
    1293        22795 :         df_link *use;
    1294        43884 :         for (use = DF_REF_CHAIN (ref); use; use = use->next)
    1295        42459 :           if (NONDEBUG_INSN_P (DF_REF_INSN (use->ref))
    1296        42459 :               && (DF_REF_REG_MEM_P (use->ref)
    1297        37941 :                   || !bitmap_bit_p (insns, DF_REF_INSN_UID (use->ref))))
    1298              :             break;
    1299        22795 :         if (use)  /* Non-null only if the loop above stopped early.  */
    1300        21370 :           convert_reg (insn, DF_REF_REG (ref),
    1301        21370 :                        *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]));
    1302         1425 :         else if (MAY_HAVE_DEBUG_BIND_INSNS)
    1303              :           {
    1304              :             /* If we generated a scalar copy we can leave debug-insns
    1305              :                as-is, if not, we have to adjust them.  */
    1306         1305 :             auto_vec<rtx_insn *, 5> to_reset_debug_insns;
    1307         3903 :             for (use = DF_REF_CHAIN (ref); use; use = use->next)
    1308         2598 :               if (DEBUG_INSN_P (DF_REF_INSN (use->ref)))
    1309              :                 {
    1310          849 :                   rtx_insn *debug_insn = DF_REF_INSN (use->ref);
    1311              :                   /* If there's a reaching definition outside of the
    1312              :                      chain we have to reset.  */
    1313          849 :                   df_link *def;
    1314         2972 :                   for (def = DF_REF_CHAIN (use->ref); def; def = def->next)
    1315         2307 :                     if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def->ref)))
    1316              :                       break;
    1317          849 :                   if (def)
    1318          184 :                     to_reset_debug_insns.safe_push (debug_insn);
    1319              :                   else
    1320              :                     {
    1321          665 :                       *DF_REF_REAL_LOC (use->ref)
    1322          665 :                         = *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]);
    1323          665 :                       df_insn_rescan (debug_insn);
    1324              :                     }
    1325              :                 }
    1326              :             /* Have to do the reset outside of the DF_CHAIN walk to not
    1327              :                disrupt it.  */
    1328         2794 :             while (!to_reset_debug_insns.is_empty ())
    1329              :               {
    1330          184 :                 rtx_insn *debug_insn = to_reset_debug_insns.pop ();
    1331          184 :                 INSN_VAR_LOCATION_LOC (debug_insn) = gen_rtx_UNKNOWN_VAR_LOC ();
    1332          184 :                 df_insn_rescan_debug_internal (debug_insn);
    1333              :               }
    1334         1305 :           }
    1335              :       }
    1336              : 
    1337              :   /* Replace uses in this insn with the defs we use in the chain.  */
    1338      3344151 :   for (df_ref ref = DF_INSN_USES (insn); ref; ref = DF_REF_NEXT_LOC (ref))
    1339      2007367 :     if (!DF_REF_REG_MEM_P (ref))
    1340       716366 :       if (rtx *vreg = defs_map.get (regno_reg_rtx[DF_REF_REGNO (ref)]))
    1341              :         {
    1342              :           /* Also update a corresponding REG_DEAD note.  */
    1343        35087 :           rtx note = find_reg_note (insn, REG_DEAD, DF_REF_REG (ref));
    1344        35087 :           if (note)
    1345        23178 :             XEXP (note, 0) = *vreg;
    1346        35087 :           *DF_REF_REAL_LOC (ref) = *vreg;
    1347              :         }
    1348      1336784 : }
    1349              : 
    1350              : /* Convert INSN which is an SImode or DImode rotation by a constant
    1351              :    to vector mode.  CODE is either ROTATE or ROTATERT with operands
    1352              :    OP0 and OP1.  Returns the SET_SRC of the last instruction in the
    1353              :    resulting sequence, which is emitted before INSN.  A rotate count of
                      :    zero simply returns OP0 as rewritten by convert_op.  */
    1354              : 
    1355              : rtx
    1356           92 : general_scalar_chain::convert_rotate (enum rtx_code code, rtx op0, rtx op1,
    1357              :                                       rtx_insn *insn)
    1358              : {
    1359           92 :   int bits = INTVAL (op1);
    1360           92 :   rtx pat, result;
    1361              : 
    1362           92 :   convert_op (&op0, insn);
    1363           92 :   if (bits == 0)
    1364            0 :     return op0;
    1365              : 
    1366           92 :   if (smode == DImode)
    1367              :     {
    1368           92 :       if (code == ROTATE)
    1369           45 :         bits = 64 - bits;  /* Express as a 64-bit rotate right.  */
    1370           92 :       if (bits == 32)
    1371              :         {
    1372            0 :           rtx tmp1 = gen_reg_rtx (V4SImode);
    1373            0 :           pat = gen_sse2_pshufd (tmp1, gen_lowpart (V4SImode, op0),
    1374              :                                  GEN_INT (225));
    1375            0 :           emit_insn_before (pat, insn);
    1376            0 :           result = gen_lowpart (V2DImode, tmp1);
    1377              :         }
    1378           92 :       else if (TARGET_AVX512VL)
    1379            0 :         result = simplify_gen_binary (code, V2DImode, op0, op1);
    1380           92 :       else if (bits == 16 || bits == 48)
    1381              :         {
    1382            0 :           rtx tmp1 = gen_reg_rtx (V8HImode);
    1383            0 :           pat = gen_sse2_pshuflw (tmp1, gen_lowpart (V8HImode, op0),
    1384              :                                   GEN_INT (bits == 16 ? 57 : 147));
    1385            0 :           emit_insn_before (pat, insn);
    1386            0 :           result = gen_lowpart (V2DImode, tmp1);
    1387              :         }
    1388           92 :       else if ((bits & 0x07) == 0)  /* Whole number of bytes.  */
    1389              :         {
    1390            0 :           rtx tmp1 = gen_reg_rtx (V4SImode);
    1391            0 :           pat = gen_sse2_pshufd (tmp1, gen_lowpart (V4SImode, op0),
    1392              :                                  GEN_INT (68));
    1393            0 :           emit_insn_before (pat, insn);
    1394            0 :           rtx tmp2 = gen_reg_rtx (V1TImode);
    1395            0 :           pat = gen_sse2_lshrv1ti3 (tmp2, gen_lowpart (V1TImode, tmp1),
    1396              :                                     GEN_INT (bits));
    1397            0 :           emit_insn_before (pat, insn);
    1398            0 :           result = gen_lowpart (V2DImode, tmp2);
    1399              :         }
    1400              :       else
    1401              :         {
    1402           92 :           rtx tmp1 = gen_reg_rtx (V4SImode);
    1403           92 :           pat = gen_sse2_pshufd (tmp1, gen_lowpart (V4SImode, op0),
    1404              :                                  GEN_INT (20));
    1405           92 :           emit_insn_before (pat, insn);
    1406           92 :           rtx tmp2 = gen_reg_rtx (V2DImode);
    1407           92 :           pat = gen_lshrv2di3 (tmp2, gen_lowpart (V2DImode, tmp1),
    1408              :                                GEN_INT (bits & 31));
    1409           92 :           emit_insn_before (pat, insn);
    1410           92 :           rtx tmp3 = gen_reg_rtx (V4SImode);
    1411          139 :           pat = gen_sse2_pshufd (tmp3, gen_lowpart (V4SImode, tmp2),
    1412              :                                  GEN_INT (bits > 32 ? 34 : 136));
    1413           92 :           emit_insn_before (pat, insn);
    1414           92 :           result = gen_lowpart (V2DImode, tmp3);
    1415              :         }
    1416              :     }
    1417            0 :   else if (bits == 16)
    1418              :     {
    1419            0 :       rtx tmp1 = gen_reg_rtx (V8HImode);
    1420            0 :       pat = gen_sse2_pshuflw (tmp1, gen_lowpart (V8HImode, op0), GEN_INT (225));
    1421            0 :       emit_insn_before (pat, insn);
    1422            0 :       result = gen_lowpart (V4SImode, tmp1);
    1423              :     }
    1424            0 :   else if (TARGET_AVX512VL)
    1425            0 :     result = simplify_gen_binary (code, V4SImode, op0, op1);
    1426              :   else
    1427              :     {
    1428            0 :       if (code == ROTATE)
    1429            0 :         bits = 32 - bits;  /* Express as a 32-bit rotate right.  */
    1430              : 
    1431            0 :       rtx tmp1 = gen_reg_rtx (V4SImode);
    1432            0 :       emit_insn_before (gen_sse2_pshufd (tmp1, op0, GEN_INT (224)), insn);
    1433            0 :       rtx tmp2 = gen_reg_rtx (V2DImode);
    1434            0 :       pat = gen_lshrv2di3 (tmp2, gen_lowpart (V2DImode, tmp1),
    1435              :                            GEN_INT (bits));
    1436            0 :       emit_insn_before (pat, insn);
    1437            0 :       result = gen_lowpart (V4SImode, tmp2);
    1438              :     }
    1439              : 
    1440              :   return result;
    1441              : }
    1442              : 
    1443              : /* Convert INSN to vector mode.  INSN is taken to be a single_set: its
                      :    SET_SRC and SET_DEST are rewritten into VMODE form according to the
                      :    rtx code of the source, the pattern of INSN is replaced by that one
                      :    SET, and INSN is then re-recognized (fatal_insn_not_found is called
                      :    if nothing matches) and rescanned by DF.  */
    1444              : 
    1445              : void
    1446       410511 : general_scalar_chain::convert_insn (rtx_insn *insn)
    1447              : {
    1448       410511 :   rtx def_set = single_set (insn);
    1449       410511 :   rtx src = SET_SRC (def_set);
    1450       410511 :   rtx dst = SET_DEST (def_set);
    1451       410511 :   rtx subreg;
    1452              : 
    1453       410511 :   if (MEM_P (dst) && !REG_P (src))
    1454              :     {
    1455              :       /* There are no scalar integer instructions and therefore
    1456              :          temporary register usage is required.  */
    1457          762 :       rtx tmp = gen_reg_rtx (smode);
    1458          762 :       emit_conversion_insns (gen_move_insn (dst, tmp), insn);
    1459          762 :       dst = gen_rtx_SUBREG (vmode, tmp, 0);
    1460          762 :     }
    1461       409749 :   else if (REG_P (dst) && GET_MODE (dst) == smode)
    1462              :     {
    1463              :       /* Replace the definition with a SUBREG to the definition we
    1464              :          use inside the chain.  */
    1465       214157 :       rtx *vdef = defs_map.get (dst);
    1466       214157 :       if (vdef)
    1467        22795 :         dst = *vdef;
    1468       214157 :       dst = gen_rtx_SUBREG (vmode, dst, 0);
    1469              :       /* IRA doesn't like to have REG_EQUAL/EQUIV notes when the SET_DEST
    1470              :          is a non-REG_P.  So kill those off.  */
    1471       214157 :       rtx note = find_reg_equal_equiv_note (insn);
    1472       214157 :       if (note)
    1473         9538 :         remove_note (insn, note);
    1474              :     }
    1475              : 
    1476       410511 :   switch (GET_CODE (src))
    1477              :     {
    1478        29592 :     case PLUS:
    1479        29592 :     case MINUS:
    1480        29592 :     case IOR:
    1481        29592 :     case XOR:
    1482        29592 :     case AND:
    1483        29592 :     case SMAX:
    1484        29592 :     case SMIN:
    1485        29592 :     case UMAX:
    1486        29592 :     case UMIN:
    1487        29592 :       convert_op (&XEXP (src, 1), insn);
    1488              :       /* FALLTHRU */
    1489              : 
    1490        36870 :     case ABS:
    1491        36870 :     case ASHIFT:
    1492        36870 :     case ASHIFTRT:
    1493        36870 :     case LSHIFTRT:
    1494        36870 :       convert_op (&XEXP (src, 0), insn);
    1495        36870 :       PUT_MODE (src, vmode);  /* Reuse the rtx, changing only its mode.  */
    1496        36870 :       break;
    1497              : 
    1498           92 :     case ROTATE:
    1499           92 :     case ROTATERT:
    1500           92 :       src = convert_rotate (GET_CODE (src), XEXP (src, 0), XEXP (src, 1),
    1501              :                             insn);
    1502           92 :       break;
    1503              : 
    1504          400 :     case NEG:
    1505          400 :       src = XEXP (src, 0);
    1506              : 
    1507          400 :       if (GET_CODE (src) == ABS)
    1508              :         {
    1509            0 :           src = XEXP (src, 0);
    1510            0 :           convert_op (&src, insn);
    1511            0 :           subreg = gen_reg_rtx (vmode);
    1512            0 :           emit_insn_before (gen_rtx_SET (subreg,
    1513              :                                          gen_rtx_ABS (vmode, src)), insn);
    1514            0 :           src = subreg;
    1515              :         }
    1516              :       else
    1517          400 :         convert_op (&src, insn);
    1518              : 
    1519          400 :       subreg = gen_reg_rtx (vmode);
    1520          400 :       emit_insn_before (gen_move_insn (subreg, CONST0_RTX (vmode)), insn);
    1521          400 :       src = gen_rtx_MINUS (vmode, subreg, src);  /* Negate as 0 - SRC.  */
    1522          400 :       break;
    1523              : 
    1524          250 :     case NOT:
    1525          250 :       src = XEXP (src, 0);
    1526          250 :       convert_op (&src, insn);
    1527          250 :       subreg = gen_reg_rtx (vmode);
    1528          250 :       emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (vmode)), insn);
    1529          250 :       src = gen_rtx_XOR (vmode, src, subreg);  /* SRC ^ CONSTM1_RTX.  */
    1530          250 :       break;
    1531              : 
    1532       170787 :     case MEM:
    1533       170787 :       if (!REG_P (dst))
    1534       170787 :         convert_op (&src, insn);
    1535              :       break;
    1536              : 
    1537       196972 :     case REG:
    1538       196972 :       if (!MEM_P (dst))
    1539         1380 :         convert_op (&src, insn);
    1540              :       break;
    1541              : 
    1542            0 :     case SUBREG:
    1543            0 :       gcc_assert (GET_MODE (src) == vmode);
    1544              :       break;
    1545              : 
    1546            0 :     case COMPARE:
    1547            0 :       dst = gen_rtx_REG (CCZmode, FLAGS_REG);
    1548            0 :       src = convert_compare (XEXP (src, 0), XEXP (src, 1), insn);
    1549            0 :       break;
    1550              : 
    1551         3400 :     case CONST_INT:
    1552         3400 :       convert_op (&src, insn);
    1553         3400 :       break;
    1554              : 
    1555         1740 :     case VEC_SELECT:
    1556         1740 :       if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx)
    1557         1325 :         src = XEXP (src, 0);  /* Element 0: use the vector operand itself.  */
    1558          415 :       else if (smode == DImode)
    1559              :         {
    1560          172 :           rtx tmp = gen_lowpart (V1TImode, XEXP (src, 0));
    1561          172 :           dst = gen_lowpart (V1TImode, dst);
    1562          172 :           src = gen_rtx_LSHIFTRT (V1TImode, tmp, GEN_INT (64));
    1563              :         }
    1564              :       else
    1565              :         {
    1566          243 :           rtx tmp = XVECEXP (XEXP (src, 1), 0, 0);
    1567          243 :           rtvec vec = gen_rtvec (4, tmp, tmp, tmp, tmp);  /* Same index x4.  */
    1568          243 :           rtx par = gen_rtx_PARALLEL (VOIDmode, vec);
    1569          243 :           src = gen_rtx_VEC_SELECT (vmode, XEXP (src, 0), par);
    1570              :         }
    1571              :       break;
    1572              : 
    1573            0 :     default:
    1574            0 :       gcc_unreachable ();
    1575              :     }
    1576              : 
    1577       410511 :   SET_SRC (def_set) = src;
    1578       410511 :   SET_DEST (def_set) = dst;
    1579              : 
    1580              :   /* Drop possible dead definitions.  */
    1581       410511 :   PATTERN (insn) = def_set;
    1582              : 
    1583       410511 :   INSN_CODE (insn) = -1;  /* Discard the cached code before recog.  */
    1584       410511 :   int patt = recog_memoized (insn);
    1585       410511 :   if  (patt == -1)
    1586            0 :     fatal_insn_not_found (insn);
    1587       410511 :   df_insn_rescan (insn);
    1588       410511 : }
    1589              : 
    1590              : /* Helper function to compute gain for loading an immediate constant.
    1591              :    Typically, two movabsq for TImode vs. vmovdqa for V1TImode, but
    1592              :    with numerous special cases.  CST is the constant; BB is the basic
                      :    block used to choose between size and speed costs.  The result is
                      :    negative (a cost) when CST is a two-element CONST_WIDE_INT whose
                      :    elements are equal, and zero otherwise.  */
    1593              : 
    1594              : static int
    1595            8 : timode_immed_const_gain (rtx cst, basic_block bb)
    1596              : {
    1597              :   /* movabsq vs. movabsq+vmovq+vunpacklqdq.  */
    1598            8 :   if (CONST_WIDE_INT_P (cst)
    1599            5 :       && CONST_WIDE_INT_NUNITS (cst) == 2
    1600           13 :       && CONST_WIDE_INT_ELT (cst, 0) == CONST_WIDE_INT_ELT (cst, 1))
    1601            0 :     return optimize_bb_for_size_p (bb) ? -COSTS_N_BYTES (9)
    1602              :                                        : -COSTS_N_INSNS (2);
    1603              :   /* 2x movabsq ~ vmovdqa.  */
    1604              :   return 0;
    1605              : }
    1606              : 
    1607              : /* Return true if chain conversion is cost profitable.  */
    1608              : 
    1609              : bool
    1610       473775 : timode_scalar_chain::compute_convert_gain ()
    1611              : {
    1612              :   /* Assume that if we have to move TImode values between units,
    1613              :      then transforming this chain isn't worth it.  */
    1614       473775 :   if (cost_sse_integer)
    1615              :     return false;
    1616              : 
    1617       473775 :   bitmap_iterator bi;
    1618       473775 :   unsigned insn_uid;
    1619              : 
    1620              :   /* Split ties to prefer V1TImode when not optimizing for size.  */
    1621       473775 :   int gain = optimize_size ? 0 : 1;
    1622       473775 :   sreal weighted_gain  = 0;
    1623              : 
    1624       473775 :   if (dump_file)
    1625            0 :     fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
    1626              : 
    1627      1406144 :   EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
    1628              :     {
    1629       932369 :       rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
    1630       932369 :       rtx def_set = single_set (insn);
    1631       932369 :       rtx src = SET_SRC (def_set);
    1632       932369 :       rtx dst = SET_DEST (def_set);
    1633       932369 :       HOST_WIDE_INT op1val;
    1634       932369 :       basic_block bb = BLOCK_FOR_INSN (insn);
    1635       932369 :       int scost, vcost;
    1636       932369 :       int igain = 0;
    1637       932369 :       profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
    1638       932369 :       bool speed_p = optimize_bb_for_speed_p (bb);
    1639       932369 :       sreal bb_freq = bb->count.to_sreal_scale (entry_count);
    1640              : 
    1641       932369 :       switch (GET_CODE (src))
    1642              :         {
    1643       458213 :         case REG:
    1644       458213 :           if (!speed_p)
    1645        20482 :             igain = MEM_P (dst) ? COSTS_N_BYTES (6) : COSTS_N_BYTES (3);
    1646              :           else
    1647              :             igain = COSTS_N_INSNS (1);
    1648              :           break;
    1649              : 
    1650       426596 :         case MEM:
    1651       426596 :           igain = !speed_p ? COSTS_N_BYTES (7) : COSTS_N_INSNS (1);
    1652              :           break;
    1653              : 
    1654        11192 :         case CONST_INT:
    1655        11192 :           if (MEM_P (dst)
    1656        11192 :               && standard_sse_constant_p (src, V1TImode))
    1657        10688 :             igain = !speed_p ? COSTS_N_BYTES (11) : 1;
    1658              :           break;
    1659              : 
    1660        33243 :         case CONST_WIDE_INT:
    1661              :           /* 2 x mov vs. vmovdqa.  */
    1662        33243 :           if (MEM_P (dst))
    1663        33059 :             igain = !speed_p ? COSTS_N_BYTES (3) : COSTS_N_INSNS (1);
    1664              :           break;
    1665              : 
    1666           19 :         case NOT:
    1667           19 :           if (MEM_P (dst))
    1668        24266 :             igain = -COSTS_N_INSNS (1);
    1669              :           break;
    1670              : 
    1671           14 :         case AND:
    1672           14 :           if (!MEM_P (dst))
    1673            3 :             igain = COSTS_N_INSNS (1);
    1674           14 :           if (CONST_SCALAR_INT_P (XEXP (src, 1)))
    1675            5 :             igain += timode_immed_const_gain (XEXP (src, 1), bb);
    1676              :           break;
    1677              : 
    1678         2692 :         case XOR:
    1679         2692 :         case IOR:
    1680         2692 :           if (timode_concatdi_p (src))
    1681              :             {
    1682              :               /* vmovq;vpinsrq (11 bytes).  */
    1683         2641 :               igain = speed_p ? -2 * ix86_cost->sse_to_integer
    1684              :                               : -COSTS_N_BYTES (11);
    1685              :               break;
    1686              :             }
    1687           51 :           if (!MEM_P (dst))
    1688           43 :             igain = COSTS_N_INSNS (1);
    1689           51 :           if (CONST_SCALAR_INT_P (XEXP (src, 1)))
    1690            3 :             igain += timode_immed_const_gain (XEXP (src, 1), bb);
    1691              :           break;
    1692              : 
    1693            0 :         case PLUS:
    1694            0 :           if (timode_concatdi_p (src))
    1695              :             /* vmovq;vpinsrq (11 bytes).  */
    1696            0 :             igain = speed_p ? -2 * ix86_cost->sse_to_integer
    1697              :                             : -COSTS_N_BYTES (11);
    1698              :           break;
    1699              : 
    1700          158 :         case ASHIFT:
    1701          158 :         case LSHIFTRT:
    1702              :           /* See ix86_expand_v1ti_shift.  */
    1703          158 :           op1val = INTVAL (XEXP (src, 1));
    1704          158 :           if (!speed_p)
    1705              :             {
    1706           15 :               if (op1val == 64 || op1val == 65)
    1707              :                 scost = COSTS_N_BYTES (5);
    1708           10 :               else if (op1val >= 66)
    1709              :                 scost = COSTS_N_BYTES (6);
    1710           10 :               else if (op1val == 1)
    1711              :                 scost = COSTS_N_BYTES (8);
    1712              :               else
    1713              :                 scost = COSTS_N_BYTES (9);
    1714              : 
    1715           14 :               if ((op1val & 7) == 0)
    1716              :                 vcost = COSTS_N_BYTES (5);
    1717           10 :               else if (op1val > 64)
    1718              :                 vcost = COSTS_N_BYTES (10);
    1719              :               else
    1720           10 :                 vcost = TARGET_AVX ? COSTS_N_BYTES (19) : COSTS_N_BYTES (23);
    1721              :             }
    1722              :           else
    1723              :             {
    1724          143 :               scost = COSTS_N_INSNS (2);
    1725          143 :               if ((op1val & 7) == 0)
    1726              :                 vcost = COSTS_N_INSNS (1);
    1727          110 :               else if (op1val > 64)
    1728              :                 vcost = COSTS_N_INSNS (2);
    1729              :               else
    1730          110 :                 vcost = TARGET_AVX ? COSTS_N_INSNS (4) : COSTS_N_INSNS (5);
    1731              :             }
    1732          158 :           igain = scost - vcost;
    1733          158 :           break;
    1734              : 
    1735          103 :         case ASHIFTRT:
    1736              :           /* See ix86_expand_v1ti_ashiftrt.  */
    1737          103 :           op1val = INTVAL (XEXP (src, 1));
    1738          103 :           if (!speed_p)
    1739              :             {
    1740            7 :               if (op1val == 64 || op1val == 127)
    1741              :                 scost = COSTS_N_BYTES (7);
    1742            7 :               else if (op1val == 1)
    1743              :                 scost = COSTS_N_BYTES (8);
    1744            7 :               else if (op1val == 65)
    1745              :                 scost = COSTS_N_BYTES (10);
    1746            7 :               else if (op1val >= 66)
    1747              :                 scost = COSTS_N_BYTES (11);
    1748              :               else
    1749              :                 scost = COSTS_N_BYTES (9);
    1750              : 
    1751            0 :               if (op1val == 127)
    1752              :                 vcost = COSTS_N_BYTES (10);
    1753            7 :               else if (op1val == 64)
    1754              :                 vcost = COSTS_N_BYTES (14);
    1755            7 :               else if (op1val == 96)
    1756              :                 vcost = COSTS_N_BYTES (18);
    1757            7 :               else if (op1val >= 111)
    1758              :                 vcost = COSTS_N_BYTES (15);
    1759            7 :               else if (TARGET_AVX2 && op1val == 32)
    1760              :                 vcost = COSTS_N_BYTES (16);
    1761            7 :               else if (TARGET_SSE4_1 && op1val == 32)
    1762              :                 vcost = COSTS_N_BYTES (20);
    1763            7 :               else if (op1val >= 96)
    1764              :                 vcost = COSTS_N_BYTES (23);
    1765            7 :               else if ((op1val & 7) == 0)
    1766              :                 vcost = COSTS_N_BYTES (28);
    1767            7 :               else if (TARGET_AVX2 && op1val < 32)
    1768              :                 vcost = COSTS_N_BYTES (30);
    1769            7 :               else if (op1val == 1 || op1val >= 64)
    1770              :                 vcost = COSTS_N_BYTES (42);
    1771              :               else
    1772            7 :                 vcost = COSTS_N_BYTES (47);
    1773              :             }
    1774              :           else
    1775              :             {
    1776           96 :               if (op1val >= 65 && op1val <= 126)
    1777              :                 scost = COSTS_N_INSNS (3);
    1778              :               else
    1779           96 :                 scost = COSTS_N_INSNS (2);
    1780              : 
    1781           96 :               if (op1val == 127)
    1782              :                 vcost = COSTS_N_INSNS (2);
    1783           96 :               else if (op1val == 64)
    1784              :                 vcost = COSTS_N_INSNS (3);
    1785           96 :               else if (op1val == 96)
    1786              :                 vcost = COSTS_N_INSNS (3);
    1787           96 :               else if (op1val >= 111)
    1788              :                 vcost = COSTS_N_INSNS (3);
    1789           96 :               else if (TARGET_SSE4_1 && op1val == 32)
    1790              :                 vcost = COSTS_N_INSNS (3);
    1791           96 :               else if (TARGET_SSE4_1
    1792            0 :                        && (op1val == 8 || op1val == 16 || op1val == 24))
    1793              :                 vcost = COSTS_N_INSNS (3);
    1794           96 :               else if (op1val >= 96)
    1795              :                 vcost = COSTS_N_INSNS (4);
    1796           96 :               else if (TARGET_SSE4_1 && (op1val == 28 || op1val == 80))
    1797              :                 vcost = COSTS_N_INSNS (4);
    1798           96 :               else if ((op1val & 7) == 0)
    1799              :                 vcost = COSTS_N_INSNS (5);
    1800           96 :               else if (TARGET_AVX2 && op1val < 32)
    1801              :                 vcost = COSTS_N_INSNS (6);
    1802           96 :               else if (TARGET_SSE4_1 && op1val < 15)
    1803              :                 vcost = COSTS_N_INSNS (6);
    1804           96 :               else if (op1val == 1 || op1val >= 64)
    1805              :                 vcost = COSTS_N_INSNS (8);
    1806              :               else
    1807            0 :                 vcost = COSTS_N_INSNS (9);
    1808              :             }
    1809          103 :           igain = scost - vcost;
    1810          103 :           break;
    1811              : 
    1812            5 :         case ROTATE:
    1813            5 :         case ROTATERT:
    1814              :           /* See ix86_expand_v1ti_rotate.  */
    1815            5 :           op1val = INTVAL (XEXP (src, 1));
    1816            5 :           if (!speed_p)
    1817              :             {
    1818            0 :               scost = COSTS_N_BYTES (13);
    1819            0 :               if ((op1val & 31) == 0)
    1820              :                 vcost = COSTS_N_BYTES (5);
    1821            0 :               else if ((op1val & 7) == 0)
    1822            0 :                 vcost = TARGET_AVX ? COSTS_N_BYTES (13) : COSTS_N_BYTES (18);
    1823            0 :               else if (op1val > 32 && op1val < 96)
    1824              :                 vcost = COSTS_N_BYTES (24);
    1825              :               else
    1826            0 :                 vcost = COSTS_N_BYTES (19);
    1827              :             }
    1828              :           else
    1829              :             {
    1830            5 :               scost = COSTS_N_INSNS (3);
    1831            5 :               if ((op1val & 31) == 0)
    1832              :                 vcost = COSTS_N_INSNS (1);
    1833            3 :               else if ((op1val & 7) == 0)
    1834            1 :                 vcost = TARGET_AVX ? COSTS_N_INSNS (3) : COSTS_N_INSNS (4);
    1835            2 :               else if (op1val > 32 && op1val < 96)
    1836              :                 vcost = COSTS_N_INSNS (5);
    1837              :               else
    1838            2 :                 vcost = COSTS_N_INSNS (1);
    1839              :             }
    1840            5 :           igain = scost - vcost;
    1841            5 :           break;
    1842              : 
    1843           12 :         case COMPARE:
    1844           12 :           if (XEXP (src, 1) == const0_rtx)
    1845              :             {
    1846            8 :               if (GET_CODE (XEXP (src, 0)) == AND)
    1847              :                 /* and;and;or (9 bytes) vs. ptest (5 bytes).  */
    1848              :                 igain = !speed_p ? COSTS_N_BYTES (4) : COSTS_N_INSNS (2);
    1849              :               /* or (3 bytes) vs. ptest (5 bytes).  */
    1850            8 :               else if (!speed_p)
    1851            0 :                 igain = -COSTS_N_BYTES (2);
    1852              :             }
    1853            4 :           else if (XEXP (src, 1) == const1_rtx)
    1854              :             /* and;cmp -1 (7 bytes) vs. pcmpeqd;pxor;ptest (13 bytes).  */
    1855            0 :             igain = !speed_p ? -COSTS_N_BYTES (6) : -COSTS_N_INSNS (1);
    1856              :           break;
    1857              : 
    1858          122 :         case ZERO_EXTEND:
    1859          122 :           if (GET_MODE (XEXP (src, 0)) == DImode)
    1860              :             /* xor (2 bytes) vs. vmovq (5 bytes).  */
    1861          122 :             igain = speed_p ? COSTS_N_INSNS (1) - ix86_cost->sse_to_integer
    1862              :                             : -COSTS_N_BYTES (3);
    1863              :           break;
    1864              : 
    1865              :         default:
    1866              :           break;
    1867              :         }
    1868              : 
    1869      1823066 :       gain += igain;
    1870       932361 :       if (speed_p)
    1871       890705 :         weighted_gain += bb_freq * igain;
    1872              : 
    1873       932369 :       if (igain != 0 && dump_file)
    1874              :         {
    1875            0 :           fprintf (dump_file, "  Instruction gain %d with bb_freq %.2f for ",
    1876              :                    igain, bb_freq.to_double ());
    1877            0 :           dump_insn_slim (dump_file, insn);
    1878              :         }
    1879              :     }
    1880              : 
    1881       473775 :   if (dump_file)
    1882            0 :     fprintf (dump_file, "  Total gain: %d, weighted gain %.2f\n",
    1883              :              gain, weighted_gain.to_double ());
    1884              : 
    1885       473775 :   if (weighted_gain > (sreal) 0)
    1886              :     return true;
    1887              :   else
    1888        24307 :     return gain > 0;
    1889              : }
    1890              : 
    1891              : /* Fix uses of converted REG in debug insns.  Each V1TImode use of REG found in a debug insn is wrapped in a TImode SUBREG and that insn is rescanned; nothing is done unless flag_var_tracking is set.  */
    1892              : 
    1893              : void
    1894       427804 : timode_scalar_chain::fix_debug_reg_uses (rtx reg)
    1895              : {
    1896       427804 :   if (!flag_var_tracking)
    1897              :     return;
    1898              : 
    1899       375770 :   df_ref ref, next;
    1900       769301 :   for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
    1901              :     {
    1902       393531 :       rtx_insn *insn = DF_REF_INSN (ref);
    1903              :       /* Make sure the next ref is for a different instruction,
    1904              :          so that we're not affected by the rescan.  */
    1905       393531 :       next = DF_REF_NEXT_REG (ref);
    1906       393531 :       while (next && DF_REF_INSN (next) == insn)
    1907            0 :         next = DF_REF_NEXT_REG (next);
    1908              : 
    1909       393531 :       if (DEBUG_INSN_P (insn))
    1910              :         {
    1911              :           /* It may be a debug insn with a TImode variable in
    1912              :              register.  */
    1913              :           bool changed = false;
    1914          178 :           for (; ref != next; ref = DF_REF_NEXT_REG (ref)) /* Visit every use of REG that belongs to INSN.  */
    1915              :             {
    1916           89 :               rtx *loc = DF_REF_LOC (ref);
    1917           89 :               if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
    1918              :                 {
    1919           85 :                   *loc = gen_rtx_SUBREG (TImode, *loc, 0); /* Show the V1TImode register to the debug insn as TImode.  */
    1920           85 :                   changed = true;
    1921              :                 }
    1922              :             }
    1923           89 :           if (changed) /* Rescan only when a use was actually rewritten.  */
    1924           85 :             df_insn_rescan (insn);
    1925              :         }
    1926              :     }
    1927              : }
    1928              : 
    1929              : /* Convert SRC, a *concatditi3 pattern, into a vec_concatv2di instruction.
    1930              :    Insert this before INSN, and return the result as a V1TImode subreg.  The high part is taken from operand 0 of SRC when that operand is an ASHIFT and from operand 1 otherwise; the low part comes from the remaining operand.  */
    1931              : 
    1932              : static rtx
    1933          253 : timode_convert_concatdi (rtx src, rtx_insn *insn)
    1934              : {
    1935          253 :   rtx hi, lo;
    1936          253 :   rtx tmp = gen_reg_rtx (V2DImode);
    1937          253 :   if (GET_CODE (XEXP (src, 0)) == ASHIFT)
    1938              :     {
    1939          253 :       hi = XEXP (XEXP (XEXP (src, 0), 0), 0); /* Look through the ASHIFT and the rtx it shifts (presumably an extension - confirm against *concatditi3).  */
    1940          253 :       lo = XEXP (XEXP (src, 1), 0); /* Look through one wrapper around the other operand.  */
    1941              :     }
    1942              :   else
    1943              :     {
    1944            0 :       hi = XEXP (XEXP (XEXP (src, 1), 0), 0); /* Same extraction with the two operands of SRC swapped.  */
    1945            0 :       lo = XEXP (XEXP (src, 0), 0);
    1946              :     }
    1947          253 :   emit_insn_before (gen_vec_concatv2di (tmp, lo, hi), insn);
    1948          253 :   return gen_rtx_SUBREG (V1TImode, tmp, 0); /* The V2DImode temporary is handed back viewed as V1TImode.  */
    1949              : }
    1950              : 
    1951              : /* Convert INSN from TImode to V1TImode.  The destination and source of INSN's single set are rewritten, any helper insns are emitted before INSN, and INSN is then re-recognized and rescanned.  */
    1952              : 
    1953              : void
    1954       926273 : timode_scalar_chain::convert_insn (rtx_insn *insn)
    1955              : {
    1956       926273 :   rtx def_set = single_set (insn);
    1957       926273 :   rtx src = SET_SRC (def_set);
    1958       926273 :   rtx dst = SET_DEST (def_set);
    1959       926273 :   rtx tmp;
    1960              : 
    1961       926273 :   switch (GET_CODE (dst)) /* First retarget the destination; only REG and MEM are expected.  */
    1962              :     {
    1963       427814 :     case REG:
    1964       427814 :       if (GET_MODE (dst) == TImode)
    1965              :         {
    1966       426053 :           PUT_MODE (dst, V1TImode);
    1967       426053 :           fix_debug_reg_uses (dst);
    1968              :         }
    1969       427814 :       if (GET_MODE (dst) == V1TImode)
    1970              :         {
    1971              :           /* It might potentially be helpful to convert REG_EQUAL notes,
    1972              :              but for now we just remove them.  */
    1973       427804 :           rtx note = find_reg_equal_equiv_note (insn);
    1974       427804 :           if (note)
    1975          444 :             remove_note (insn, note);
    1976              :         }
    1977              :       break;
    1978       498459 :     case MEM:
    1979       498459 :       PUT_MODE (dst, V1TImode);
    1980       498459 :       break;
    1981              : 
    1982            0 :     default:
    1983            0 :       gcc_unreachable ();
    1984              :     }
    1985              : 
    1986       926273 :   switch (GET_CODE (src)) /* Then rewrite the source according to its rtx code.  */
    1987              :     {
    1988       454819 :     case REG:
    1989       454819 :       if (GET_MODE (src) == TImode)
    1990              :         {
    1991         1751 :           PUT_MODE (src, V1TImode);
    1992         1751 :           fix_debug_reg_uses (src);
    1993              :         }
    1994              :       break;
    1995              : 
    1996       426548 :     case MEM:
    1997       426548 :       PUT_MODE (src, V1TImode);
    1998       426548 :       break;
    1999              : 
    2000        33242 :     case CONST_WIDE_INT:
    2001        33242 :       if (NONDEBUG_INSN_P (insn))
    2002              :         {
    2003              :           /* Since there are no instructions to store 128-bit constant,
    2004              :              temporary register usage is required.  */
    2005        33242 :           bool use_move;
    2006        33242 :           start_sequence ();
    2007        33242 :           tmp = ix86_convert_const_wide_int_to_broadcast (TImode, src);
    2008        33242 :           if (tmp)
    2009              :             {
    2010          194 :               src = lowpart_subreg (V1TImode, tmp, TImode);
    2011          194 :               use_move = true;
    2012              :             }
    2013              :           else
    2014              :             {
    2015        33048 :               src = smode_convert_cst (src, V1TImode);
    2016        33048 :               src = validize_mem (force_const_mem (V1TImode, src));
    2017        33048 :               use_move = MEM_P (dst); /* A constant-pool source is copied through a temporary only when DST is memory.  */
    2018              :             }
    2019        33242 :           rtx_insn *seq = end_sequence ();
    2020        33242 :           if (seq)
    2021          195 :             emit_insn_before (seq, insn);
    2022        33242 :           if (use_move)
    2023              :             {
    2024        33060 :               tmp = gen_reg_rtx (V1TImode);
    2025        33060 :               emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2026        33060 :               src = tmp;
    2027              :             }
    2028              :         }
    2029              :       break;
    2030              : 
    2031        11192 :     case CONST_INT:
    2032        11192 :       switch (standard_sse_constant_p (src, TImode)) /* Only results 1 and 2 are expected; they select the zero and the all-ones constant below.  */
    2033              :         {
    2034        10967 :         case 1:
    2035        10967 :           src = CONST0_RTX (GET_MODE (dst));
    2036        10967 :           break;
    2037          225 :         case 2:
    2038          225 :           src = CONSTM1_RTX (GET_MODE (dst));
    2039          225 :           break;
    2040            0 :         default:
    2041            0 :           gcc_unreachable ();
    2042              :         }
    2043        11192 :       if (MEM_P (dst))
    2044              :         {
    2045        10688 :           tmp = gen_reg_rtx (V1TImode);
    2046        10688 :           emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2047        10688 :           src = tmp;
    2048              :         }
    2049              :       break;
    2050              : 
    2051           13 :     case AND:
    2052           13 :       if (GET_CODE (XEXP (src, 0)) == NOT) /* AND with a NOT first operand: convert both inner operands and retype the NOT and the AND in place.  */
    2053              :         {
    2054            0 :           convert_op (&XEXP (XEXP (src, 0), 0), insn);
    2055            0 :           convert_op (&XEXP (src, 1), insn);
    2056            0 :           PUT_MODE (XEXP (src, 0), V1TImode);
    2057            0 :           PUT_MODE (src, V1TImode);
    2058            0 :           break;
    2059              :         }
    2060           13 :       convert_op (&XEXP (src, 0), insn);
    2061           13 :       convert_op (&XEXP (src, 1), insn);
    2062           13 :       PUT_MODE (src, V1TImode);
    2063           13 :       if (MEM_P (dst)) /* For a memory destination the result is computed into a fresh register first.  */
    2064              :         {
    2065           10 :           tmp = gen_reg_rtx (V1TImode);
    2066           10 :           emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2067           10 :           src = tmp;
    2068              :         }
    2069              :       break;
    2070              : 
    2071          304 :     case XOR:
    2072          304 :     case IOR:
    2073          304 :       if (timode_concatdi_p (src)) /* A concatenation of two halves is emitted as a vec_concatv2di instead.  */
    2074              :         {
    2075          253 :           src = timode_convert_concatdi (src, insn);
    2076          253 :           break;
    2077              :         }
    2078           51 :       convert_op (&XEXP (src, 0), insn);
    2079           51 :       convert_op (&XEXP (src, 1), insn);
    2080           51 :       PUT_MODE (src, V1TImode);
    2081           51 :       if (MEM_P (dst))
    2082              :         {
    2083            8 :           tmp = gen_reg_rtx (V1TImode);
    2084            8 :           emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2085            8 :           src = tmp;
    2086              :         }
    2087              :       break;
    2088              : 
    2089            3 :     case NOT: /* NOT is rewritten as an XOR with an all-ones V1TImode register.  */
    2090            3 :       src = XEXP (src, 0);
    2091            3 :       convert_op (&src, insn);
    2092            3 :       tmp = gen_reg_rtx (V1TImode);
    2093            3 :       emit_insn_before (gen_move_insn (tmp, CONSTM1_RTX (V1TImode)), insn);
    2094            3 :       src = gen_rtx_XOR (V1TImode, src, tmp);
    2095            3 :       if (MEM_P (dst))
    2096              :         {
    2097            0 :           tmp = gen_reg_rtx (V1TImode);
    2098            0 :           emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2099            0 :           src = tmp;
    2100              :         }
    2101              :       break;
    2102              : 
    2103           10 :     case COMPARE:
    2104           10 :       dst = gen_rtx_REG (CCZmode, FLAGS_REG); /* The converted compare sets the flags register in CCZmode.  */
    2105           10 :       src = convert_compare (XEXP (src, 0), XEXP (src, 1), insn);
    2106           10 :       break;
    2107              : 
    2108           43 :     case ASHIFT:
    2109           43 :     case LSHIFTRT:
    2110           43 :     case ASHIFTRT:
    2111           43 :     case ROTATERT:
    2112           43 :     case ROTATE:
    2113           43 :       convert_op (&XEXP (src, 0), insn); /* Only the shifted or rotated operand is converted; operand 1 is left untouched.  */
    2114           43 :       PUT_MODE (src, V1TImode);
    2115           43 :       break;
    2116              : 
    2117           99 :     case ZERO_EXTEND:
    2118           99 :       if (GET_MODE (XEXP (src, 0)) == DImode)
    2119              :         {
    2120              :           /* Convert to *vec_concatv2di_0.  */
    2121           99 :           rtx tmp = gen_reg_rtx (V2DImode);
    2122           99 :           rtx pat = gen_rtx_VEC_CONCAT (V2DImode, XEXP (src, 0), const0_rtx);
    2123           99 :           emit_insn_before (gen_move_insn (tmp, pat), insn);
    2124           99 :           src = gen_rtx_SUBREG (vmode, tmp, 0);
    2125              :         }
    2126              :       else
    2127            0 :         gcc_unreachable ();
    2128           99 :       break;
    2129              : 
    2130            0 :     case PLUS: /* PLUS is accepted only in its concatenation form.  */
    2131            0 :       if (timode_concatdi_p (src))
    2132            0 :         src = timode_convert_concatdi (src, insn);
    2133              :       else
    2134            0 :         gcc_unreachable ();
    2135            0 :       break;
    2136              : 
    2137            0 :     default:
    2138            0 :       gcc_unreachable ();
    2139              :     }
    2140              : 
    2141       926273 :   SET_SRC (def_set) = src;
    2142       926273 :   SET_DEST (def_set) = dst;
    2143              : 
    2144              :   /* Drop possible dead definitions.  */
    2145       926273 :   PATTERN (insn) = def_set;
    2146              : 
    2147       926273 :   INSN_CODE (insn) = -1; /* Discard the cached insn code so the rewritten pattern is recognized afresh.  */
    2148       926273 :   recog_memoized (insn);
    2149       926273 :   df_insn_rescan (insn);
    2150       926273 : }
    2151              : 
    2152              : /* Generate copies from defs used by the chain but not defined therein.
    2153              :    Also populates defs_map which is used later by convert_insn.  Every register in defs_conv is mapped to a fresh smode pseudo, and make_vector_copies is called for each def of such a register made by an insn in insns_conv.  */
    2154              : 
    2155              : void
    2156       643914 : scalar_chain::convert_registers ()
    2157              : {
    2158       643914 :   bitmap_iterator bi;
    2159       643914 :   unsigned id;
    2160       669336 :   EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi) /* Here ID is a register number.  */
    2161              :     {
    2162        25422 :       rtx chain_reg = gen_reg_rtx (smode);
    2163        25422 :       defs_map.put (regno_reg_rtx[id], chain_reg);
    2164              :     }
    2165       651974 :   EXECUTE_IF_SET_IN_BITMAP (insns_conv, 0, id, bi) /* Here ID is an insn UID.  */
    2166        20379 :     for (df_ref ref = DF_INSN_UID_DEFS (id); ref; ref = DF_REF_NEXT_LOC (ref))
    2167        12319 :       if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
    2168         8060 :         make_vector_copies (DF_REF_INSN (ref), DF_REF_REAL_REG (ref));
    2169       643914 : }
    2170              : 
    2171              : /* Convert whole chain creating required register
    2172              :    conversions and copies.  Return the number of insns converted, which is 0 when dbg_cnt (stv_conversion) is false and nothing is done.  */
    2173              : 
    2174              : int
    2175       643914 : scalar_chain::convert ()
    2176              : {
    2177       643914 :   bitmap_iterator bi;
    2178       643914 :   unsigned id;
    2179       643914 :   int converted_insns = 0;
    2180              : 
    2181       643914 :   if (!dbg_cnt (stv_conversion))
    2182              :     return 0;
    2183              : 
    2184       643914 :   if (dump_file)
    2185            0 :     fprintf (dump_file, "Converting chain #%d...\n", chain_id);
    2186              : 
    2187       643914 :   convert_registers (); /* Runs before any insn of the chain is rewritten.  */
    2188              : 
    2189      1980698 :   EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi) /* ID is the UID of each insn in the chain.  */
    2190              :     {
    2191      1336784 :       rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
    2192      1336784 :       convert_insn_common (insn);
    2193      1336784 :       convert_insn (insn);
    2194      1336784 :       converted_insns++;
    2195              :     }
    2196              : 
    2197              :   return converted_insns;
    2198              : }
    2199              : 
    2200              : /* Return the SET expression if INSN doesn't reference hard register.
    2201              :    Return NULL if INSN uses or defines a hard register, excluding
    2202              :    pseudo register pushes, hard register uses in a memory address,
    2203              :    clobbers and flags definitions.  Also return NULL if INSN has no single set.  */
    2204              : 
    2205              : static rtx
    2206    339055620 : pseudo_reg_set (rtx_insn *insn)
    2207              : {
    2208    339055620 :   rtx set = single_set (insn);
    2209    339055620 :   if (!set)
    2210              :     return NULL;
    2211              : 
    2212              :   /* Check pseudo register push first; a match is accepted at once, without examining the insn's defs and uses.  */
    2213    135502964 :   machine_mode mode = TARGET_64BIT ? TImode : DImode;
    2214    135502964 :   if (REG_P (SET_SRC (set))
    2215     38238551 :       && !HARD_REGISTER_P (SET_SRC (set))
    2216    165331507 :       && push_operand (SET_DEST (set), mode))
    2217              :     return set;
    2218              : 
    2219    135250625 :   df_ref ref;
    2220    219146869 :   FOR_EACH_INSN_DEF (ref, insn)
    2221    120638935 :     if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
    2222     64768779 :         && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
    2223    170908075 :         && DF_REF_REGNO (ref) != FLAGS_REG)
    2224              :       return NULL; /* A hard register other than the flags register is set and the def is not a must-clobber.  */
    2225              : 
    2226    188714818 :   FOR_EACH_INSN_USE (ref, insn)
    2227    115658630 :     if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
    2228              :       return NULL; /* A hard register is used outside of a memory reference.  */
    2229              : 
    2230              :   return set;
    2231              : }
    2232              : 
    2233              : /* Return true if the register REG is defined in a single DEF chain.
    2234              :    If it is defined in more than one DEF chains, we may not be able
    2235              :    to convert it in all chains.  Return false as well when REG has no def recorded at all.  */
    2236              : 
    2237              : static bool
    2238      1159377 : single_def_chain_p (rtx reg)
    2239              : {
    2240      1159377 :   df_ref ref = DF_REG_DEF_CHAIN (REGNO (reg));
    2241      1159377 :   if (!ref)
    2242              :     return false;
    2243      1159361 :   return DF_REF_NEXT_REG (ref) == nullptr; /* True only when the first def is also the last one.  */
    2244              : }
    2245              : 
    2246              : /* Check if comparison INSN may be transformed into vector comparison.
    2247              :    Currently we transform equality/inequality checks which look like:
    2248              :    (set (reg:CCZ 17 flags) (compare:CCZ (reg:TI x) (reg:TI y)))  Only accepted when MODE is TImode for TARGET_64BIT (DImode otherwise) and TARGET_SSE4_1 is enabled; INSN must be a single set whose source is a COMPARE.  */
    2249              : 
    2250              : static bool
    2251     12906698 : convertible_comparison_p (rtx_insn *insn, enum machine_mode mode)
    2252              : {
    2253     14309584 :   if (mode != (TARGET_64BIT ? TImode : DImode))
    2254              :     return false;
    2255              : 
    2256      4709145 :   if (!TARGET_SSE4_1)
    2257              :     return false;
    2258              : 
    2259       164923 :   rtx def_set = single_set (insn);
    2260              : 
    2261       164923 :   gcc_assert (def_set);
    2262              : 
    2263       164923 :   rtx src = SET_SRC (def_set);
    2264       164923 :   rtx dst = SET_DEST (def_set);
    2265              : 
    2266       164923 :   gcc_assert (GET_CODE (src) == COMPARE);
    2267              : 
    2268       164923 :   if (!REG_P (dst)
    2269       164923 :       || REGNO (dst) != FLAGS_REG
    2270       329846 :       || GET_MODE (dst) != CCZmode)
    2271              :     return false; /* The destination must be the flags register in CCZmode.  */
    2272              : 
    2273       120106 :   rtx op1 = XEXP (src, 0);
    2274       120106 :   rtx op2 = XEXP (src, 1);
    2275              : 
    2276              :   /* *cmp<dwi>_doubleword.  Each operand is a scalar integer constant, or a REG or MEM in MODE.  */
    2277       120106 :   if ((CONST_SCALAR_INT_P (op1)
    2278       120106 :        || ((REG_P (op1) || MEM_P (op1))
    2279       118321 :            && GET_MODE (op1) == mode))
    2280           60 :       && (CONST_SCALAR_INT_P (op2)
    2281           12 :           || ((REG_P (op2) || MEM_P (op2))
    2282           10 :               && GET_MODE (op2) == mode)))
    2283              :     return true;
    2284              : 
    2285              :   /* *testti_doubleword.  NOTE(review): this branch checks operands against TImode rather than MODE; the two coincide only for TARGET_64BIT - confirm this is intended.  */
    2286       120048 :   if (op2 == const0_rtx
    2287        38296 :       && GET_CODE (op1) == AND
    2288          150 :       && REG_P (XEXP (op1, 0)))
    2289              :     {
    2290          150 :       rtx op12 = XEXP (op1, 1);
    2291          150 :       return GET_MODE (XEXP (op1, 0)) == TImode
    2292          150 :              && (CONST_SCALAR_INT_P (op12)
    2293            0 :                  || ((REG_P (op12) || MEM_P (op12))
    2294            0 :                      && GET_MODE (op12) == TImode));
    2295              :     }
    2296              : 
    2297              :   /* *test<dwi>_not_doubleword.  Both the operand under the NOT and the other AND operand must be a REG or MEM in MODE.  */
    2298       119898 :   if (op2 == const0_rtx
    2299        38146 :       && GET_CODE (op1) == AND
    2300            0 :       && GET_CODE (XEXP (op1, 0)) == NOT)
    2301              :     {
    2302            0 :       rtx op11 = XEXP (XEXP (op1, 0), 0);
    2303            0 :       rtx op12 = XEXP (op1, 1);
    2304            0 :       return (REG_P (op11) || MEM_P (op11))
    2305            0 :              && (REG_P (op12) || MEM_P (op12))
    2306            0 :              && GET_MODE (op11) == mode
    2307            0 :              && GET_MODE (op12) == mode;
    2308              :     }
    2309              : 
    2310              :   return false;
    2311              : }
    2312              : 
    2313              : /* The general version of scalar_to_vector_candidate_p.  */
    2314              : 
    2315              : static bool
    2316    237054288 : general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
    2317              : {
    2318    237054288 :   rtx def_set = pseudo_reg_set (insn);
    2319              : 
    2320    237054288 :   if (!def_set)
    2321              :     return false;
    2322              : 
    2323     49552264 :   rtx src = SET_SRC (def_set);
    2324     49552264 :   rtx dst = SET_DEST (def_set);
    2325              : 
    2326     49552264 :   if (GET_CODE (src) == COMPARE)
    2327      8898996 :     return convertible_comparison_p (insn, mode);
    2328              : 
    2329              :   /* We are interested in "mode" only.  */
    2330     40653268 :   if ((GET_MODE (src) != mode
    2331     27792050 :        && !CONST_INT_P (src))
    2332     18021873 :       || GET_MODE (dst) != mode)
    2333              :     return false;
    2334              : 
    2335     15122887 :   if (!REG_P (dst) && !MEM_P (dst))
    2336              :     return false;
    2337              : 
    2338     14893693 :   switch (GET_CODE (src))
    2339              :     {
    2340       525992 :     case ASHIFT:
    2341       525992 :     case LSHIFTRT:
    2342       525992 :     case ASHIFTRT:
    2343       525992 :     case ROTATE:
    2344       525992 :     case ROTATERT:
    2345       525992 :       if (!CONST_INT_P (XEXP (src, 1))
    2346      1016749 :           || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, GET_MODE_BITSIZE (mode)-1))
    2347              :         return false;
    2348              : 
    2349              :       /* Check for extend highpart case.  */
    2350       490753 :       if (mode != DImode
    2351       351859 :           || GET_CODE (src) != ASHIFTRT
    2352        77039 :           || GET_CODE (XEXP (src, 0)) != ASHIFT)
    2353              :         break;
    2354              : 
    2355      3705022 :       src = XEXP (src, 0);
    2356              :       break;
    2357              : 
    2358        78154 :     case SMAX:
    2359        78154 :     case SMIN:
    2360        78154 :     case UMAX:
    2361        78154 :     case UMIN:
    2362        78154 :       if ((mode == DImode && !TARGET_AVX512VL)
    2363        17450 :           || (mode == SImode && !TARGET_SSE4_1))
    2364              :         return false;
    2365              :       /* Fallthru.  */
    2366              : 
    2367      3254998 :     case AND:
    2368      3254998 :     case IOR:
    2369      3254998 :     case XOR:
    2370      3254998 :     case PLUS:
    2371      3254998 :     case MINUS:
    2372      3254998 :       if (!REG_P (XEXP (src, 1))
    2373              :           && !MEM_P (XEXP (src, 1))
    2374              :           && !CONST_INT_P (XEXP (src, 1)))
    2375              :         return false;
    2376              : 
    2377      3162661 :       if (GET_MODE (XEXP (src, 1)) != mode
    2378      1848634 :           && !CONST_INT_P (XEXP (src, 1)))
    2379              :         return false;
    2380              : 
    2381              :       /* Check for andnot case.  */
    2382      3162661 :       if (GET_CODE (src) != AND
    2383       181859 :           || GET_CODE (XEXP (src, 0)) != NOT)
    2384              :         break;
    2385              : 
    2386      3705022 :       src = XEXP (src, 0);
    2387              :       /* FALLTHRU */
    2388              : 
    2389              :     case NOT:
    2390              :       break;
    2391              : 
    2392        24657 :     case NEG:
    2393              :       /* Check for nabs case.  */
    2394        24657 :       if (GET_CODE (XEXP (src, 0)) != ABS)
    2395              :         break;
    2396              : 
    2397              :       src = XEXP (src, 0);
    2398              :       /* FALLTHRU */
    2399              : 
    2400         2883 :     case ABS:
    2401         2883 :       if ((mode == DImode && !TARGET_AVX512VL)
    2402         1427 :           || (mode == SImode && !TARGET_SSSE3))
    2403              :         return false;
    2404              :       break;
    2405              : 
    2406              :     case REG:
    2407              :       return true;
    2408              : 
    2409      6001584 :     case MEM:
    2410      6001584 :     case CONST_INT:
    2411      6001584 :       return REG_P (dst);
    2412              : 
    2413        53627 :     case VEC_SELECT:
    2414              :       /* Excluding MEM_P (dst) avoids interfering with vpextr[dq].  */
    2415        53627 :       return REG_P (dst)
    2416        43117 :              && REG_P (XEXP (src, 0))
    2417        49292 :              && GET_MODE (XEXP (src, 0)) == (mode == DImode ? V2DImode
    2418              :                                                             : V4SImode)
    2419        33944 :              && GET_CODE (XEXP (src, 1)) == PARALLEL
    2420        33944 :              && XVECLEN (XEXP (src, 1), 0) == 1
    2421        87571 :              && CONST_INT_P (XVECEXP (XEXP (src, 1), 0, 0));
    2422              : 
    2423              :     default:
    2424              :       return false;
    2425              :     }
    2426              : 
    2427      3705022 :   if (!REG_P (XEXP (src, 0))
    2428              :       && !MEM_P (XEXP (src, 0))
    2429              :       && !CONST_INT_P (XEXP (src, 0)))
    2430              :     return false;
    2431              : 
    2432      3398188 :   if (GET_MODE (XEXP (src, 0)) != mode
    2433            0 :       && !CONST_INT_P (XEXP (src, 0)))
    2434              :     return false;
    2435              : 
    2436              :   return true;
    2437              : }
    2438              : 
    2439              : /* Check for a suitable TImode memory operand.  */
    2440              : 
    2441              : static bool
    2442         1565 : timode_mem_p (rtx x)
    2443              : {
    2444         1565 :   return MEM_P (x)
    2445         1565 :          && (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
    2446            0 :              || !misaligned_operand (x, TImode));
    2447              : }
    2448              : 
    2449              : /* The TImode version of scalar_to_vector_candidate_p.  */
    2450              : 
    2451              : static bool
    2452    102001332 : timode_scalar_to_vector_candidate_p (rtx_insn *insn)
    2453              : {
    2454    102001332 :   rtx def_set = pseudo_reg_set (insn);
    2455              : 
    2456    102001332 :   if (!def_set)
    2457              :     return false;
    2458              : 
    2459     23756263 :   rtx src = SET_SRC (def_set);
    2460     23756263 :   rtx dst = SET_DEST (def_set);
    2461              : 
    2462     23756263 :   if (GET_CODE (src) == COMPARE)
    2463      4007702 :     return convertible_comparison_p (insn, TImode);
    2464              : 
    2465     19748561 :   if (GET_MODE (dst) != TImode
    2466      1206128 :       || (GET_MODE (src) != TImode
    2467        62662 :           && !CONST_SCALAR_INT_P (src)))
    2468              :     return false;
    2469              : 
    2470      1206128 :   if (!REG_P (dst) && !MEM_P (dst))
    2471              :     return false;
    2472              : 
    2473      1204675 :   if (MEM_P (dst)
    2474       535807 :       && misaligned_operand (dst, TImode)
    2475      1521437 :       && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
    2476              :     return false;
    2477              : 
    2478      1204670 :   if (REG_P (dst) && !single_def_chain_p (dst))
    2479              :     return false;
    2480              : 
    2481      1052230 :   switch (GET_CODE (src))
    2482              :     {
    2483       490509 :     case REG:
    2484       490509 :       return single_def_chain_p (src);
    2485              : 
    2486              :     case CONST_WIDE_INT:
    2487              :       return true;
    2488              : 
    2489        13044 :     case CONST_INT:
    2490              :       /* ??? Verify performance impact before enabling CONST_INT for
    2491              :          __int128 store.  */
    2492        13044 :       return standard_sse_constant_p (src, TImode);
    2493              : 
    2494       449007 :     case MEM:
    2495              :       /* Memory must be aligned or unaligned load is optimal.  */
    2496       449007 :       return (REG_P (dst)
    2497       449007 :               && (!misaligned_operand (src, TImode)
    2498       152540 :                   || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
    2499              : 
    2500         3920 :     case AND:
    2501         3920 :       if (!MEM_P (dst)
    2502         3879 :           && GET_CODE (XEXP (src, 0)) == NOT
    2503            0 :           && REG_P (XEXP (XEXP (src, 0), 0))
    2504         3920 :           && (REG_P (XEXP (src, 1))
    2505            0 :               || CONST_SCALAR_INT_P (XEXP (src, 1))
    2506            0 :               || timode_mem_p (XEXP (src, 1))))
    2507            0 :         return true;
    2508         3920 :       return (REG_P (XEXP (src, 0))
    2509           46 :               || timode_mem_p (XEXP (src, 0)))
    2510         3966 :              && (REG_P (XEXP (src, 1))
    2511         2098 :                  || CONST_SCALAR_INT_P (XEXP (src, 1))
    2512           35 :                  || timode_mem_p (XEXP (src, 1)));
    2513              : 
    2514        13982 :     case IOR:
    2515        13982 :     case XOR:
    2516        13982 :       if (timode_concatdi_p (src))
    2517              :         return true;
    2518         2666 :       return (REG_P (XEXP (src, 0))
    2519         1437 :               || timode_mem_p (XEXP (src, 0)))
    2520         2683 :              && (REG_P (XEXP (src, 1))
    2521          267 :                  || CONST_SCALAR_INT_P (XEXP (src, 1))
    2522           31 :                  || timode_mem_p (XEXP (src, 1)));
    2523              : 
    2524          505 :     case NOT:
    2525          505 :       return REG_P (XEXP (src, 0)) || timode_mem_p (XEXP (src, 0));
    2526              : 
    2527        12281 :     case ASHIFT:
    2528        12281 :     case LSHIFTRT:
    2529        12281 :     case ASHIFTRT:
    2530        12281 :     case ROTATERT:
    2531        12281 :     case ROTATE:
    2532              :       /* Handle shifts/rotates by integer constants between 0 and 127.  */
    2533        12281 :       return REG_P (XEXP (src, 0))
    2534        12249 :              && CONST_INT_P (XEXP (src, 1))
    2535        24189 :              && (INTVAL (XEXP (src, 1)) & ~0x7f) == 0;
    2536              : 
    2537         7206 :     case PLUS:
    2538         7206 :       return timode_concatdi_p (src);
    2539              : 
    2540         3798 :     case ZERO_EXTEND:
    2541         3798 :       return REG_P (XEXP (src, 0))
    2542         3798 :              && GET_MODE (XEXP (src, 0)) == DImode;
    2543              : 
    2544              :     default:
    2545              :       return false;
    2546              :     }
    2547              : }
    2548              : 
    2549              : /* For a register REGNO, scan instructions for its defs and uses.
    2550              :    Put REGNO in REGS if a def or use isn't in CANDIDATES.  */
    2551              : 
    2552              : static void
    2553      1284857 : timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
    2554              :                                    unsigned int regno)
    2555              : {
    2556              :   /* Do nothing if REGNO is already in REGS or is a hard reg.  */
    2557      1284857 :   if (bitmap_bit_p (regs, regno)
    2558      1284857 :       || HARD_REGISTER_NUM_P (regno))
    2559              :     return;
    2560              : 
    2561      1272245 :   for (df_ref def = DF_REG_DEF_CHAIN (regno);
    2562      2520025 :        def;
    2563      1247780 :        def = DF_REF_NEXT_REG (def))
    2564              :     {
    2565      1272225 :       if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
    2566              :         {
    2567        24445 :           if (dump_file)
    2568            0 :             fprintf (dump_file,
    2569              :                      "r%d has non convertible def in insn %d\n",
    2570            0 :                      regno, DF_REF_INSN_UID (def));
    2571              : 
    2572        24445 :           bitmap_set_bit (regs, regno);
    2573        24445 :           break;
    2574              :         }
    2575              :     }
    2576              : 
    2577      1272245 :   for (df_ref ref = DF_REG_USE_CHAIN (regno);
    2578      2795321 :        ref;
    2579      1523076 :        ref = DF_REF_NEXT_REG (ref))
    2580              :     {
    2581              :       /* Debug instructions are skipped.  */
    2582      1587371 :       if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
    2583      1587371 :           && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
    2584              :         {
    2585        64295 :           if (dump_file)
    2586            0 :             fprintf (dump_file,
    2587              :                      "r%d has non convertible use in insn %d\n",
    2588            0 :                      regno, DF_REF_INSN_UID (ref));
    2589              : 
    2590        64295 :           bitmap_set_bit (regs, regno);
    2591        64295 :           break;
    2592              :         }
    2593              :     }
    2594              : }
    2595              : 
    2596              : /* For a given bitmap of insn UIDs, scan all instructions and
    2597              :    remove an insn from CANDIDATES in case it has both convertible
    2598              :    and non-convertible definitions.
    2599              : 
    2600              :    All insns in a bitmap are conversion candidates according to
    2601              :    scalar_to_vector_candidate_p.  Currently it implies all insns
    2602              :    are single_set.  */
    2603              : 
    2604              : static void
    2605       832304 : timode_remove_non_convertible_regs (bitmap candidates)
    2606              : {
    2607       832304 :   bitmap_iterator bi;
    2608       832304 :   unsigned id;
    2609       832304 :   bitmap regs = BITMAP_ALLOC (NULL);
    2610       859507 :   bool changed;
    2611              : 
    2612       859507 :   do {
    2613       859507 :     changed = false;
    2614      2172899 :     EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
    2615              :       {
    2616      1313392 :         rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
    2617      1313392 :         df_ref ref;
    2618              : 
    2619      1971184 :         FOR_EACH_INSN_DEF (ref, insn)
    2620       657792 :           if (!DF_REF_REG_MEM_P (ref)
    2621       657792 :               && GET_MODE (DF_REF_REG (ref)) == TImode)
    2622       635067 :             timode_check_non_convertible_regs (candidates, regs,
    2623              :                                                DF_REF_REGNO (ref));
    2624              : 
    2625      3240546 :         FOR_EACH_INSN_USE (ref, insn)
    2626      1927154 :           if (!DF_REF_REG_MEM_P (ref)
    2627       680130 :               && GET_MODE (DF_REF_REG (ref)) == TImode)
    2628       649790 :             timode_check_non_convertible_regs (candidates, regs,
    2629              :                                                DF_REF_REGNO (ref));
    2630              :       }
    2631              : 
    2632      1050236 :     EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
    2633              :       {
    2634       190729 :         for (df_ref def = DF_REG_DEF_CHAIN (id);
    2635       388300 :              def;
    2636       197571 :              def = DF_REF_NEXT_REG (def))
    2637       197571 :           if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
    2638              :             {
    2639        49262 :               if (dump_file)
    2640            0 :                 fprintf (dump_file, "Removing insn %d from candidates list\n",
    2641            0 :                          DF_REF_INSN_UID (def));
    2642              : 
    2643        49262 :               bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
    2644        49262 :               changed = true;
    2645              :             }
    2646              : 
    2647       190729 :         for (df_ref ref = DF_REG_USE_CHAIN (id);
    2648       513955 :              ref;
    2649       323226 :              ref = DF_REF_NEXT_REG (ref))
    2650       323226 :           if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
    2651              :             {
    2652        35287 :               if (dump_file)
    2653            0 :                 fprintf (dump_file, "Removing insn %d from candidates list\n",
    2654            0 :                          DF_REF_INSN_UID (ref));
    2655              : 
    2656        35287 :               bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
    2657        35287 :               changed = true;
    2658              :             }
    2659              :       }
    2660              :   } while (changed);
    2661              : 
    2662       832304 :   BITMAP_FREE (regs);
    2663       832304 : }
    2664              : 
    2665              : /* Main STV pass function.  Find and convert scalar
    2666              :    instructions into vector mode when profitable.  */
    2667              : 
    2668              : static unsigned int
    2669      1790568 : convert_scalars_to_vector (bool timode_p)
    2670              : {
    2671      1790568 :   basic_block bb;
    2672      1790568 :   int converted_insns = 0;
    2673      1790568 :   auto_vec<rtx_insn *> control_flow_insns;
    2674              : 
    2675      1790568 :   bitmap_obstack_initialize (NULL);
    2676      1790568 :   const machine_mode cand_mode[3] = { SImode, DImode, TImode };
    2677      1790568 :   const machine_mode cand_vmode[3] = { V4SImode, V2DImode, V1TImode };
    2678      5371704 :   bitmap_head candidates[3];  /* { SImode, DImode, TImode } */
    2679      7162272 :   for (unsigned i = 0; i < 3; ++i)
    2680      5371704 :     bitmap_initialize (&candidates[i], &bitmap_default_obstack);
    2681              : 
    2682      1790568 :   calculate_dominance_info (CDI_DOMINATORS);
    2683      1790568 :   df_set_flags (DF_DEFER_INSN_RESCAN | DF_RD_PRUNE_DEAD_DEFS);
    2684      1790568 :   df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
    2685      1790568 :   df_analyze ();
    2686              : 
    2687              :   /* Find all instructions we want to convert into vector mode.  */
    2688      1790568 :   if (dump_file)
    2689           44 :     fprintf (dump_file, "Searching for mode conversion candidates...\n");
    2690              : 
    2691     19843735 :   FOR_EACH_BB_FN (bb, cfun)
    2692              :     {
    2693     18053167 :       rtx_insn *insn;
    2694    240356182 :       FOR_BB_INSNS (bb, insn)
    2695    222303015 :         if (timode_p
    2696    222303015 :             && timode_scalar_to_vector_candidate_p (insn))
    2697              :           {
    2698      1016918 :             if (dump_file)
    2699            0 :               fprintf (dump_file, "  insn %d is marked as a TImode candidate\n",
    2700            0 :                        INSN_UID (insn));
    2701              : 
    2702      1016918 :             bitmap_set_bit (&candidates[2], INSN_UID (insn));
    2703              :           }
    2704    221286097 :         else if (!timode_p)
    2705              :           {
    2706              :             /* Check {SI,DI}mode.  */
    2707    345677654 :             for (unsigned i = 0; i <= 1; ++i)
    2708    237054288 :               if (general_scalar_to_vector_candidate_p (insn, cand_mode[i]))
    2709              :                 {
    2710     11678317 :                   if (dump_file)
    2711          554 :                     fprintf (dump_file, "  insn %d is marked as a %s candidate\n",
    2712          277 :                              INSN_UID (insn), i == 0 ? "SImode" : "DImode");
    2713              : 
    2714     11678317 :                   bitmap_set_bit (&candidates[i], INSN_UID (insn));
    2715     11678317 :                   break;
    2716              :                 }
    2717              :           }
    2718              :     }
    2719              : 
    2720      1790568 :   if (timode_p)
    2721       832304 :     timode_remove_non_convertible_regs (&candidates[2]);
    2722              : 
    2723      5678330 :   for (unsigned i = 0; i <= 2; ++i)
    2724      4519639 :     if (!bitmap_empty_p (&candidates[i]))
    2725              :       break;
    2726      3887762 :     else if (i == 2 && dump_file)
    2727           23 :       fprintf (dump_file, "There are no candidates for optimization.\n");
    2728              : 
    2729      7162272 :   for (unsigned i = 0; i <= 2; ++i)
    2730              :     {
    2731      5371704 :       auto_bitmap disallowed;
    2732      5371704 :       bitmap_tree_view (&candidates[i]);
    2733     17121315 :       while (!bitmap_empty_p (&candidates[i]))
    2734              :         {
    2735      6377907 :           unsigned uid = bitmap_first_set_bit (&candidates[i]);
    2736      6377907 :           scalar_chain *chain;
    2737              : 
    2738      6377907 :           if (cand_mode[i] == TImode)
    2739       473775 :             chain = new timode_scalar_chain;
    2740              :           else
    2741      5904132 :             chain = new general_scalar_chain (cand_mode[i], cand_vmode[i]);
    2742              : 
    2743              :           /* Find the instruction chain we want to convert to vector mode.
    2744              :              Check all uses and definitions to estimate all required
    2745              :              conversions.  */
    2746      6377907 :           if (chain->build (&candidates[i], uid, disallowed))
    2747              :             {
    2748      6371018 :               if (chain->compute_convert_gain ())
    2749       643914 :                 converted_insns += chain->convert ();
    2750      5727104 :               else if (dump_file)
    2751          136 :                 fprintf (dump_file, "Chain #%d conversion is not profitable\n",
    2752              :                          chain->chain_id);
    2753              :             }
    2754              : 
    2755      6377907 :           rtx_insn* iter_insn;
    2756      6377907 :           unsigned int ii;
    2757      6381504 :           FOR_EACH_VEC_ELT (chain->control_flow_insns, ii, iter_insn)
    2758         3597 :             control_flow_insns.safe_push (iter_insn);
    2759              : 
    2760      6377907 :           delete chain;
    2761              :         }
    2762      5371704 :     }
    2763              : 
    2764      1790568 :   if (dump_file)
    2765           44 :     fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
    2766              : 
    2767      7162272 :   for (unsigned i = 0; i <= 2; ++i)
    2768      5371704 :     bitmap_release (&candidates[i]);
    2769      1790568 :   bitmap_obstack_release (NULL);
    2770      1790568 :   df_process_deferred_rescans ();
    2771              : 
    2772              :   /* Conversion means we may have 128bit register spills/fills
    2773              :      which require aligned stack.  */
    2774      1790568 :   if (converted_insns)
    2775              :     {
    2776       104811 :       if (crtl->stack_alignment_needed < 128)
    2777         2372 :         crtl->stack_alignment_needed = 128;
    2778       104811 :       if (crtl->stack_alignment_estimated < 128)
    2779          219 :         crtl->stack_alignment_estimated = 128;
    2780              : 
    2781       104811 :       crtl->stack_realign_needed
    2782       104811 :         = INCOMING_STACK_BOUNDARY < crtl->stack_alignment_estimated;
    2783       104811 :       crtl->stack_realign_tried = crtl->stack_realign_needed;
    2784              : 
    2785       104811 :       crtl->stack_realign_processed = true;
    2786              : 
    2787       104811 :       if (!crtl->drap_reg)
    2788              :         {
    2789       104645 :           rtx drap_rtx = targetm.calls.get_drap_rtx ();
    2790              : 
    2791              :           /* stack_realign_drap and drap_rtx must match.  */
    2792       104645 :           gcc_assert ((stack_realign_drap != 0) == (drap_rtx != NULL));
    2793              : 
    2794              :           /* Do nothing if NULL is returned,
    2795              :              which means DRAP is not needed.  */
    2796       104645 :           if (drap_rtx != NULL)
    2797              :             {
    2798            0 :               crtl->args.internal_arg_pointer = drap_rtx;
    2799              : 
    2800              :               /* Call fixup_tail_calls to clean up
    2801              :                  REG_EQUIV note if DRAP is needed. */
    2802            0 :               fixup_tail_calls ();
    2803              :             }
    2804              :         }
    2805              : 
    2806              :       /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments.  */
    2807       104811 :       if (TARGET_64BIT)
    2808        66377 :         for (tree parm = DECL_ARGUMENTS (current_function_decl);
    2809       183037 :              parm; parm = DECL_CHAIN (parm))
    2810              :           {
    2811       116660 :             if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
    2812       100506 :               continue;
    2813        16154 :             if (DECL_RTL_SET_P (parm)
    2814        32308 :                 && GET_MODE (DECL_RTL (parm)) == V1TImode)
    2815              :               {
    2816          522 :                 rtx r = DECL_RTL (parm);
    2817          522 :                 if (REG_P (r))
    2818          522 :                   SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
    2819              :               }
    2820        16154 :             if (DECL_INCOMING_RTL (parm)
    2821        16154 :                 && GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
    2822              :               {
    2823            0 :                 rtx r = DECL_INCOMING_RTL (parm);
    2824            0 :                 if (REG_P (r))
    2825            0 :                   DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
    2826              :               }
    2827              :           }
    2828              : 
    2829       104811 :       if (!control_flow_insns.is_empty ())
    2830              :         {
    2831         1130 :           free_dominance_info (CDI_DOMINATORS);
    2832              : 
    2833         1130 :           unsigned int i;
    2834         1130 :           rtx_insn* insn;
    2835         5857 :           FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
    2836         3597 :             if (control_flow_insn_p (insn))
    2837              :               {
    2838              :                 /* Split the block after insn.  There will be a fallthru
    2839              :                    edge, which is OK so we keep it.  We have to create
    2840              :                    the exception edges ourselves.  */
    2841         3597 :                 bb = BLOCK_FOR_INSN (insn);
    2842         3597 :                 split_block (bb, insn);
    2843         3597 :                 rtl_make_eh_edge (NULL, bb, BB_END (bb));
    2844              :               }
    2845              :         }
    2846              :     }
    2847              : 
    2848      1790568 :   return 0;
    2849      1790568 : }
    2850              : 
    2851              : static unsigned int
    2852        74443 : rest_of_handle_insert_vzeroupper (void)
    2853              : {
    2854              :   /* vzeroupper instructions are inserted immediately after reload and
    2855              :      postreload_cse to clean up after it a little bit to account for possible
    2856              :      spills from 256bit or 512bit registers.  The pass reuses mode switching
    2857              :      infrastructure by re-running mode insertion pass, so disable entities
    2858              :      that have already been processed.  */
    2859       521101 :   for (int i = 0; i < MAX_386_ENTITIES; i++)
    2860       446658 :     ix86_optimize_mode_switching[i] = 0;
    2861              : 
    2862        74443 :   ix86_optimize_mode_switching[AVX_U128] = 1;
    2863              : 
    2864              :   /* Call optimize_mode_switching.  */
    2865        74443 :   g->get_passes ()->execute_pass_mode_switching ();
    2866              : 
    2867              :   /* LRA removes all REG_DEAD/REG_UNUSED notes and normally they
    2868              :      reappear in the IL only at the start of pass_rtl_dse2, which does
    2869              :      df_note_add_problem (); df_analyze ();
    2870              :      The vzeroupper is scheduled after postreload_cse pass and mode
    2871              :      switching computes the notes as well, the problem is that e.g.
    2872              :      pass_gcse2 doesn't maintain the notes, see PR113059 and
    2873              :      PR112760.  Remove the notes now to restore status quo ante
    2874              :      until we figure out how to maintain the notes or what else
    2875              :      to do.  */
    2876        74443 :   basic_block bb;
    2877        74443 :   rtx_insn *insn;
    2878       409381 :   FOR_EACH_BB_FN (bb, cfun)
    2879      4319679 :     FOR_BB_INSNS (bb, insn)
    2880      3984741 :       if (NONDEBUG_INSN_P (insn))
    2881              :         {
    2882      2121773 :           rtx *pnote = &REG_NOTES (insn);
    2883      3934874 :           while (*pnote != 0)
    2884              :             {
    2885      1813101 :               if (REG_NOTE_KIND (*pnote) == REG_DEAD
    2886       830088 :                   || REG_NOTE_KIND (*pnote) == REG_UNUSED)
    2887      1300583 :                 *pnote = XEXP (*pnote, 1);
    2888              :               else
    2889       512518 :                 pnote = &XEXP (*pnote, 1);
    2890              :             }
    2891              :         }
    2892              : 
    2893        74443 :   df_remove_problem (df_note);
    2894        74443 :   df_analyze ();
    2895        74443 :   return 0;
    2896              : }
    2897              : 
    2898              : namespace {
    2899              : 
    2900              : const pass_data pass_data_insert_vzeroupper =
    2901              : {
    2902              :   RTL_PASS, /* type */
    2903              :   "vzeroupper", /* name */
    2904              :   OPTGROUP_NONE, /* optinfo_flags */
    2905              :   TV_MACH_DEP, /* tv_id */
    2906              :   0, /* properties_required */
    2907              :   0, /* properties_provided */
    2908              :   0, /* properties_destroyed */
    2909              :   0, /* todo_flags_start */
    2910              :   TODO_df_finish, /* todo_flags_finish */
    2911              : };
    2912              : 
    2913              : class pass_insert_vzeroupper : public rtl_opt_pass
    2914              : {
    2915              : public:
    2916       285722 :   pass_insert_vzeroupper(gcc::context *ctxt)
    2917       571444 :     : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
    2918              :   {}
    2919              : 
    2920              :   /* opt_pass methods: */
    2921      1471370 :   bool gate (function *) final override
    2922              :     {
    2923      1471370 :       return TARGET_AVX && TARGET_VZEROUPPER;
    2924              :     }
    2925              : 
    2926        74443 :   unsigned int execute (function *) final override
    2927              :     {
    2928        74443 :       return rest_of_handle_insert_vzeroupper ();
    2929              :     }
    2930              : 
    2931              : }; // class pass_insert_vzeroupper
    2932              : 
                        /* Descriptor of the "stv" RTL pass; pass_stv hands it to the
                           rtl_opt_pass constructor.  All property masks and the start
                           TODO flags are zero; TODO_df_finish is the only finish TODO
                           flag.  */
    2933              : const pass_data pass_data_stv =
    2934              : {
    2935              :   RTL_PASS, /* type */
    2936              :   "stv", /* name */
    2937              :   OPTGROUP_NONE, /* optinfo_flags */
    2938              :   TV_MACH_DEP, /* tv_id */
    2939              :   0, /* properties_required */
    2940              :   0, /* properties_provided */
    2941              :   0, /* properties_destroyed */
    2942              :   0, /* todo_flags_start */
    2943              :   TODO_df_finish, /* todo_flags_finish */
    2944              : };
    2945              : 
                        /* RTL pass object for the "stv" pass described by pass_data_stv.
                           The pass is parameterized by the boolean TIMODE_P, which starts
                           out false and is changed through set_pass_param.  */
    2946              : class pass_stv : public rtl_opt_pass
    2947              : {
    2948              : public:
    2949       571444 :   pass_stv (gcc::context *ctxt)
    2950       571444 :     : rtl_opt_pass (pass_data_stv, ctxt),
    2951      1142888 :       timode_p (false)
    2952              :   {}
    2953              : 
    2954              :   /* opt_pass methods: */
                          /* Enabled when TARGET_STV and TARGET_SSE2 are set and optimize
                             is greater than 1.  An instance with TIMODE_P set additionally
                             requires TARGET_64BIT.  */
    2955      2942740 :   bool gate (function *) final override
    2956              :     {
    2957      1471370 :       return ((!timode_p || TARGET_64BIT)
    2958      4287829 :               && TARGET_STV && TARGET_SSE2 && optimize > 1);
    2959              :     }
    2960              : 
                          /* Run convert_scalars_to_vector with this instance's TIMODE_P
                             and return its result.  */
    2961      1790568 :   unsigned int execute (function *) final override
    2962              :     {
    2963      1790568 :       return convert_scalars_to_vector (timode_p);
    2964              :     }
    2965              : 
                          /* Return a new pass_stv for the same context.  TIMODE_P is not
                             copied: the clone starts with the constructor default (false)
                             until set_pass_param is called on it.  */
    2966       285722 :   opt_pass *clone () final override
    2967              :     {
    2968       285722 :       return new pass_stv (m_ctxt);
    2969              :     }
    2970              : 
                          /* Set TIMODE_P to PARAM.  Only parameter index 0 is accepted;
                             any other N trips the assertion.  */
    2971       571444 :   void set_pass_param (unsigned int n, bool param) final override
    2972              :     {
    2973       571444 :       gcc_assert (n == 0);
    2974       571444 :       timode_p = param;
    2975       571444 :     }
    2976              : 
    2977              : private:
                          /* Pass parameter forwarded to convert_scalars_to_vector.  */
    2978              :   bool timode_p;
    2979              : }; // class pass_stv
    2980              : 
    2981              : } // anon namespace
    2982              : 
                        /* Return a newly allocated pass_insert_vzeroupper for context
                           CTXT.  */
    2983              : rtl_opt_pass *
    2984       285722 : make_pass_insert_vzeroupper (gcc::context *ctxt)
    2985              : {
    2986       285722 :   return new pass_insert_vzeroupper (ctxt);
    2987              : }
    2988              : 
                        /* Return a newly allocated pass_stv for context CTXT.  Its
                           TIMODE_P parameter is false until set_pass_param changes it.  */
    2989              : rtl_opt_pass *
    2990       285722 : make_pass_stv (gcc::context *ctxt)
    2991              : {
    2992       285722 :   return new pass_stv (ctxt);
    2993              : }
    2994              : 
    2995              : /* Insert ENDBR and pseudo patchable-area instructions into the
                           current function (cfun).

                           If NEED_ENDBR, an ENDBR is placed at function entry when the
                           attribute and call-graph checks below ask for one.  Further
                           ENDBRs are emitted after calls that carry a REG_SETJMP note or
                           whose callee type has the "indirect_return" attribute, after the
                           head label of every successor block of a jump-table jump (only
                           with flag_cet_switch), and after every label that has
                           LABEL_PRESERVE_P set.

                           If PATCHABLE_AREA_SIZE is nonzero, a patchable_area insn of that
                           size is placed at function entry, right after the entry ENDBR if
                           one was emitted here.

                           When profiling with flag_fentry the entry insns are not emitted
                           here; the request is recorded in
                           cfun->machine->insn_queued_at_entrance instead.  */
    2996              : 
    2997              : static void
    2998       198192 : rest_of_insert_endbr_and_patchable_area (bool need_endbr,
    2999              :                                          unsigned int patchable_area_size)
    3000              : {
    3001       198192 :   rtx endbr;
    3002       198192 :   rtx_insn *insn;
    3003       198192 :   rtx_insn *endbr_insn = NULL;
    3004       198192 :   basic_block bb;
    3005              : 
    3006       198192 :   if (need_endbr)
    3007              :     {
    3008              :       /* Emit ENDBR at entry of a tracking function, i.e. one without
    3009              :          'nocf_check' among its type attributes, unless the checks below
    3010              :          show that it is only called directly.  With flag_manual_endbr
    3011              :          the function must also carry 'cf_check'.  Functions that are
    3012              :          only called directly still get ENDBR for the large code models,
    3013              :          with flag_force_indirect_call, or when declared dllimport.  */
    3014       198147 :       if (!lookup_attribute ("nocf_check",
    3015       198147 :                              TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
    3016       198129 :           && (!flag_manual_endbr
    3017            8 :               || lookup_attribute ("cf_check",
    3018            8 :                                    DECL_ATTRIBUTES (cfun->decl)))
    3019       396275 :           && (!cgraph_node::get (cfun->decl)->only_called_directly_p ()
    3020        27192 :               || ix86_cmodel == CM_LARGE
    3021        27191 :               || ix86_cmodel == CM_LARGE_PIC
    3022        27190 :               || flag_force_indirect_call
    3023        27190 :               || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
    3024              :                   && DECL_DLLIMPORT_P (cfun->decl))))
    3025              :         {
    3026       170939 :           if (crtl->profile && flag_fentry)
    3027              :             {
    3028              :               /* Queue ENDBR insertion to x86_function_profiler.
    3029              :                  NB: Any patchable-area insn will be inserted after
    3030              :                  ENDBR.  */
    3031            6 :               cfun->machine->insn_queued_at_entrance = TYPE_ENDBR;
    3032              :             }
    3033              :           else
    3034              :             {
                                      /* Emit the ENDBR before the head of the block that
                                         follows the entry block and remember it, so that a
                                         patchable area can be placed right after it.  */
    3035       170933 :               endbr = gen_nop_endbr ();
    3036       170933 :               bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
    3037       170933 :               rtx_insn *insn = BB_HEAD (bb); /* NOTE(review): shadows the function-scope INSN.  */
    3038       170933 :               endbr_insn = emit_insn_before (endbr, insn);
    3039              :             }
    3040              :         }
    3041              :     }
    3042              : 
    3043       198192 :   if (patchable_area_size)
    3044              :     {
    3045           51 :       if (crtl->profile && flag_fentry)
    3046              :         {
    3047              :           /* Queue patchable-area insertion to x86_function_profiler.
    3048              :              NB: If there is a queued ENDBR, x86_function_profiler
    3049              :              will also handle patchable-area.  */
    3050            2 :           if (!cfun->machine->insn_queued_at_entrance)
    3051            1 :             cfun->machine->insn_queued_at_entrance = TYPE_PATCHABLE_AREA;
    3052              :         }
    3053              :       else
    3054              :         {
                                  /* The second operand is 1 exactly when
                                     crtl->patch_area_entry is zero.  */
    3055           49 :           rtx patchable_area
    3056           49 :             = gen_patchable_area (GEN_INT (patchable_area_size),
    3057           49 :                                   GEN_INT (crtl->patch_area_entry == 0));
    3058           49 :           if (endbr_insn)
    3059            3 :             emit_insn_after (patchable_area, endbr_insn);
    3060              :           else
    3061              :             {
    3062           46 :               bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
    3063           46 :               insn = BB_HEAD (bb);
    3064           46 :               emit_insn_before (patchable_area, insn);
    3065              :             }
    3066              :         }
    3067              :     }
    3068              : 
                          /* Everything below only inserts ENDBRs.  */
    3069       198192 :   if (!need_endbr)
    3070              :     return;
    3071              : 
    3072       198147 :   bb = 0; /* NOTE(review): presumably redundant if FOR_EACH_BB_FN assigns BB itself -- confirm.  */
    3073      4100595 :   FOR_EACH_BB_FN (bb, cfun)
    3074              :     {
    3075     73990475 :       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
    3076     70088027 :            insn = NEXT_INSN (insn))
    3077              :         {
    3078     70088027 :           if (CALL_P (insn))
    3079              :             {
                                      /* From here on NEED_ENDBR is reused as a per-call
                                         flag; the parameter's value was already consumed
                                         above.  */
    3080      1377302 :               need_endbr = find_reg_note (insn, REG_SETJMP, NULL) != NULL;
    3081      1377302 :               if (!need_endbr && !SIBLING_CALL_P (insn))
    3082              :                 {
    3083      1325732 :                   rtx call = get_call_rtx_from (insn);
    3084      1325732 :                   rtx fnaddr = XEXP (call, 0);
    3085      1325732 :                   tree fndecl = NULL_TREE;
    3086              : 
    3087              :                   /* Also generate ENDBRANCH for non-tail call which
    3088              :                      may return via indirect branch.  */
    3089      1325732 :                   if (SYMBOL_REF_P (XEXP (fnaddr, 0)))
    3090      1263955 :                     fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
    3091      1263955 :                   if (fndecl == NULL_TREE)
    3092        62145 :                     fndecl = MEM_EXPR (fnaddr);
                                          /* Ignore anything whose type is neither a function
                                             type nor a method type.  */
    3093        62145 :                   if (fndecl
    3094      1323240 :                       && TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE
    3095       564691 :                       && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE)
    3096              :                     fndecl = NULL_TREE;
    3097      1325732 :                   if (fndecl && TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
    3098              :                     {
    3099      1284314 :                       tree fntype = TREE_TYPE (fndecl);
    3100      1284314 :                       if (lookup_attribute ("indirect_return",
    3101      1284314 :                                             TYPE_ATTRIBUTES (fntype)))
    3102              :                         need_endbr = true;
    3103              :                     }
    3104              :                 }
    3105      1377290 :               if (!need_endbr)
    3106      1377282 :                 continue;
    3107              :               /* Generate ENDBRANCH after a CALL that has a REG_SETJMP note
    3108              :                  (setjmp-like) or calls an indirect_return function.  */
    3109              : 
    3110           20 :               endbr = gen_nop_endbr ();
    3111           20 :               emit_insn_after_setloc (endbr, insn, INSN_LOCATION (insn));
    3112           20 :               continue;
    3113           20 :             }
    3114              : 
    3115     68710725 :           if (JUMP_P (insn) && flag_cet_switch)
    3116              :             {
    3117            9 :               rtx target = JUMP_LABEL (insn);
    3118            9 :               if (target == NULL_RTX || ANY_RETURN_P (target))
    3119            5 :                 continue;
    3120              : 
    3121              :               /* Check the jump is a switch table.  */
    3122            4 :               rtx_insn *label = as_a<rtx_insn *> (target);
    3123            4 :               rtx_insn *table = next_insn (label);
    3124            4 :               if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
    3125            2 :                 continue;
    3126              : 
    3127              :               /* For the indirect jump find out all places it jumps and insert
    3128              :                  ENDBRANCH there.  It should be done under a special flag to
    3129              :                  control ENDBRANCH generation for switch stmts.  */
    3130            2 :               edge_iterator ei;
    3131            2 :               edge e;
    3132            2 :               basic_block dest_blk;
    3133              : 
    3134           24 :               FOR_EACH_EDGE (e, ei, bb->succs)
    3135              :                 {
    3136           22 :                   rtx_insn *insn; /* NOTE(review): shadows the loop variable INSN of the enclosing for.  */
    3137              : 
    3138           22 :                   dest_blk = e->dest;
    3139           22 :                   insn = BB_HEAD (dest_blk);
    3140           22 :                   gcc_assert (LABEL_P (insn));
    3141           22 :                   endbr = gen_nop_endbr ();
    3142           22 :                   emit_insn_after (endbr, insn);
    3143              :                 }
    3144            2 :               continue;
    3145            2 :             }
    3146              : 
                                  /* Labels with LABEL_PRESERVE_P set get an ENDBR right
                                     after them.  */
    3147     68710716 :           if (LABEL_P (insn) && LABEL_PRESERVE_P (insn))
    3148              :             {
    3149       139168 :               endbr = gen_nop_endbr ();
    3150       139168 :               emit_insn_after (endbr, insn);
    3151       139168 :               continue;
    3152              :             }
    3153              :         }
    3154              :     }
    3155              : 
    3156              :   return; /* NOTE(review): redundant at the end of a void function.  */
    3157              : }
    3158              : 
    3159              : namespace {
    3160              : 
                        /* Descriptor of the "endbr_and_patchable_area" RTL pass;
                           pass_insert_endbr_and_patchable_area hands it to the
                           rtl_opt_pass constructor.  All property masks and TODO flags
                           are zero.  */
    3161              : const pass_data pass_data_insert_endbr_and_patchable_area =
    3162              : {
    3163              :   RTL_PASS, /* type.  */
    3164              :   "endbr_and_patchable_area", /* name.  */
    3165              :   OPTGROUP_NONE, /* optinfo_flags.  */
    3166              :   TV_MACH_DEP, /* tv_id.  */
    3167              :   0, /* properties_required.  */
    3168              :   0, /* properties_provided.  */
    3169              :   0, /* properties_destroyed.  */
    3170              :   0, /* todo_flags_start.  */
    3171              :   0, /* todo_flags_finish.  */
    3172              : };
    3173              : 
                        /* RTL pass object for the "endbr_and_patchable_area" pass.  gate
                           computes the two inputs of the transformation and caches them in
                           members; execute passes the cached values on to
                           rest_of_insert_endbr_and_patchable_area.  */
    3174              : class pass_insert_endbr_and_patchable_area : public rtl_opt_pass
    3175              : {
    3176              : public:
                          /* NOTE(review): NEED_ENDBR and PATCHABLE_AREA_SIZE are not
                             initialized here, so execute relies on gate having been called
                             first for the same function.  */
    3177       285722 :   pass_insert_endbr_and_patchable_area (gcc::context *ctxt)
    3178       571444 :     : rtl_opt_pass (pass_data_insert_endbr_and_patchable_area, ctxt)
    3179              :   {}
    3180              : 
    3181              :   /* opt_pass methods: */
                          /* Record whether CF_BRANCH is set in flag_cf_protection and the
                             patchable-area size (crtl->patch_area_size minus
                             crtl->patch_area_entry) in the members.  The pass runs when
                             either is nonzero.  */
    3182      1471370 :   bool gate (function *) final override
    3183              :     {
    3184      1471370 :       need_endbr = (flag_cf_protection & CF_BRANCH) != 0;
    3185      1471370 :       patchable_area_size = crtl->patch_area_size - crtl->patch_area_entry;
    3186      1471370 :       return need_endbr || patchable_area_size;
    3187              :     }
    3188              : 
                          /* Run the insertion with the values cached by gate, bracketed by
                             a push/pop of the TV_MACH_DEP timevar.  Always returns 0.  */
    3189       198192 :   unsigned int execute (function *) final override
    3190              :     {
    3191       198192 :       timevar_push (TV_MACH_DEP);
    3192       198192 :       rest_of_insert_endbr_and_patchable_area (need_endbr,
    3193              :                                                patchable_area_size);
    3194       198192 :       timevar_pop (TV_MACH_DEP);
    3195       198192 :       return 0;
    3196              :     }
    3197              : 
    3198              : private:
                          /* Values computed by the most recent call to gate.  */
    3199              :   bool need_endbr;
    3200              :   unsigned int patchable_area_size;
    3201              : }; // class pass_insert_endbr_and_patchable_area
    3202              : 
    3203              : } // anon namespace
    3204              : 
                        /* Return a newly allocated pass_insert_endbr_and_patchable_area
                           for context CTXT.  */
    3205              : rtl_opt_pass *
    3206       285722 : make_pass_insert_endbr_and_patchable_area (gcc::context *ctxt)
    3207              : {
    3208       285722 :   return new pass_insert_endbr_and_patchable_area (ctxt);
    3209              : }
    3210              : 
                        /* Return true when the "rpad" pass should run on the current
                           function: TARGET_AVX, TARGET_SSE_PARTIAL_REG_DEPENDENCY and
                           TARGET_SSE_MATH must all hold, optimize must be nonzero, and
                           cfun must be optimized for speed.  Used as the gate of
                           pass_remove_partial_avx_dependency.  */
    3211              : bool
    3212      6116892 : ix86_rpad_gate ()
    3213              : {
    3214      6116892 :   return (TARGET_AVX
    3215       403983 :           && TARGET_SSE_PARTIAL_REG_DEPENDENCY
    3216       309088 :           && TARGET_SSE_MATH
    3217       308858 :           && optimize
    3218      6420547 :           && optimize_function_for_speed_p (cfun));
    3219              : }
    3220              : 
                        /* Kind of a redundant pattern; stored in redundant_pattern::kind.
                           ix86_place_single_vector_set treats X86_CSE_VEC_DUP specially:
                           it does not hoist the set out of loops and it emits an extra
                           set of the duplicated scalar.
                           NOTE(review): the users of the other enumerators are not in
                           this part of the file.  Presumably the CONST0/CONSTM1 kinds
                           stand for all-zeros/all-ones vector constants and the TLS kinds
                           for TLS address computations -- confirm against the users.  */
    3221              : enum x86_cse_kind
    3222              : {
    3223              :   X86_CSE_CONST0_VECTOR,
    3224              :   X86_CSE_CONSTM1_VECTOR,
    3225              :   X86_CSE_VEC_DUP,
    3226              :   X86_CSE_TLS_GD,
    3227              :   X86_CSE_TLS_LD_BASE,
    3228              :   X86_CSE_TLSDESC
    3229              : };
    3230              : 
                        /* Bookkeeping for one candidate redundant pattern, as described
                           by the member comments below.  ix86_place_single_vector_set
                           accepts a pointer to one of these and reads its KIND and VAL
                           members.  */
    3231       122144 : struct redundant_pattern
    3232              : {
    3233              :   /* Bitmap of basic blocks with broadcast instructions.  */
    3234              :   auto_bitmap bbs;
    3235              :   /* Bitmap of broadcast instructions.  */
    3236              :   auto_bitmap insns;
    3237              :   /* The broadcast inner scalar.  */
    3238              :   rtx val;
    3239              :   /* The actual redundant source value for UNSPEC_TLSDESC.  */
    3240              :   rtx tlsdesc_val;
    3241              :   /* The inner scalar mode.  */
    3242              :   machine_mode mode;
    3243              :   /* The instruction which sets the inner scalar.  Nullptr if the inner
    3244              :      scalar is applied to the whole function, instead of within the same
    3245              :      block.  */
    3246              :   rtx_insn *def_insn;
    3247              :   /* The widest broadcast source.  */
    3248              :   rtx broadcast_source;
    3249              :   /* The widest broadcast register.  */
    3250              :   rtx broadcast_reg;
    3251              :   /* The basic block of the broadcast instruction.  */
    3252              :   basic_block bb;
    3253              :   /* The number of broadcast instructions with the same inner scalar.  */
    3254              :   unsigned HOST_WIDE_INT count;
    3255              :   /* The threshold of broadcast instructions with the same inner
    3256              :      scalar.  */
    3257              :   unsigned int threshold;
    3258              :   /* The widest broadcast size in bytes.  */
    3259              :   unsigned int size;
    3260              :   /* Load kind.  */
    3261              :   x86_cse_kind kind;
    3262              : };
    3263              : 
    3264              : /* Generate a vector set, DEST = SRC, at entry of the nearest dominator
    3265              :    for basic block map BBS, which is in the fake loop that contains the
    3266              :    whole function, so that there is only a single vector set in the
    3267              :    whole function.  If not nullptr, LOAD is a pointer to the load.

                           The set is emitted in front of the first non-debug insn of the
                           chosen block, or after the block's last insn when the block has
                           no such insn.

                           When LOAD is given and its kind is X86_CSE_VEC_DUP, the chosen
                           block is not moved out of loops, and a second set that copies
                           LOAD->val into XEXP (SRC, 0) is emitted immediately before the
                           vector set.

                           Each emitted insn is also printed to dump_file when that is
                           non-null.  */
    3268              : 
    3269              : static void
    3270        31988 : ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs,
    3271              :                               redundant_pattern *load = nullptr)
    3272              : {
    3273        31988 :   basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
    3274              :   /* For X86_CSE_VEC_DUP, don't place the vector set outside of the loop
    3275              :      to avoid extra spills.  */
    3276        31988 :   if (!load || load->kind != X86_CSE_VEC_DUP)
    3277              :     {
                              /* Walk up through the immediate dominators of the enclosing
                                 loop headers until BB's loop is the one whose latch is the
                                 function's exit block.  */
    3278        23085 :       while (bb->loop_father->latch
    3279        23085 :              != EXIT_BLOCK_PTR_FOR_FN (cfun))
    3280         1350 :         bb = get_immediate_dominator (CDI_DOMINATORS,
    3281              :                                       bb->loop_father->header);
    3282              :     }
    3283              : 
    3284        31988 :   rtx set = gen_rtx_SET (dest, src);
    3285              : 
                          /* Find the first non-debug insn of BB; INSN becomes NULL when
                             the block contains none.  */
    3286        31988 :   rtx_insn *insn = BB_HEAD (bb);
    3287       123646 :   while (insn && !NONDEBUG_INSN_P (insn))
    3288              :     {
    3289        91662 :       if (insn == BB_END (bb))
    3290              :         {
    3291              :           insn = NULL;
    3292              :           break;
    3293              :         }
    3294        91658 :       insn = NEXT_INSN (insn);
    3295              :     }
    3296              : 
    3297        31988 :   rtx_insn *set_insn;
                          /* NOTE(review): the coverage data shows this branch is never
                             taken.  It and the else branch differ only in the emit call
                             and the "before"/"after" dump text.  */
    3298        31988 :   if (insn == BB_HEAD (bb))
    3299              :     {
    3300            0 :       set_insn = emit_insn_before (set, insn);
    3301            0 :       if (dump_file)
    3302              :         {
    3303            0 :           fprintf (dump_file, "\nPlace:\n\n");
    3304            0 :           print_rtl_single (dump_file, set_insn);
    3305            0 :           fprintf (dump_file, "\nbefore:\n\n");
    3306            0 :           print_rtl_single (dump_file, insn);
    3307            0 :           fprintf (dump_file, "\n");
    3308              :         }
    3309              :     }
    3310              :   else
    3311              :     {
                              /* Emit after the insn preceding the first non-debug insn, or
                                 after the end of BB when there is no non-debug insn.  */
    3312        31988 :       rtx_insn *after = insn ? PREV_INSN (insn) : BB_END (bb);
    3313        31988 :       set_insn = emit_insn_after (set, after);
    3314        31988 :       if (dump_file)
    3315              :         {
    3316            0 :           fprintf (dump_file, "\nPlace:\n\n");
    3317            0 :           print_rtl_single (dump_file, set_insn);
    3318            0 :           fprintf (dump_file, "\nafter:\n\n");
    3319            0 :           print_rtl_single (dump_file, after);
    3320            0 :           fprintf (dump_file, "\n");
    3321              :         }
    3322              :     }
    3323              : 
    3324        31988 :   if (load && load->kind == X86_CSE_VEC_DUP)
    3325              :     {
    3326              :       /* Get the source from LOAD as (reg:SI 99) in
    3327              : 
    3328              :          (vec_duplicate:V4SI (reg:SI 99))
    3329              : 
    3330              :        */
    3331        10253 :       rtx inner_scalar = load->val;
    3332              :       /* Set the source in (vec_duplicate:V4SI (reg:SI 99)).  */
    3333        10253 :       rtx reg = XEXP (src, 0);
                              /* A REG or MEM scalar whose mode differs from REG's mode is
                                 wrapped in a SUBREG of REG's mode at offset 0.  */
    3334        10253 :       if ((REG_P (inner_scalar) || MEM_P (inner_scalar))
    3335          278 :           && GET_MODE (reg) != GET_MODE (inner_scalar))
    3336            0 :         inner_scalar = gen_rtx_SUBREG (GET_MODE (reg), inner_scalar, 0);
    3337        10253 :       rtx set = gen_rtx_SET (reg, inner_scalar); /* NOTE(review): shadows the outer SET.  */
    3338        10253 :       insn = emit_insn_before (set, set_insn);
    3339        10253 :       if (dump_file)
    3340              :         {
    3341            0 :           fprintf (dump_file, "\nAdd:\n\n");
    3342            0 :           print_rtl_single (dump_file, insn);
    3343            0 :           fprintf (dump_file, "\nbefore:\n\n");
    3344            0 :           print_rtl_single (dump_file, set_insn);
    3345            0 :           fprintf (dump_file, "\n");
    3346              :         }
    3347              :     }
    3348        31988 : }
    3349              : 
    3350              : /* At entry of the nearest common dominator for basic blocks with
    3351              :    conversions/rcp/sqrt/rsqrt/round, generate a single
    3352              :         vxorps %xmmN, %xmmN, %xmmN
    3353              :    for all
    3354              :         vcvtss2sd  op, %xmmN, %xmmX
    3355              :         vcvtsd2ss  op, %xmmN, %xmmX
    3356              :         vcvtsi2ss  op, %xmmN, %xmmX
    3357              :         vcvtsi2sd  op, %xmmN, %xmmX
    3358              : 
    3359              :    NB: We want to generate only a single vxorps to cover the whole
    3360              :    function.  The LCM algorithm isn't appropriate here since it may
    3361              :    place a vxorps inside the loop.

                           Every single-set insn whose avx_partial_xmm_update attribute is
                           AVX_PARTIAL_XMM_UPDATE_TRUE (and that is not excluded by the
                           tuning checks below) is rewritten into a vec_merge of a
                           vec_duplicate of its source with a shared zero register,
                           computed into a fresh vector pseudo, followed by a copy of the
                           low element into the original destination.  The shared zero
                           register is set once through ix86_place_single_vector_set.

                           Always returns 0.  */
    3362              : 
    3363              : static unsigned int
    3364        33277 : remove_partial_avx_dependency (void)
    3365              : {
    3366        33277 :   timevar_push (TV_MACH_DEP);
    3367              : 
    3368        33277 :   bitmap_obstack_initialize (NULL);
                          /* Indexes of the basic blocks that contain a rewritten insn.  */
    3369        33277 :   bitmap convert_bbs = BITMAP_ALLOC (NULL);
    3370              : 
    3371        33277 :   basic_block bb;
    3372        33277 :   rtx_insn *insn, *set_insn;
    3373        33277 :   rtx set;
                          /* The shared zero register; created lazily at the first
                             rewritten insn.  */
    3374        33277 :   rtx v4sf_const0 = NULL_RTX;
    3375              : 
                          /* New vector sets that inherited a REG_EH_REGION note and may
                             therefore have to end their basic block.  */
    3376        33277 :   auto_vec<rtx_insn *> control_flow_insns;
    3377              : 
    3378              :   /* We create invalid RTL initially so defer rescans.  */
    3379        33277 :   df_set_flags (DF_DEFER_INSN_RESCAN);
    3380              : 
    3381       312857 :   FOR_EACH_BB_FN (bb, cfun)
    3382              :     {
    3383      3553321 :       FOR_BB_INSNS (bb, insn)
    3384              :         {
    3385      3273741 :           if (!NONDEBUG_INSN_P (insn))
    3386      1465599 :             continue;
    3387              : 
    3388      1808142 :           set = single_set (insn);
    3389      1808142 :           if (!set)
    3390        70300 :             continue;
    3391              : 
    3392      1737842 :           if (get_attr_avx_partial_xmm_update (insn)
    3393              :               != AVX_PARTIAL_XMM_UPDATE_TRUE)
    3394      1734613 :             continue;
    3395              : 
    3396              :           /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
    3397              :              SI -> SF, SI -> DF, DI -> SF, DI -> DF, sqrt, rsqrt, rcp,
    3398              :              round, to vec_dup and vec_merge with subreg.  */
    3399         3229 :           rtx src = SET_SRC (set);
    3400         3229 :           rtx dest = SET_DEST (set);
    3401         3229 :           machine_mode dest_mode = GET_MODE (dest);
    3402         3229 :           bool convert_p = false;
    3403         3229 :           switch (GET_CODE (src))
    3404              :             {
    3405         3124 :             case FLOAT:
    3406         3124 :             case FLOAT_EXTEND:
    3407         3124 :             case FLOAT_TRUNCATE:
    3408         3124 :             case UNSIGNED_FLOAT:
    3409         3124 :               convert_p = true;
    3410         3124 :               break;
    3411              :             default:
    3412              :               break;
    3413              :             }
    3414              : 
    3415              :           /* Only handle conversion here.  */
    3416         3124 :           machine_mode src_mode
    3417         3124 :             = convert_p ? GET_MODE (XEXP (src, 0)) : VOIDmode;
                                  /* Leave conversions alone when the target prefers vector
                                     converts or has no partial-register dependency for this
                                     kind of conversion.  */
    3418         3124 :           switch (src_mode)
    3419              :             {
    3420          155 :             case E_SFmode:
    3421          155 :             case E_DFmode:
    3422          155 :               if (TARGET_USE_VECTOR_FP_CONVERTS
    3423          149 :                   || !TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY)
    3424            8 :                 continue;
    3425              :               break;
    3426         2969 :             case E_SImode:
    3427         2969 :             case E_DImode:
    3428         2969 :               if (TARGET_USE_VECTOR_CONVERTS
    3429         2957 :                   || !TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY)
    3430           14 :                 continue;
    3431              :               break;
    3432          105 :             case E_VOIDmode:
    3433          105 :               gcc_assert (!convert_p);
    3434              :               break;
    3435            0 :             default:
    3436            0 :               gcc_unreachable ();
    3437              :             }
    3438              : 
    3439         3207 :           if (!v4sf_const0)
    3440         1022 :             v4sf_const0 = gen_reg_rtx (V4SFmode);
    3441              : 
                                  /* View the shared V4SF zero register in the vector mode
                                     that matches the scalar destination.  */
    3442         3207 :           rtx zero;
    3443         3207 :           machine_mode dest_vecmode;
    3444         3207 :           switch (dest_mode)
    3445              :             {
    3446           90 :             case E_HFmode:
    3447           90 :               dest_vecmode = V8HFmode;
    3448           90 :               zero = gen_rtx_SUBREG (V8HFmode, v4sf_const0, 0);
    3449           90 :               break;
    3450              :             case E_SFmode:
    3451              :               dest_vecmode = V4SFmode;
    3452              :               zero = v4sf_const0;
    3453              :               break;
    3454         1175 :             case E_DFmode:
    3455         1175 :               dest_vecmode = V2DFmode;
    3456         1175 :               zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0);
    3457         1175 :               break;
    3458            0 :             default:
    3459            0 :               gcc_unreachable ();
    3460              :             }
    3461              : 
    3462              :           /* Change source to vector mode.  */
    3463         3207 :           src = gen_rtx_VEC_DUPLICATE (dest_vecmode, src);
    3464         3207 :           src = gen_rtx_VEC_MERGE (dest_vecmode, src, zero,
    3465              :                                    GEN_INT (HOST_WIDE_INT_1U));
    3466              :           /* Change destination to vector mode.  */
    3467         3207 :           rtx vec = gen_reg_rtx (dest_vecmode);
    3468              :           /* Generate an XMM vector SET.  */
    3469         3207 :           set = gen_rtx_SET (vec, src);
    3470         3207 :           set_insn = emit_insn_before (set, insn);
    3471              : 
    3472         3207 :           if (cfun->can_throw_non_call_exceptions)
    3473              :             {
    3474              :               /* Handle REG_EH_REGION note.  */
    3475            0 :               rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
    3476            0 :               if (note)
    3477              :                 {
    3478            0 :                   control_flow_insns.safe_push (set_insn);
    3479            0 :                   add_reg_note (set_insn, REG_EH_REGION, XEXP (note, 0));
    3480              :                 }
    3481              :             }
    3482              : 
                                  /* The original insn becomes a copy of the low element of
                                     the new vector into the original destination.  */
    3483         3207 :           src = gen_rtx_SUBREG (dest_mode, vec, 0);
    3484         3207 :           set = gen_rtx_SET (dest, src);
    3485              : 
    3486              :           /* Drop possible dead definitions.  */
    3487         3207 :           PATTERN (insn) = set;
    3488              : 
    3489         3207 :           INSN_CODE (insn) = -1;
    3490         3207 :           recog_memoized (insn);
    3491         3207 :           df_insn_rescan (insn);
    3492         3207 :           bitmap_set_bit (convert_bbs, bb->index);
    3493              :         }
    3494              :     }
    3495              : 
                          /* V4SF_CONST0 is non-null exactly when at least one insn was
                             rewritten.  */
    3496        33277 :   if (v4sf_const0)
    3497              :     {
    3498              :       /* (Re-)discover loops so that bb->loop_father can be used in the
    3499              :          analysis below.  */
    3500         1022 :       calculate_dominance_info (CDI_DOMINATORS);
    3501         1022 :       loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
    3502              : 
    3503         1022 :       ix86_place_single_vector_set (v4sf_const0,
    3504              :                                     CONST0_RTX (V4SFmode),
    3505              :                                     convert_bbs);
    3506              : 
    3507         1022 :       loop_optimizer_finalize ();
    3508              : 
    3509         1022 :       if (!control_flow_insns.is_empty ())
    3510              :         {
    3511            0 :           free_dominance_info (CDI_DOMINATORS);
    3512              : 
    3513            0 :           unsigned int i;
    3514            0 :           FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
    3515            0 :             if (control_flow_insn_p (insn))
    3516              :               {
    3517              :                 /* Split the block after insn.  There will be a fallthru
    3518              :                    edge, which is OK so we keep it.  We have to create
    3519              :                    the exception edges ourselves.  */
    3520            0 :                 bb = BLOCK_FOR_INSN (insn);
    3521            0 :                 split_block (bb, insn);
    3522            0 :                 rtl_make_eh_edge (NULL, bb, BB_END (bb));
    3523              :               }
    3524              :         }
    3525              :     }
    3526              : 
    3527        33277 :   df_process_deferred_rescans ();
    3528        33277 :   df_clear_flags (DF_DEFER_INSN_RESCAN);
                          /* Free CONVERT_BBS before releasing the default bitmap obstack
                             it was allocated from; the opposite order would touch the
                             bitmap after its obstack may already have been released.  */
    3529        33277 :   BITMAP_FREE (convert_bbs);
    3530        33277 :   bitmap_obstack_release (NULL);
    3531              : 
    3532        33277 :   timevar_pop (TV_MACH_DEP);
    3533        33277 :   return 0;
    3534        33277 : }
    3535              : 
    3536              : namespace {
    3537              : 
    3538              : const pass_data pass_data_remove_partial_avx_dependency =
    3539              : {
    3540              :   RTL_PASS, /* type */
    3541              :   "rpad", /* name */
    3542              :   OPTGROUP_NONE, /* optinfo_flags */
    3543              :   TV_MACH_DEP, /* tv_id */
    3544              :   0, /* properties_required */
    3545              :   0, /* properties_provided */
    3546              :   0, /* properties_destroyed */
    3547              :   0, /* todo_flags_start */
    3548              :   0, /* todo_flags_finish */
    3549              : };
    3550              : 
    3551              : class pass_remove_partial_avx_dependency : public rtl_opt_pass
    3552              : {
    3553              : public:
    3554       285722 :   pass_remove_partial_avx_dependency (gcc::context *ctxt)
    3555       571444 :     : rtl_opt_pass (pass_data_remove_partial_avx_dependency, ctxt)
    3556              :   {}
    3557              : 
    3558              :   /* opt_pass methods: */
    3559      1471370 :   bool gate (function *) final override
    3560              :     {
    3561      1471370 :       return ix86_rpad_gate ();
    3562              :     }
    3563              : 
    3564        33277 :   unsigned int execute (function *) final override
    3565              :     {
    3566        33277 :       return remove_partial_avx_dependency ();
    3567              :     }
    3568              : }; // class pass_rpad
    3569              : 
    3570              : } // anon namespace
    3571              : 
    3572              : rtl_opt_pass *
    3573       285722 : make_pass_remove_partial_avx_dependency (gcc::context *ctxt)
    3574              : {
    3575       285722 :   return new pass_remove_partial_avx_dependency (ctxt);
    3576              : }
    3577              : 
    3578              : /* Return a machine mode suitable for vector SIZE with SMODE inner
    3579              :    mode.  */
    3580              : 
    3581              : static machine_mode
    3582        32228 : ix86_get_vector_cse_mode (unsigned int size, machine_mode smode)
    3583              : {
    3584              :   /* Use the inner scalar mode of vector broadcast source in:
    3585              : 
    3586              :      (set (reg:V8DF 394)
    3587              :           (vec_duplicate:V8DF (reg:V2DF 190 [ alpha ])))
    3588              : 
    3589              :      to compute the vector mode for broadcast from vector source.
    3590              :    */
    3591        32228 :   if (VECTOR_MODE_P (smode))
    3592            1 :     smode = GET_MODE_INNER (smode);
    3593        32228 :   scalar_mode s_mode = as_a <scalar_mode> (smode);
    3594        64456 :   poly_uint64 nunits = size / GET_MODE_SIZE (smode);
    3595        32228 :   machine_mode mode = mode_for_vector (s_mode, nunits).require ();
    3596        32228 :   return mode;
    3597              : }
    3598              : 
    3599              : /* Replace the source operand of instructions in VECTOR_INSNS with
    3600              :    VECTOR_CONST in VECTOR_MODE.  */
    3601              : 
    3602              : static void
    3603        31797 : replace_vector_const (machine_mode vector_mode, rtx vector_const,
    3604              :                       auto_bitmap &vector_insns,
    3605              :                       machine_mode scalar_mode)
    3606              : {
    3607        31797 :   bitmap_iterator bi;
    3608        31797 :   unsigned int id;
    3609              : 
    3610       153252 :   EXECUTE_IF_SET_IN_BITMAP (vector_insns, 0, id, bi)
    3611              :     {
    3612       121455 :       rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
    3613              : 
    3614              :       /* Get the single SET instruction.  */
    3615       121455 :       rtx set = single_set (insn);
    3616       121455 :       rtx src = SET_SRC (set);
    3617       121455 :       rtx dest = SET_DEST (set);
    3618       121455 :       machine_mode mode = GET_MODE (dest);
    3619              : 
    3620       121455 :       rtx replace;
    3621              :       /* Replace the source operand with VECTOR_CONST.  */
    3622       121455 :       if (SUBREG_P (src) || mode == vector_mode)
    3623              :         replace = vector_const;
    3624              :       else
    3625              :         {
    3626        58713 :           unsigned int size = GET_MODE_SIZE (mode);
    3627        58713 :           if (size < ix86_regmode_natural_size (mode))
    3628              :             {
    3629              :               /* If the mode size is smaller than its natural size,
    3630              :                  first insert an extra move with a QI vector SUBREG
    3631              :                  of the same size to avoid validate_subreg failure.  */
    3632          431 :               machine_mode vmode
    3633          431 :                 = ix86_get_vector_cse_mode (size, scalar_mode);
    3634          431 :               rtx vreg;
    3635          431 :               if (mode == vmode)
    3636              :                 vreg = vector_const;
    3637              :               else
    3638              :                 {
    3639           40 :                   vreg = gen_reg_rtx (vmode);
    3640           40 :                   rtx vsubreg = gen_rtx_SUBREG (vmode, vector_const, 0);
    3641           40 :                   rtx pat = gen_rtx_SET (vreg, vsubreg);
    3642           40 :                   rtx_insn *vinsn = emit_insn_before (pat, insn);
    3643           40 :                   if (dump_file)
    3644              :                     {
    3645            0 :                       fprintf (dump_file, "\nInsert an extra move:\n\n");
    3646            0 :                       print_rtl_single (dump_file, vinsn);
    3647            0 :                       fprintf (dump_file, "\nbefore:\n\n");
    3648            0 :                       print_rtl_single (dump_file, insn);
    3649            0 :                       fprintf (dump_file, "\n");
    3650              :                     }
    3651              :                 }
    3652          431 :               replace = gen_rtx_SUBREG (mode, vreg, 0);
    3653              :             }
    3654              :           else
    3655        58282 :             replace = gen_rtx_SUBREG (mode, vector_const, 0);
    3656              :         }
    3657              : 
    3658       121455 :       if (dump_file)
    3659              :         {
    3660            0 :           fprintf (dump_file, "\nReplace:\n\n");
    3661            0 :           print_rtl_single (dump_file, insn);
    3662              :         }
    3663       121455 :       SET_SRC (set) = replace;
    3664              :       /* Drop possible dead definitions.  */
    3665       121455 :       PATTERN (insn) = set;
    3666       121455 :       INSN_CODE (insn) = -1;
    3667       121455 :       recog_memoized (insn);
    3668       121455 :       if (dump_file)
    3669              :         {
    3670            0 :           fprintf (dump_file, "\nwith:\n\n");
    3671            0 :           print_rtl_single (dump_file, insn);
    3672            0 :           fprintf (dump_file, "\n");
    3673              :         }
    3674       121455 :       df_insn_rescan (insn);
    3675              :     }
    3676        31797 : }
    3677              : 
    3678              : /* Return the inner scalar if OP is a broadcast, else return nullptr.  */
    3679              : 
    3680              : static rtx
    3681      2185915 : ix86_broadcast_inner (rtx op, machine_mode mode,
    3682              :                       machine_mode *scalar_mode_p,
    3683              :                       x86_cse_kind *kind_p, rtx_insn **insn_p)
    3684              : {
    3685      2185915 :   switch (standard_sse_constant_p (op, mode))
    3686              :     {
    3687       112734 :     case 1:
    3688       112734 :       *scalar_mode_p = QImode;
    3689       112734 :       *kind_p = X86_CSE_CONST0_VECTOR;
    3690       112734 :       *insn_p = nullptr;
    3691       112734 :       return const0_rtx;
    3692        10879 :     case 2:
    3693        10879 :       *scalar_mode_p = QImode;
    3694        10879 :       *kind_p = X86_CSE_CONSTM1_VECTOR;
    3695        10879 :       *insn_p = nullptr;
    3696        10879 :       return constm1_rtx;
    3697      2062302 :     default:
    3698      2062302 :       break;
    3699              :     }
    3700              : 
    3701      2062302 :   mode = GET_MODE (op);
    3702      2062302 :   int nunits = GET_MODE_NUNITS (mode);
    3703      2062302 :   if (nunits < 2)
    3704              :     return nullptr;
    3705              : 
    3706      1582989 :   *kind_p = X86_CSE_VEC_DUP;
    3707              : 
    3708      1582989 :   rtx reg;
    3709      1582989 :   if (GET_CODE (op) == VEC_DUPLICATE)
    3710              :     {
    3711              :       /* Only
    3712              :           (vec_duplicate:V4SI (reg:SI 99))
    3713              :           (vec_duplicate:V2DF (mem/u/c:DF (symbol_ref/u:DI ("*.LC1") [flags 0x2]) [0  S8 A64]))
    3714              :          are supported.  Set OP to the broadcast source by default.  */
    3715        95280 :       op = XEXP (op, 0);
    3716        95280 :       reg = op;
    3717        95280 :       if (SUBREG_P (op)
    3718          401 :           && SUBREG_BYTE (op) == 0
    3719        95681 :           && !paradoxical_subreg_p (op))
    3720          401 :         reg = SUBREG_REG (op);
    3721        95280 :       if (!REG_P (reg))
    3722              :         {
    3723         7785 :           if (MEM_P (op)
    3724         7519 :               && SYMBOL_REF_P (XEXP (op, 0))
    3725        13497 :               && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
    3726              :             {
    3727              :               /* Handle constant broadcast from memory.  */
    3728         5491 :               *scalar_mode_p = GET_MODE_INNER (mode);
    3729         5491 :               *insn_p = nullptr;
    3730         5491 :               return op;
    3731              :             }
    3732              :           return nullptr;
    3733              :         }
    3734              :     }
    3735      1487709 :   else if (CONST_VECTOR_P (op))
    3736              :     {
    3737           20 :       rtx first = XVECEXP (op, 0, 0);
    3738           48 :       for (int i = 1; i < nunits; ++i)
    3739              :         {
    3740           48 :           rtx tmp = XVECEXP (op, 0, i);
    3741              :           /* Vector duplicate value.  */
    3742           48 :           if (!rtx_equal_p (tmp, first))
    3743              :             return nullptr;
    3744              :         }
    3745            0 :       *scalar_mode_p = GET_MODE (first);
    3746            0 :       *insn_p = nullptr;
    3747            0 :       return first;
    3748              :     }
    3749              :   else
    3750              :     return nullptr;
    3751              : 
    3752        87495 :   mode = GET_MODE (op);
    3753              : 
    3754              :   /* Only single def chain is supported.  */
    3755        87495 :   df_ref ref = DF_REG_DEF_CHAIN (REGNO (reg));
    3756        87495 :   if (!ref
    3757        87494 :       || DF_REF_IS_ARTIFICIAL (ref)
    3758        87494 :       || DF_REF_NEXT_REG (ref) != nullptr)
    3759              :     return nullptr;
    3760              : 
    3761        81786 :   rtx_insn *insn = DF_REF_INSN (ref);
    3762        81786 :   rtx set = single_set (insn);
    3763        81786 :   if (!set)
    3764              :     return nullptr;
    3765              : 
    3766        81738 :   rtx src = SET_SRC (set);
    3767              : 
    3768        81738 :   if (CONST_INT_P (src))
    3769              :     {
    3770              :       /* Handle sequences like
    3771              : 
    3772              :          (set (reg:SI 99)
    3773              :                (const_int 34 [0x22]))
    3774              :          (set (reg:V4SI 98)
    3775              :                (vec_duplicate:V4SI (reg:SI 99)))
    3776              : 
    3777              :          Set *INSN_P to nullptr and return SET_SRC if SET_SRC is an
    3778              :          integer constant.  */
    3779        67133 :       op = src;
    3780        67133 :       if (mode != GET_MODE (reg))
    3781            0 :         op = gen_int_mode (INTVAL (src), mode);
    3782        67133 :       *insn_p = nullptr;
    3783              :     }
    3784              :   else
    3785              :     {
    3786              :       /* Handle sequences like
    3787              : 
    3788              :          (set (reg:QI 105 [ c ])
    3789              :               (reg:QI 5 di [ c ]))
    3790              :          (set (reg:V64QI 102 [ _1 ])
    3791              :               (vec_duplicate:V64QI (reg:QI 105 [ c ])))
    3792              : 
    3793              :          (set (reg/v:SI 116 [ argc ])
    3794              :               (mem/c:SI (reg:SI 135) [2 argc+0 S4 A32]))
    3795              :          (set (reg:V4SI 119 [ _45 ])
    3796              :               (vec_duplicate:V4SI (reg/v:SI 116 [ argc ])))
    3797              : 
    3798              :          (set (reg:SI 98 [ _1 ])
    3799              :               (sign_extend:SI (reg:QI 106 [ c ])))
    3800              :          (set (reg:V16SI 103 [ _2 ])
    3801              :                (vec_duplicate:V16SI (reg:SI 98 [ _1 ])))
    3802              : 
    3803              :          (set (reg:SI 102 [ cost ])
    3804              :               (mem/c:SI (symbol_ref:DI ("cost") [flags 0x40])))
    3805              :          (set (reg:V4HI 103 [ _16 ])
    3806              :               (vec_duplicate:V4HI (subreg:HI (reg:SI 102 [ cost ]) 0)))
    3807              : 
    3808              :          (set (subreg:SI (reg/v:HI 107 [ cr_val ]) 0)
    3809              :               (ashift:SI (reg:SI 158)
    3810              :                          (subreg:QI (reg:SI 156 [ _2 ]) 0)))
    3811              :          (set (reg:V16HI 183 [ _61 ])
    3812              :               (vec_duplicate:V16HI (reg/v:HI 107 [ cr_val ])))
    3813              : 
    3814              :          Set *INSN_P to INSN and return the broadcast source otherwise.  */
    3815        14605 :       *insn_p = insn;
    3816              :     }
    3817              : 
    3818        81738 :   *scalar_mode_p = mode;
    3819        81738 :   return op;
    3820              : }
    3821              : 
    3822              : /* Replace CALL instruction in TLS_CALL_INSNS with SET from SRC and
    3823              :    put the updated instruction in UPDATED_TLS_INSNS.  */
    3824              : 
    3825              : static void
    3826          310 : replace_tls_call (rtx src, auto_bitmap &tls_call_insns,
    3827              :                   auto_bitmap &updated_tls_insns)
    3828              : {
    3829          310 :   bitmap_iterator bi;
    3830          310 :   unsigned int id;
    3831              : 
    3832         1731 :   EXECUTE_IF_SET_IN_BITMAP (tls_call_insns, 0, id, bi)
    3833              :     {
    3834         1421 :       rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
    3835              : 
    3836              :       /* If this isn't a CALL, only GNU2 TLS implicit CALL patterns are
    3837              :          allowed.  */
    3838         1421 :       if (!CALL_P (insn))
    3839              :         {
    3840           41 :           attr_tls64 tls64 = get_attr_tls64 (insn);
    3841           41 :           if (tls64 != TLS64_CALL && tls64 != TLS64_COMBINE)
    3842            0 :             gcc_unreachable ();
    3843              :         }
    3844              : 
    3845         1421 :       rtx pat = PATTERN (insn);
    3846         1421 :       gcc_assert (GET_CODE (pat) == PARALLEL);
    3847         1421 :       rtx set = XVECEXP (pat, 0, 0);
    3848         1421 :       gcc_assert (GET_CODE (set) == SET);
    3849         1421 :       rtx dest = SET_DEST (set);
    3850              : 
    3851         1421 :       set = gen_rtx_SET (dest, src);
    3852         1421 :       rtx_insn *set_insn = emit_insn_after (set, insn);
    3853         1421 :       if (recog_memoized (set_insn) < 0)
    3854            0 :         gcc_unreachable ();
    3855              : 
    3856              :       /* Put SET_INSN in UPDATED_TLS_INSNS.  */
    3857         1421 :       bitmap_set_bit (updated_tls_insns, INSN_UID (set_insn));
    3858              : 
    3859         1421 :       if (dump_file)
    3860              :         {
    3861            0 :           fprintf (dump_file, "\nReplace:\n\n");
    3862            0 :           print_rtl_single (dump_file, insn);
    3863            0 :           fprintf (dump_file, "\nwith:\n\n");
    3864            0 :           print_rtl_single (dump_file, set_insn);
    3865            0 :           fprintf (dump_file, "\n");
    3866              :         }
    3867              : 
    3868              :       /* Delete the CALL insn.  */
    3869         1421 :       delete_insn (insn);
    3870              : 
    3871         1421 :       df_insn_rescan (set_insn);
    3872              :     }
    3873          310 : }
    3874              : 
    3875              : /* Return the basic block which dominates all basic blocks which set
    3876              :    hard register REGNO used in basic block BB.  */
    3877              : 
    3878              : static basic_block
    3879            2 : ix86_get_dominator_for_reg (unsigned int regno, basic_block bb)
    3880              : {
    3881            2 :   basic_block set_bb;
    3882            2 :   auto_bitmap set_bbs;
    3883              : 
    3884              :   /* Get all BBs which set REGNO and dominate the current BB from all
    3885              :      DEFs of REGNO.  */
    3886            2 :   for (df_ref def = DF_REG_DEF_CHAIN (regno);
    3887           18 :        def;
    3888           16 :        def = DF_REF_NEXT_REG (def))
    3889           16 :     if (!DF_REF_IS_ARTIFICIAL (def)
    3890           16 :         && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER)
    3891            6 :         && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER))
    3892              :       {
    3893            4 :         set_bb = DF_REF_BB (def);
    3894            4 :         if (dominated_by_p (CDI_DOMINATORS, bb, set_bb))
    3895            2 :           bitmap_set_bit (set_bbs, set_bb->index);
    3896              :       }
    3897              : 
    3898            2 :   bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
    3899            2 :   return bb;
    3900            2 : }
    3901              : 
    3902              : /* Mark FLAGS register as live in DATA, a bitmap of live caller-saved
    3903              :    registers, if DEST is FLAGS register.  */
    3904              : 
    3905              : static void
    3906          381 : ix86_check_flags_reg (rtx dest, const_rtx x, void *data)
    3907              : {
    3908          381 :   if (GET_CODE (x) == CLOBBER)
    3909              :     return;
    3910              : 
    3911          374 :   auto_bitmap *live_caller_saved_regs = (auto_bitmap *) data;
    3912          374 :   if (REG_P (dest) && REGNO (dest) == FLAGS_REG)
    3913            0 :     bitmap_set_bit (*live_caller_saved_regs, FLAGS_REG);
    3914              : }
    3915              : 
    3916              : /* Emit a TLS_SET instruction of KIND in basic block BB.   Store the
    3917              :    insertion point in *BEFORE_P for emit_insn_before or in *AFTER_P
    3918              :    for emit_insn_after.  UPDATED_GNU_TLS_INSNS contains instructions
    3919              :    which replace the GNU TLS instructions.  UPDATED_GNU2_TLS_INSNS
    3920              :    contains instructions which replace the GNU2 TLS instructions.  */
    3921              : 
    3922              : static rtx_insn *
    3923          310 : ix86_emit_tls_call (rtx tls_set, x86_cse_kind kind, basic_block bb,
    3924              :                     rtx_insn **before_p, rtx_insn **after_p,
    3925              :                     auto_bitmap &updated_gnu_tls_insns,
    3926              :                     auto_bitmap &updated_gnu2_tls_insns)
    3927              : {
    3928          312 :   rtx_insn *tls_insn;
    3929              : 
    3930          312 :   do
    3931              :     {
    3932          312 :       rtx_insn *insn = BB_HEAD (bb);
    3933         1288 :       while (insn && !NONDEBUG_INSN_P (insn))
    3934              :         {
    3935          980 :           if (insn == BB_END (bb))
    3936              :             {
    3937              :               /* This must be the beginning basic block:
    3938              : 
    3939              :                  (note 4 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
    3940              :                  (note 2 4 26 2 NOTE_INSN_FUNCTION_BEG)
    3941              : 
    3942              :                  or a basic block with only a label:
    3943              : 
    3944              :                  (code_label 78 11 77 3 14 (nil) [1 uses])
    3945              :                  (note 77 78 54 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
    3946              : 
    3947              :                  or a basic block with only a debug marker:
    3948              : 
    3949              :                  (note 3 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
    3950              :                  (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
    3951              :                  (debug_insn 5 2 16 2 (debug_marker) "x.c":6:3 -1 (nil))
    3952              : 
    3953              :                  or a basic block with only deleted instructions:
    3954              : 
    3955              :                  (code_label 348 23 349 45 3 (nil) [0 uses])
    3956              :                  (note 349 348 436 45 [bb 45] NOTE_INSN_BASIC_BLOCK)
    3957              :                  (note 436 349 362 45 NOTE_INSN_DELETED)
    3958              : 
    3959              :                */
    3960            4 :               gcc_assert (DEBUG_INSN_P (insn)
    3961              :                           || (NOTE_P (insn)
    3962              :                               && ((NOTE_KIND (insn)
    3963              :                                    == NOTE_INSN_FUNCTION_BEG)
    3964              :                                   || (NOTE_KIND (insn)
    3965              :                                       == NOTE_INSN_DELETED)
    3966              :                                   || (NOTE_KIND (insn)
    3967              :                                       == NOTE_INSN_BASIC_BLOCK))));
    3968              :               insn = NULL;
    3969              :               break;
    3970              :             }
    3971          976 :           insn = NEXT_INSN (insn);
    3972              :         }
    3973              : 
    3974              :       /* TLS_GD and TLS_LD_BASE instructions are normal functions which
    3975              :          clobber caller-saved registers.  TLSDESC instructions only
    3976              :          clobber FLAGS.  If any registers clobbered by TLS instructions
    3977              :          are live in this basic block, we must insert TLS instructions
    3978              :          after all live registers clobbered are dead.  */
    3979              : 
    3980          312 :       auto_bitmap live_caller_saved_regs;
    3981          624 :       bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb);
    3982              : 
    3983          312 :       if (bitmap_bit_p (in, FLAGS_REG))
    3984            4 :         bitmap_set_bit (live_caller_saved_regs, FLAGS_REG);
    3985              : 
    3986          312 :       unsigned int i;
    3987              : 
    3988              :       /* Get all live caller-saved registers for TLS_GD and TLS_LD_BASE
    3989              :          instructions.  */
    3990          312 :       if (kind != X86_CSE_TLSDESC)
    3991        27249 :         for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    3992        26956 :           if (call_used_regs[i]
    3993        25198 :               && !fixed_regs[i]
    3994        38993 :               && bitmap_bit_p (in, i))
    3995          344 :             bitmap_set_bit (live_caller_saved_regs, i);
    3996              : 
    3997          312 :       if (bitmap_empty_p (live_caller_saved_regs))
    3998              :         {
    3999           79 :           if (insn == BB_HEAD (bb))
    4000              :             {
    4001            0 :               *before_p = insn;
    4002            0 :               tls_insn = emit_insn_before (tls_set, insn);
    4003              :             }
    4004              :           else
    4005              :             {
    4006              :               /* Emit the TLS call after NOTE_INSN_FUNCTION_BEG in the
    4007              :                  beginning basic block:
    4008              : 
    4009              :                  (note 4 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
    4010              :                  (note 2 4 26 2 NOTE_INSN_FUNCTION_BEG)
    4011              : 
    4012              :                  or after NOTE_INSN_BASIC_BLOCK in a basic block with
    4013              :                  only a label:
    4014              : 
    4015              :                  (code_label 78 11 77 3 14 (nil) [1 uses])
    4016              :                  (note 77 78 54 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
    4017              : 
    4018              :                  or after debug marker in a basic block with only a
    4019              :                  debug marker:
    4020              : 
    4021              :                  (note 3 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
    4022              :                  (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
    4023              :                  (debug_insn 5 2 16 2 (debug_marker) "x.c":6:3 -1 (nil))
    4024              : 
    4025              :                */
    4026           79 :               insn = insn ? PREV_INSN (insn) : BB_END (bb);
    4027           79 :               *after_p = insn;
    4028           79 :               tls_insn = emit_insn_after (tls_set, insn);
    4029              :             }
    4030           79 :           return tls_insn;
    4031              :         }
    4032              : 
    4033          233 :       bool repeat = false;
    4034              : 
    4035              :       /* Search for REG_DEAD notes in this basic block.  */
    4036          661 :       FOR_BB_INSNS (bb, insn)
    4037              :         {
    4038          661 :           if (!NONDEBUG_INSN_P (insn))
    4039          283 :             continue;
    4040              : 
    4041              :           /* NB: Conditional jump is the only instruction which reads
    4042              :              flags register and changes control flow.  We can never
    4043              :              place the TLS call after unconditional jump.  */
    4044          378 :           if (JUMP_P (insn))
    4045              :             {
    4046              :               /* This must be a conditional jump.  */
    4047            2 :               rtx label = JUMP_LABEL (insn);
    4048            2 :               if (label == nullptr
    4049            2 :                   || ANY_RETURN_P (label)
    4050            2 :                   || !(LABEL_P (label) || SYMBOL_REF_P (label)))
    4051            0 :                 gcc_unreachable ();
    4052              : 
    4053              :               /* Place the call before all FLAGS_REG setting BBs since
    4054              :                  we can't place a call before nor after a conditional
    4055              :                  jump.  */
    4056            2 :               bb = ix86_get_dominator_for_reg (FLAGS_REG, bb);
    4057              : 
    4058              :               /* Start over again.  */
    4059            2 :               repeat = true;
    4060            2 :               break;
    4061              :             }
    4062              : 
    4063          376 :           if (bitmap_bit_p (updated_gnu_tls_insns, INSN_UID (insn)))
    4064              :             {
    4065              :               /* Insert the __tls_get_addr call before INSN which
    4066              :                  replaces a __tls_get_addr call.  */
    4067            1 :               *before_p = insn;
    4068            1 :               tls_insn = emit_insn_before (tls_set, insn);
    4069            1 :               return tls_insn;
    4070              :             }
    4071              : 
    4072          375 :           if (bitmap_bit_p (updated_gnu2_tls_insns, INSN_UID (insn)))
    4073              :             {
    4074              :               /* Mark FLAGS register as dead since FLAGS register
    4075              :                  would be clobbered by the GNU2 TLS instruction.  */
    4076            1 :               bitmap_clear_bit (live_caller_saved_regs, FLAGS_REG);
    4077            1 :               continue;
    4078              :             }
    4079              : 
    4080              :           /* Check if FLAGS register is live.  */
    4081          374 :           note_stores (insn, ix86_check_flags_reg,
    4082              :                        &live_caller_saved_regs);
    4083              : 
    4084          374 :           rtx link;
    4085          515 :           for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
    4086          371 :             if ((REG_NOTE_KIND (link) == REG_DEAD
    4087            9 :                  || (REG_NOTE_KIND (link) == REG_UNUSED
    4088            7 :                      && REGNO (XEXP (link, 0)) == FLAGS_REG))
    4089          378 :                 && REG_P (XEXP (link, 0)))
    4090              :               {
    4091              :                 /* Mark the live caller-saved register as dead.  */
    4092          743 :                 for (i = REGNO (XEXP (link, 0));
    4093          743 :                      i < END_REGNO (XEXP (link, 0));
    4094              :                      i++)
    4095          374 :                   if (i < FIRST_PSEUDO_REGISTER)
    4096          351 :                     bitmap_clear_bit (live_caller_saved_regs, i);
    4097              : 
    4098          369 :                 if (bitmap_empty_p (live_caller_saved_regs))
    4099              :                   {
    4100          230 :                     *after_p = insn;
    4101          230 :                     tls_insn = emit_insn_after (tls_set, insn);
    4102          230 :                     return tls_insn;
    4103              :                   }
    4104              :               }
    4105              :         }
    4106              : 
    4107              :       /* NB: Start over again for conditional jump.  */
    4108            2 :       if (repeat)
    4109            2 :         continue;
    4110              : 
    4111            0 :       gcc_assert (!bitmap_empty_p (live_caller_saved_regs));
    4112              : 
    4113              :       /* If any live caller-saved registers aren't dead at the end of
    4114              :          this basic block, get the basic block which dominates all
    4115              :          basic blocks which set the remaining live registers.  */
    4116            0 :       auto_bitmap set_bbs;
    4117            0 :       bitmap_iterator bi;
    4118            0 :       unsigned int id;
    4119            0 :       EXECUTE_IF_SET_IN_BITMAP (live_caller_saved_regs, 0, id, bi)
    4120              :         {
    4121            0 :           basic_block set_bb = ix86_get_dominator_for_reg (id, bb);
    4122            0 :           bitmap_set_bit (set_bbs, set_bb->index);
    4123              :         }
    4124            0 :       bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
    4125            2 :     }
    4126              :   while (true);
    4127              : }
    4128              : 
    4129              : /* Generate a TLS call of KIND with VAL and copy the call result to DEST,
    4130              :    at entry of the nearest dominator for basic block map BBS, which is in
    4131              :    the fake loop that contains the whole function, so that there is only
    4132              :    a single TLS CALL of KIND with VAL in the whole function.
    4133              :    UPDATED_GNU_TLS_INSNS contains instructions which replace the GNU TLS
    4134              :    instructions.  UPDATED_GNU2_TLS_INSNS contains instructions which
    4135              :    replace the GNU2 TLS instructions.  If TLSDESC_SET isn't nullptr,
    4136              :    insert it before the TLS call.  */
    4137              : 
    4138              : static void
    4139          310 : ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind,
    4140              :                             auto_bitmap &bbs,
    4141              :                             auto_bitmap &updated_gnu_tls_insns,
    4142              :                             auto_bitmap &updated_gnu2_tls_insns,
    4143              :                             rtx tlsdesc_set = nullptr)
    4144              : {
    4145          310 :   basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
    4146          310 :   while (bb->loop_father->latch
    4147          319 :          != EXIT_BLOCK_PTR_FOR_FN (cfun))
    4148            9 :     bb = get_immediate_dominator (CDI_DOMINATORS,
    4149              :                                   bb->loop_father->header);
    4150              : 
    4151          310 :   rtx rax = nullptr, rdi;
    4152          310 :   rtx eqv = nullptr;
    4153          310 :   rtx caddr;
    4154          310 :   rtx set;
    4155          310 :   rtx clob;
    4156          310 :   rtx symbol;
    4157          310 :   rtx tls;
    4158              : 
    4159          310 :   switch (kind)
    4160              :     {
    4161          262 :     case X86_CSE_TLS_GD:
    4162          262 :       rax = gen_rtx_REG (Pmode, AX_REG);
    4163          262 :       rdi = gen_rtx_REG (Pmode, DI_REG);
    4164          262 :       caddr = ix86_tls_get_addr ();
    4165              : 
    4166          262 :       symbol = XVECEXP (val, 0, 0);
    4167          262 :       tls = gen_tls_global_dynamic_64 (Pmode, rax, symbol, caddr, rdi);
    4168              : 
    4169          262 :       if (GET_MODE (symbol) != Pmode)
    4170            0 :         symbol = gen_rtx_ZERO_EXTEND (Pmode, symbol);
    4171              :       eqv = symbol;
    4172              :       break;
    4173              : 
    4174           30 :     case X86_CSE_TLS_LD_BASE:
    4175           30 :       rax = gen_rtx_REG (Pmode, AX_REG);
    4176           30 :       rdi = gen_rtx_REG (Pmode, DI_REG);
    4177           30 :       caddr = ix86_tls_get_addr ();
    4178              : 
    4179           30 :       tls = gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi);
    4180              : 
    4181              :       /* Attach a unique REG_EQUAL to DEST, to allow the RTL optimizers
    4182              :          to share the LD_BASE result with other LD model accesses.  */
    4183           30 :       eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
    4184              :                             UNSPEC_TLS_LD_BASE);
    4185              : 
    4186           30 :       break;
    4187              : 
    4188           18 :     case X86_CSE_TLSDESC:
    4189           18 :       set = gen_rtx_SET (dest, val);
    4190           18 :       clob = gen_rtx_CLOBBER (VOIDmode,
    4191              :                               gen_rtx_REG (CCmode, FLAGS_REG));
    4192           18 :       tls = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clob));
    4193           18 :       break;
    4194              : 
    4195            0 :     default:
    4196            0 :       gcc_unreachable ();
    4197              :     }
    4198              : 
    4199              :   /* Emit the TLS CALL insn.  */
    4200          310 :   rtx_insn *before = nullptr;
    4201          310 :   rtx_insn *after = nullptr;
    4202          310 :   rtx_insn *tls_insn = ix86_emit_tls_call (tls, kind, bb, &before,
    4203              :                                            &after,
    4204              :                                            updated_gnu_tls_insns,
    4205              :                                            updated_gnu2_tls_insns);
    4206              : 
    4207          310 :   rtx_insn *tlsdesc_insn = nullptr;
    4208          310 :   if (tlsdesc_set)
    4209              :     {
    4210           14 :       rtx dest = copy_rtx (SET_DEST (tlsdesc_set));
    4211           14 :       rtx src = copy_rtx (SET_SRC (tlsdesc_set));
    4212           14 :       tlsdesc_set = gen_rtx_SET (dest, src);
    4213           14 :       tlsdesc_insn = emit_insn_before (tlsdesc_set, tls_insn);
    4214              :     }
    4215              : 
    4216          310 :   if (kind != X86_CSE_TLSDESC)
    4217              :     {
    4218          292 :       RTL_CONST_CALL_P (tls_insn) = 1;
    4219              : 
    4220              :       /* Indicate that this function can't jump to non-local gotos.  */
    4221          292 :       make_reg_eh_region_note_nothrow_nononlocal (tls_insn);
    4222              :     }
    4223              : 
    4224          310 :   if (recog_memoized (tls_insn) < 0)
    4225            0 :     gcc_unreachable ();
    4226              : 
    4227          310 :   if (dump_file)
    4228              :     {
    4229            0 :       if (after)
    4230              :         {
    4231            0 :           fprintf (dump_file, "\nPlace:\n\n");
    4232            0 :           if (tlsdesc_insn)
    4233            0 :             print_rtl_single (dump_file, tlsdesc_insn);
    4234            0 :           print_rtl_single (dump_file, tls_insn);
    4235            0 :           fprintf (dump_file, "\nafter:\n\n");
    4236            0 :           print_rtl_single (dump_file, after);
    4237            0 :           fprintf (dump_file, "\n");
    4238              :         }
    4239              :       else
    4240              :         {
    4241            0 :           fprintf (dump_file, "\nPlace:\n\n");
    4242            0 :           if (tlsdesc_insn)
    4243            0 :             print_rtl_single (dump_file, tlsdesc_insn);
    4244            0 :           print_rtl_single (dump_file, tls_insn);
    4245            0 :           fprintf (dump_file, "\nbefore:\n\n");
    4246            0 :           print_rtl_single (dump_file, before);
    4247            0 :           fprintf (dump_file, "\n");
    4248              :         }
    4249              :     }
    4250              : 
    4251          310 :   if (kind != X86_CSE_TLSDESC)
    4252              :     {
    4253              :       /* Copy RAX to DEST.  */
    4254          292 :       set = gen_rtx_SET (dest, rax);
    4255          292 :       rtx_insn *set_insn = emit_insn_after (set, tls_insn);
    4256          292 :       set_dst_reg_note (set_insn, REG_EQUAL, copy_rtx (eqv), dest);
    4257          292 :       if (dump_file)
    4258              :         {
    4259            0 :           fprintf (dump_file, "\nPlace:\n\n");
    4260            0 :           print_rtl_single (dump_file, set_insn);
    4261            0 :           fprintf (dump_file, "\nafter:\n\n");
    4262            0 :           print_rtl_single (dump_file, tls_insn);
    4263            0 :           fprintf (dump_file, "\n");
    4264              :         }
    4265              :     }
    4266          310 : }
    4267              : 
    4268              : namespace {
    4269              : 
    4270              : const pass_data pass_data_x86_cse =
    4271              : {
    4272              :   RTL_PASS, /* type */
    4273              :   "x86_cse", /* name */
    4274              :   OPTGROUP_NONE, /* optinfo_flags */
    4275              :   TV_MACH_DEP, /* tv_id */
    4276              :   0, /* properties_required */
    4277              :   0, /* properties_provided */
    4278              :   0, /* properties_destroyed */
    4279              :   0, /* todo_flags_start */
    4280              :   0, /* todo_flags_finish */
    4281              : };
    4282              : 
    4283              : class pass_x86_cse : public rtl_opt_pass
    4284              : {
    4285              : public:
    4286       285722 :   pass_x86_cse (gcc::context *ctxt)
    4287       571444 :     : rtl_opt_pass (pass_data_x86_cse, ctxt)
    4288              :   {}
    4289              : 
    4290              :   /* opt_pass methods: */
    4291      1471370 :   bool gate (function *fun) final override
    4292              :     {
    4293      1471370 :       return (TARGET_SSE2
    4294      1467149 :               && optimize
    4295      2512707 :               && optimize_function_for_speed_p (fun));
    4296              :     }
    4297              : 
    4298       976823 :   unsigned int execute (function *) final override
    4299              :     {
    4300       976823 :       return x86_cse ();
    4301              :     }
    4302              : 
    4303              : private:
    4304              :   /* The redundant source value.  */
    4305              :   rtx val;
    4306              :   /* The actual redundant source value for UNSPEC_TLSDESC.  */
    4307              :   rtx tlsdesc_val;
    4308              :   /* The instruction which defines the redundant value.  */
    4309              :   rtx_insn *def_insn;
    4310              :   /* Mode of the destination of the candidate redundant instruction.  */
    4311              :   machine_mode mode;
    4312              :   /* Mode of the source of the candidate redundant instruction.  */
    4313              :   machine_mode scalar_mode;
    4314              :   /* The classification of the candidate redundant instruction.  */
    4315              :   x86_cse_kind kind;
    4316              : 
    4317              :   unsigned int x86_cse (void);
    4318              :   bool candidate_gnu_tls_p (rtx_insn *, attr_tls64);
    4319              :   bool candidate_gnu2_tls_p (rtx, attr_tls64);
    4320              :   bool candidate_vector_p (rtx);
    4321              :   rtx_insn *tls_set_insn_from_symbol (const_rtx, const_rtx);
    4322              : }; // class pass_x86_cse
    4323              : 
    4324              : /* Return the instruction which sets REG from TLS_SYMBOL.  */
    4325              : 
    4326              : rtx_insn *
    4327           38 : pass_x86_cse::tls_set_insn_from_symbol (const_rtx reg,
    4328              :                                         const_rtx tls_symbol)
    4329              : {
    4330           38 :   rtx_insn *set_insn = nullptr;
    4331           38 :   for (df_ref ref = DF_REG_DEF_CHAIN (REGNO (reg));
    4332          103 :        ref;
    4333           65 :        ref = DF_REF_NEXT_REG (ref))
    4334              :     {
    4335           65 :       if (DF_REF_IS_ARTIFICIAL (ref))
    4336              :         return nullptr;
    4337              : 
    4338           65 :       set_insn = DF_REF_INSN (ref);
    4339           65 :       if (get_attr_tls64 (set_insn) != TLS64_LEA)
    4340              :         return nullptr;
    4341              : 
    4342           65 :       rtx tls_set = PATTERN (set_insn);
    4343           65 :       rtx tls_src = XVECEXP (SET_SRC (tls_set), 0, 0);
    4344           65 :       if (!rtx_equal_p (tls_symbol, tls_src))
    4345              :         return nullptr;
    4346              :     }
    4347              : 
    4348              :   return set_insn;
    4349              : }
    4350              : 
    4351              : /* Return true and output def_insn, val, mode, scalar_mode and kind if
    4352              :    INSN is UNSPEC_TLS_GD or UNSPEC_TLS_LD_BASE.  */
    4353              : 
    4354              : bool
    4355         2186 : pass_x86_cse::candidate_gnu_tls_p (rtx_insn *insn, attr_tls64 tls64)
    4356              : {
    4357         2186 :   if (!TARGET_64BIT || !cfun->machine->tls_descriptor_call_multiple_p)
    4358              :     return false;
    4359              : 
    4360              :   /* Record the redundant TLS CALLs for 64-bit:
    4361              : 
    4362              :      (parallel [
    4363              :         (set (reg:DI 0 ax)
    4364              :              (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
    4365              :                       (const_int 0 [0])))
    4366              :         (unspec:DI [(symbol_ref:DI ("foo") [flags 0x50])
    4367              :                     (reg/f:DI 7 sp)] UNSPEC_TLS_GD)
    4368              :         (clobber (reg:DI 5 di))])
    4369              : 
    4370              : 
    4371              :      and
    4372              : 
    4373              :      (parallel [
    4374              :         (set (reg:DI 0 ax)
    4375              :              (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
    4376              :                       (const_int 0 [0])))
    4377              :         (unspec:DI [(reg/f:DI 7 sp)] UNSPEC_TLS_LD_BASE)])
    4378              : 
    4379              :    */
    4380              : 
    4381         2023 :   rtx pat = PATTERN (insn);
    4382         2023 :   rtx set = XVECEXP (pat, 0, 0);
    4383         2023 :   gcc_assert (GET_CODE (set) == SET);
    4384         2023 :   rtx dest = SET_DEST (set);
    4385         2023 :   scalar_mode = mode = GET_MODE (dest);
    4386         2023 :   val = XVECEXP (pat, 0, 1);
    4387         2023 :   gcc_assert (GET_CODE (val) == UNSPEC);
    4388              : 
    4389         2023 :   if (tls64 == TLS64_GD)
    4390         1922 :     kind = X86_CSE_TLS_GD;
    4391              :   else
    4392          101 :     kind = X86_CSE_TLS_LD_BASE;
    4393              : 
    4394         2023 :   def_insn = nullptr;
    4395         2023 :   return true;
    4396              : }
    4397              : 
    4398              : /* Return true and output def_insn, val, mode, scalar_mode and kind if
    4399              :    SET is UNSPEC_TLSDESC.  */
    4400              : 
    4401              : bool
    4402           50 : pass_x86_cse::candidate_gnu2_tls_p (rtx set, attr_tls64 tls64)
    4403              : {
    4404           50 :   if (!TARGET_64BIT || !cfun->machine->tls_descriptor_call_multiple_p)
    4405              :     return false;
    4406              : 
    4407           48 :   rtx tls_symbol;
    4408           48 :   rtx_insn *set_insn;
    4409           48 :   rtx src = SET_SRC (set);
    4410           48 :   val = src;
    4411           48 :   tlsdesc_val = src;
    4412           48 :   kind = X86_CSE_TLSDESC;
    4413              : 
    4414           48 :   if (tls64 == TLS64_COMBINE)
    4415              :     {
    4416              :       /* Record 64-bit TLS64_COMBINE:
    4417              : 
    4418              :          (set (reg/f:DI 104)
    4419              :               (plus:DI (unspec:DI [
    4420              :                           (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
    4421              :                           (reg:DI 114)
    4422              :                           (reg/f:DI 7 sp)] UNSPEC_TLSDESC)
    4423              :                        (const:DI (unspec:DI [
    4424              :                                     (symbol_ref:DI ("e") [flags 0x1a])
    4425              :                                   ] UNSPEC_DTPOFF))))
    4426              : 
    4427              :          (set (reg/f:DI 104)
    4428              :               (plus:DI (unspec:DI [
    4429              :                           (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
    4430              :                           (unspec:DI [
    4431              :                              (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
    4432              :                           ] UNSPEC_TLSDESC)
    4433              :                           (reg/f:DI 7 sp)] UNSPEC_TLSDESC)
    4434              :                        (const:DI (unspec:DI [
    4435              :                                     (symbol_ref:DI ("e") [flags 0x1a])
    4436              :                                  ] UNSPEC_DTPOFF))))
    4437              :      */
    4438              : 
    4439           10 :       scalar_mode = mode = GET_MODE (src);
    4440              : 
    4441              :       /* Since the first operand of PLUS in the source TLS_COMBINE
    4442              :          pattern is unused, use the second operand of PLUS:
    4443              : 
    4444              :          (const:DI (unspec:DI [
    4445              :                       (symbol_ref:DI ("e") [flags 0x1a])
    4446              :                    ] UNSPEC_DTPOFF))
    4447              : 
    4448              :          as VAL to check if 2 TLS_COMBINE patterns have the same
    4449              :          source.  */
    4450           10 :       val = XEXP (src, 1);
    4451           10 :       gcc_assert (GET_CODE (val) == CONST
    4452              :                   && GET_CODE (XEXP (val, 0)) == UNSPEC
    4453              :                       && XINT (XEXP (val, 0), 1) == UNSPEC_DTPOFF
    4454              :                       && SYMBOL_REF_P (XVECEXP (XEXP (val, 0), 0, 0)));
    4455           10 :       def_insn = nullptr;
    4456           10 :       return true;
    4457              :     }
    4458              : 
    4459              :   /* Record 64-bit TLS_CALL:
    4460              : 
    4461              :      (set (reg:DI 101)
    4462              :           (unspec:DI [(symbol_ref:DI ("foo") [flags 0x50])
    4463              :                       (reg:DI 112)
    4464              :                       (reg/f:DI 7 sp)] UNSPEC_TLSDESC))
    4465              : 
    4466              :    */
    4467              : 
    4468           38 :   gcc_assert (GET_CODE (src) == UNSPEC);
    4469           38 :   tls_symbol = XVECEXP (src, 0, 0);
    4470           38 :   src = XVECEXP (src, 0, 1);
    4471           38 :   scalar_mode = mode = GET_MODE (src);
    4472           38 :   gcc_assert (REG_P (src));
    4473              : 
    4474              :   /* All definitions of reg:DI 129 in
    4475              : 
    4476              :      (set (reg:DI 110)
    4477              :           (unspec:DI [(symbol_ref:DI ("foo"))
    4478              :                       (reg:DI 129)
    4479              :                       (reg/f:DI 7 sp)] UNSPEC_TLSDESC))
    4480              : 
    4481              :      should have the same source as in
    4482              : 
    4483              :      (set (reg:DI 129)
    4484              :           (unspec:DI [(symbol_ref:DI ("foo"))] UNSPEC_TLSDESC))
    4485              : 
    4486              :    */
    4487              : 
    4488           38 :   set_insn = tls_set_insn_from_symbol (src, tls_symbol);
    4489           38 :   if (!set_insn)
    4490              :     return false;
    4491              : 
    4492              :   /* Use TLS_SYMBOL as VAL to check if 2 patterns have the same source.  */
    4493           38 :   val = tls_symbol;
    4494           38 :   def_insn = set_insn;
    4495           38 :   return true;
    4496              : }
    4497              : 
    4498              : /* Return true and output def_insn, val, mode, scalar_mode and kind if
    4499              :    SET is the single set of a vector broadcast instruction.  */
    4500              : 
    4501              : bool
    4502     50170450 : pass_x86_cse::candidate_vector_p (rtx set)
    4503              : {
    4504     50170450 :   rtx src = SET_SRC (set);
    4505     50170450 :   rtx dest = SET_DEST (set);
    4506     50170450 :   mode = GET_MODE (dest);
    4507              :   /* Skip non-vector instruction.  */
    4508     50170450 :   if (!VECTOR_MODE_P (mode))
    4509              :     return false;
    4510              : 
    4511              :   /* Skip non-vector load instruction.  */
    4512      3686356 :   if (!REG_P (dest) && !SUBREG_P (dest))
    4513              :     return false;
    4514              : 
    4515      2185915 :   val = ix86_broadcast_inner (src, mode, &scalar_mode, &kind,
    4516              :                               &def_insn);
    4517      2185915 :   return val ? true : false;
    4518              : }
    4519              : 
    4520              : /* At entry of the nearest common dominator for basic blocks with
    4521              : 
    4522              :    1. Vector CONST0_RTX patterns.
    4523              :    2. Vector CONSTM1_RTX patterns.
    4524              :    3. Vector broadcast patterns.
    4525              :    4. UNSPEC_TLS_GD patterns.
    4526              :    5. UNSPEC_TLS_LD_BASE patterns.
    4527              :    6. UNSPEC_TLSDESC patterns.
    4528              : 
    4529              :    generate a single pattern whose destination is used to replace the
    4530              :    source in all identical patterns.
    4531              : 
    4532              :    NB: We want to generate a pattern, which is executed only once, to
    4533              :    cover the whole function.  The LCM algorithm isn't appropriate here
    4534              :    since it may place a pattern inside the loop.  */
    4535              : 
    4536              : unsigned int
    4537       976823 : pass_x86_cse::x86_cse (void)
    4538              : {
    4539       976823 :   timevar_push (TV_MACH_DEP);
    4540              : 
    4541       976823 :   auto_vec<redundant_pattern *> loads;
    4542       976823 :   redundant_pattern *load;
    4543       976823 :   basic_block bb;
    4544       976823 :   rtx_insn *insn;
    4545       976823 :   unsigned int i;
    4546       976823 :   auto_bitmap updated_gnu_tls_insns;
    4547       976823 :   auto_bitmap updated_gnu2_tls_insns;
    4548              : 
    4549       976823 :   df_set_flags (DF_DEFER_INSN_RESCAN);
    4550              : 
    4551       976823 :   bool recursive_call_p = cfun->machine->recursive_function;
    4552              : 
    4553     11001357 :   FOR_EACH_BB_FN (bb, cfun)
    4554              :     {
    4555    132360951 :       FOR_BB_INSNS (bb, insn)
    4556              :         {
    4557    122336417 :           if (!NONDEBUG_INSN_P (insn))
    4558     68512091 :             continue;
    4559              : 
    4560     53824326 :           bool matched = false;
    4561              :           /* Remove redundant patterns if there are at least 2 of
    4562              :              them.  */
    4563     53824326 :           unsigned int threshold = 2;
    4564              : 
    4565     53824326 :           rtx set = single_set (insn);
    4566     53824326 :           if (!set && !CALL_P (insn))
    4567      1099392 :             continue;
    4568              : 
    4569     52724934 :           tlsdesc_val = nullptr;
    4570              : 
    4571     52724934 :           attr_tls64 tls64 = get_attr_tls64 (insn);
    4572     52724934 :           switch (tls64)
    4573              :             {
    4574         2186 :             case TLS64_GD:
    4575         2186 :             case TLS64_LD_BASE:
    4576              :               /* Verify UNSPEC_TLS_GD and UNSPEC_TLS_LD_BASE.  */
    4577         2186 :               if (candidate_gnu_tls_p (insn, tls64))
    4578              :                 break;
    4579          163 :               continue;
    4580              : 
    4581           50 :             case TLS64_CALL:
    4582           50 :             case TLS64_COMBINE:
    4583              :               /* Verify UNSPEC_TLSDESC.  */
    4584           50 :               if (candidate_gnu2_tls_p (set, tls64))
    4585              :                 break;
    4586            2 :               continue;
    4587              : 
    4588           35 :             case TLS64_LEA:
    4589              :               /* Skip TLS64_LEA.  */
    4590           35 :               continue;
    4591              : 
    4592     52722663 :             case TLS64_NONE:
    4593     52722663 :               if (!set)
    4594      2552213 :                 continue;
    4595              : 
    4596              :               /* Check for vector broadcast.  */
    4597     50170450 :               if (candidate_vector_p (set))
    4598              :                 break;
    4599     49959608 :               continue;
    4600              :             }
    4601              : 
    4602              :           /* Check if there is a matching redundant load.   */
    4603       380462 :           FOR_EACH_VEC_ELT (loads, i, load)
    4604       258318 :             if (load->val
    4605       258318 :                 && load->kind == kind
    4606       200709 :                 && load->mode == scalar_mode
    4607       191526 :                 && (load->bb == bb
    4608       155499 :                     || kind != X86_CSE_VEC_DUP
    4609              :                     /* A vector load which isn't all 0s/1s must be in the
    4610              :                        same basic block if it is in a recursive function.  */
    4611        97217 :                     || !recursive_call_p)
    4612       447924 :                 && rtx_equal_p (load->val, val))
    4613              :               {
    4614              :                 /* Record instruction.  */
    4615        90769 :                 bitmap_set_bit (load->insns, INSN_UID (insn));
    4616              : 
    4617              :                 /* Record the maximum vector size.  */
    4618        90769 :                 if (kind <= X86_CSE_VEC_DUP
    4619       180427 :                     && load->size < GET_MODE_SIZE (mode))
    4620          962 :                   load->size = GET_MODE_SIZE (mode);
    4621              : 
    4622              :                 /* Record the basic block.  */
    4623        90769 :                 bitmap_set_bit (load->bbs, bb->index);
    4624              : 
    4625              :                 /* Increment the count.  */
    4626        90769 :                 load->count++;
    4627              : 
    4628        90769 :                 matched = true;
    4629        90769 :                 break;
    4630              :               }
    4631              : 
    4632       212913 :           if (matched)
    4633        90769 :             continue;
    4634              : 
    4635              :           /* We see this instruction the first time.  Record the
    4636              :              redundant source value, its mode, the destination size,
    4637              :              instruction which defines the redundant source value,
    4638              :              instruction basic block and the instruction kind.  */
    4639       122144 :           load = new redundant_pattern;
    4640              : 
    4641       122144 :           load->val = copy_rtx (val);
    4642       122144 :           if (tlsdesc_val)
    4643           25 :             load->tlsdesc_val = copy_rtx (tlsdesc_val);
    4644              :           else
    4645       122119 :             load->tlsdesc_val = nullptr;
    4646       122144 :           load->mode = scalar_mode;
    4647       122144 :           load->size = GET_MODE_SIZE (mode);
    4648       122144 :           load->def_insn = def_insn;
    4649       122144 :           load->count = 1;
    4650       122144 :           load->threshold = threshold;
    4651       122144 :           load->bb = BLOCK_FOR_INSN (insn);
    4652       122144 :           load->kind = kind;
    4653              : 
    4654       122144 :           bitmap_set_bit (load->insns, INSN_UID (insn));
    4655       122144 :           bitmap_set_bit (load->bbs, bb->index);
    4656              : 
    4657       122144 :           loads.safe_push (load);
    4658              :         }
    4659              :     }
    4660              : 
    4661              :   bool replaced = false;
    4662      1098967 :   FOR_EACH_VEC_ELT (loads, i, load)
    4663       122144 :     if (load->count >= load->threshold)
    4664              :       {
    4665        32107 :         machine_mode mode;
    4666        32107 :         rtx reg, broadcast_source, broadcast_reg;
    4667        32107 :         replaced = true;
    4668        32107 :         switch (load->kind)
    4669              :           {
    4670          310 :           case X86_CSE_TLS_GD:
    4671          310 :           case X86_CSE_TLS_LD_BASE:
    4672          310 :           case X86_CSE_TLSDESC:
    4673          310 :             broadcast_reg = gen_reg_rtx (load->mode);
    4674          310 :             replace_tls_call (broadcast_reg, load->insns,
    4675          310 :                               (load->kind == X86_CSE_TLSDESC
    4676              :                                ? updated_gnu2_tls_insns
    4677              :                                : updated_gnu_tls_insns));
    4678          310 :             load->broadcast_reg = broadcast_reg;
    4679          310 :             break;
    4680              : 
    4681        31797 :           case X86_CSE_CONST0_VECTOR:
    4682        31797 :           case X86_CSE_CONSTM1_VECTOR:
    4683        31797 :           case X86_CSE_VEC_DUP:
    4684        31797 :             mode = ix86_get_vector_cse_mode (load->size, load->mode);
    4685        31797 :             broadcast_reg = gen_reg_rtx (mode);
    4686        31797 :             if (load->def_insn)
    4687              :               {
    4688              :                 /* Replace redundant vector loads with a single vector
    4689              :                    load in the same basic block.  */
    4690          831 :                 reg = load->val;
    4691          831 :                 if (load->mode != GET_MODE (reg))
    4692            0 :                   reg = gen_rtx_SUBREG (load->mode, reg, 0);
    4693          831 :                 broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg);
    4694              :               }
    4695              :             else
    4696              :               /* This is a constant integer/double vector.  If the
    4697              :                  inner scalar is 0 or -1, set vector to CONST0_RTX
    4698              :                  or CONSTM1_RTX directly.  */
    4699        30966 :               switch (load->kind)
    4700              :                 {
    4701        19566 :                 case X86_CSE_CONST0_VECTOR:
    4702        19566 :                   broadcast_source = CONST0_RTX (mode);
    4703        19566 :                   break;
    4704         1147 :                 case X86_CSE_CONSTM1_VECTOR:
    4705         1147 :                   broadcast_source = CONSTM1_RTX (mode);
    4706         1147 :                   break;
    4707        10253 :                 case X86_CSE_VEC_DUP:
    4708        10253 :                   reg = gen_reg_rtx (load->mode);
    4709        10253 :                   broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg);
    4710        10253 :                   break;
    4711            0 :                 default:
    4712            0 :                   gcc_unreachable ();
    4713              :                 }
    4714        31797 :             replace_vector_const (mode, broadcast_reg, load->insns,
    4715              :                                   load->mode);
    4716        31797 :             load->broadcast_source = broadcast_source;
    4717        31797 :             load->broadcast_reg = broadcast_reg;
    4718        31797 :             break;
    4719              :           }
    4720              :       }
    4721              : 
    4722       976823 :   if (replaced)
    4723              :     {
    4724        26041 :       auto_vec<rtx_insn *> control_flow_insns;
    4725              : 
    4726              :       /* (Re-)discover loops so that bb->loop_father can be used in the
    4727              :          analysis below.  */
    4728        26041 :       calculate_dominance_info (CDI_DOMINATORS);
    4729        26041 :       loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
    4730              : 
    4731        71666 :       FOR_EACH_VEC_ELT (loads, i, load)
    4732        45625 :         if (load->count >= load->threshold)
    4733              :           {
    4734        32107 :             rtx set;
    4735        32107 :             if (load->def_insn)
    4736          845 :               switch (load->kind)
    4737              :                 {
    4738           14 :                 case X86_CSE_TLSDESC:
    4739           14 :                   ix86_place_single_tls_call (load->broadcast_reg,
    4740              :                                               load->tlsdesc_val,
    4741              :                                               load->kind,
    4742           14 :                                               load->bbs,
    4743              :                                               updated_gnu_tls_insns,
    4744              :                                               updated_gnu2_tls_insns,
    4745           14 :                                               PATTERN (load->def_insn));
    4746           14 :                   break;
    4747          831 :                 case X86_CSE_VEC_DUP:
    4748              :                   /* Insert a broadcast after the original scalar
    4749              :                      definition.  */
    4750          831 :                   set = gen_rtx_SET (load->broadcast_reg,
    4751              :                                      load->broadcast_source);
    4752          831 :                   insn = emit_insn_after (set, load->def_insn);
    4753              : 
    4754          831 :                   if (cfun->can_throw_non_call_exceptions)
    4755              :                     {
    4756              :                       /* Handle REG_EH_REGION note in DEF_INSN.  */
    4757            5 :                       rtx note = find_reg_note (load->def_insn,
    4758              :                                                 REG_EH_REGION, nullptr);
    4759            5 :                       if (note)
    4760              :                         {
    4761            1 :                           control_flow_insns.safe_push (load->def_insn);
    4762            1 :                           add_reg_note (insn, REG_EH_REGION,
    4763              :                                         XEXP (note, 0));
    4764              :                         }
    4765              :                     }
    4766              : 
    4767          831 :                   if (dump_file)
    4768              :                     {
    4769            0 :                       fprintf (dump_file, "\nAdd:\n\n");
    4770            0 :                       print_rtl_single (dump_file, insn);
    4771            0 :                       fprintf (dump_file, "\nafter:\n\n");
    4772            0 :                       print_rtl_single (dump_file, load->def_insn);
    4773            0 :                       fprintf (dump_file, "\n");
    4774              :                     }
    4775              :                   break;
    4776            0 :                 default:
    4777            0 :                   gcc_unreachable ();
    4778              :                 }
    4779              :             else
    4780        31262 :               switch (load->kind)
    4781              :                 {
    4782          296 :                 case X86_CSE_TLS_GD:
    4783          296 :                 case X86_CSE_TLS_LD_BASE:
    4784          296 :                 case X86_CSE_TLSDESC:
    4785          296 :                   ix86_place_single_tls_call (load->broadcast_reg,
    4786              :                                               (load->kind == X86_CSE_TLSDESC
    4787              :                                                ? load->tlsdesc_val
    4788              :                                                : load->val),
    4789              :                                               load->kind,
    4790          296 :                                               load->bbs,
    4791              :                                               updated_gnu_tls_insns,
    4792              :                                               updated_gnu2_tls_insns);
    4793          296 :                   break;
    4794        30966 :                 case X86_CSE_CONST0_VECTOR:
    4795        30966 :                 case X86_CSE_CONSTM1_VECTOR:
    4796        30966 :                 case X86_CSE_VEC_DUP:
    4797        30966 :                   ix86_place_single_vector_set (load->broadcast_reg,
    4798              :                                                 load->broadcast_source,
    4799              :                                                 load->bbs,
    4800              :                                                 load);
    4801        30966 :                   break;
    4802              :                 }
    4803              :           }
    4804              : 
    4805        26041 :       loop_optimizer_finalize ();
    4806              : 
    4807        26041 :       if (!control_flow_insns.is_empty ())
    4808              :         {
    4809            1 :           free_dominance_info (CDI_DOMINATORS);
    4810              : 
    4811            3 :           FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
    4812            1 :             if (control_flow_insn_p (insn))
    4813              :               {
    4814              :                 /* Split the block after insn.  There will be a fallthru
    4815              :                    edge, which is OK so we keep it.  We have to create
    4816              :                    the exception edges ourselves.  */
    4817            1 :                 bb = BLOCK_FOR_INSN (insn);
    4818            1 :                 split_block (bb, insn);
    4819            1 :                 rtl_make_eh_edge (NULL, bb, BB_END (bb));
    4820              :               }
    4821              :         }
    4822              : 
    4823        26041 :       df_process_deferred_rescans ();
    4824        26041 :     }
    4825              : 
    4826      1098967 :   FOR_EACH_VEC_ELT (loads, i, load)
    4827       244288 :     delete load;
    4828              : 
    4829       976823 :   df_clear_flags (DF_DEFER_INSN_RESCAN);
    4830              : 
    4831       976823 :   timevar_pop (TV_MACH_DEP);
    4832       976823 :   return 0;
    4833       976823 : }
    4834              : 
    4835              : } // anon namespace
    4836              : /* Return a newly allocated pass_x86_cse instance created for CTXT.  */
    4837              : rtl_opt_pass *
    4838       285722 : make_pass_x86_cse (gcc::context *ctxt)
    4839              : {
    4840       285722 :   return new pass_x86_cse (ctxt);
    4841              : }
    4842              : 
    4843              : /* Convert legacy instructions that clobber EFLAGS to APX_NF
    4844              :    instructions, by dropping the FLAGS_REG clobber from their PARALLEL pattern, when there is no flag set between a flag
    4845              :    producer and user.  Candidates are first collected per basic block and then rewritten in a second step.  Always returns 0.  */
    4846              : 
    4847              : static unsigned int
    4848          367 : ix86_apx_nf_convert (void)
    4849              : {
    4850          367 :   timevar_push (TV_MACH_DEP);
    4851              : 
    4852          367 :   basic_block bb;
    4853          367 :   rtx_insn *insn;
    4854          367 :   hash_map <rtx_insn *, rtx> converting_map;
    4855          367 :   auto_vec <rtx_insn *> current_convert_list;
    4856              : 
    4857          367 :   bool converting_seq = false;
    4858          367 :   rtx cc = gen_rtx_REG (CCmode, FLAGS_REG);
    4859              : 
    4860          786 :   FOR_EACH_BB_FN (bb, cfun)
    4861              :     {
    4862              :       /* Reset conversion for each bb, so a candidate sequence never spans two basic blocks.  */
    4863          419 :       converting_seq = false;
    4864         5031 :       FOR_BB_INSNS (bb, insn)
    4865              :         {
    4866         4612 :           if (!NONDEBUG_INSN_P (insn))
    4867         4945 :             continue;
    4868              : 
    4869         3676 :           if (recog_memoized (insn) < 0)
    4870          335 :             continue;
    4871              : 
    4872              :           /* Convert candidate insns after cstore, which should
    4873              :              satisfy the two conditions:
    4874              :              1. Is not flag user or producer, only clobbers
    4875              :              FLAGS_REG.
    4876              :              2. Have corresponding nf pattern.  */
    4877              : 
    4878         3341 :           rtx pat = PATTERN (insn);
    4879              : 
    4880              :           /* Start conversion at the first cstorecc: a single set whose source is a comparison mentioning FLAGS_REG and whose destination does not overlap FLAGS_REG.  */
    4881         3341 :           rtx set = NULL_RTX;
    4882         3341 :           if (!converting_seq
    4883         2760 :               && (set = single_set (insn))
    4884         2684 :               && ix86_comparison_operator (SET_SRC (set), VOIDmode)
    4885          126 :               && reg_overlap_mentioned_p (cc, SET_SRC (set))
    4886         3464 :               && !reg_overlap_mentioned_p (cc, SET_DEST (set)))
    4887              :             {
    4888          123 :               converting_seq = true;
    4889          123 :               current_convert_list.truncate (0);
    4890              :             }
    4891              :           /* Terminate at the next explicit flag set; a mere CLOBBER of the flags does not terminate the sequence.  */
    4892         3218 :           else if (reg_set_p (cc, pat)
    4893         3218 :                    && GET_CODE (set_of (cc, pat)) != CLOBBER)
    4894              :             converting_seq = false;
    4895              : 
    4896         3122 :           if (!converting_seq)
    4897         2738 :             continue;
    4898              : 
    4899          603 :           if (get_attr_has_nf (insn)
    4900          603 :               && GET_CODE (pat) == PARALLEL)
    4901              :             {
    4902              :               /* Record the insn and its current pattern in the candidate map.  */
    4903           72 :               current_convert_list.safe_push (insn);
    4904           72 :               converting_map.put (insn, pat);
    4905              :             }
    4906              :           /* If the insn clobbers flags but has no nf_attr,
    4907              :              revoke all candidates recorded for the current sequence and stop converting.  */
    4908          531 :           else if (!get_attr_has_nf (insn)
    4909          530 :                    && reg_set_p (cc, pat)
    4910          534 :                    && GET_CODE (set_of (cc, pat)) == CLOBBER)
    4911              :             {
    4912            3 :               for (auto item : current_convert_list)
    4913            0 :                 converting_map.remove (item);
    4914            3 :               converting_seq = false;
    4915              :             }
    4916              :         }
    4917              :     }
    4918              : 
    4919          367 :   if (!converting_map.is_empty ())
    4920              :     {
    4921           85 :       for (auto iter = converting_map.begin ();
    4922          170 :            iter != converting_map.end (); ++iter)
    4923              :         {
    4924           72 :           rtx_insn *replace = (*iter).first;
    4925           72 :           rtx pat = (*iter).second;
    4926           72 :           int i, n = 0, len = XVECLEN (pat, 0);
    4927           72 :           rtx *new_elems = XALLOCAVEC (rtx, len);
    4928           72 :           rtx new_pat;
    4929          216 :           for (i = 0; i < len; i++)
    4930              :             {
    4931          144 :               rtx temp = XVECEXP (pat, 0, i);
    4932          216 :               if (! (GET_CODE (temp) == CLOBBER
    4933           72 :                      && reg_overlap_mentioned_p (cc,
    4934           72 :                                                  XEXP (temp, 0))))
    4935              :                 {
    4936           72 :                   new_elems[n] = temp;
    4937           72 :                   n++;
    4938              :                 }
    4939              :             }
    4940              : 
    4941           72 :           if (n == 1)
    4942           72 :             new_pat = new_elems[0];
    4943              :           else
    4944            0 :             new_pat =
    4945            0 :               gen_rtx_PARALLEL (VOIDmode,
    4946              :                                 gen_rtvec_v (n,
    4947              :                                              new_elems));
    4948              : 
    4949           72 :           PATTERN (replace) = new_pat;
    4950           72 :           INSN_CODE (replace) = -1;
    4951           72 :           recog_memoized (replace);
    4952           72 :           df_insn_rescan (replace);
    4953              :         }
    4954              :     }
    4955              : 
    4956          367 :   timevar_pop (TV_MACH_DEP);
    4957          367 :   return 0;
    4958          367 : }
    4959              : 
    4960              : /* Pass descriptor and pass class for the "apx_nfcvt" RTL pass, whose execute hook runs ix86_apx_nf_convert.  */
    4961              : namespace {
    4962              : 
    4963              : const pass_data pass_data_apx_nf_convert =
    4964              : {
    4965              :   RTL_PASS, /* type */
    4966              :   "apx_nfcvt", /* name */
    4967              :   OPTGROUP_NONE, /* optinfo_flags */
    4968              :   TV_MACH_DEP, /* tv_id */
    4969              :   0, /* properties_required */
    4970              :   0, /* properties_provided */
    4971              :   0, /* properties_destroyed */
    4972              :   0, /* todo_flags_start */
    4973              :   0, /* todo_flags_finish */
    4974              : };
    4975              : 
    4976              : class pass_apx_nf_convert : public rtl_opt_pass
    4977              : {
    4978              : public:
    4979       285722 :   pass_apx_nf_convert (gcc::context *ctxt)
    4980       571444 :     : rtl_opt_pass (pass_data_apx_nf_convert, ctxt)
    4981              :   {}
    4982              : 
    4983              :   /* opt_pass methods: the gate requires TARGET_APX_NF, a nonzero optimize, and optimize_function_for_speed_p (cfun).  */
    4984      1471370 :   bool gate (function *) final override
    4985              :     {
    4986      1471370 :       return (TARGET_APX_NF
    4987          459 :               && optimize
    4988      1471821 :               && optimize_function_for_speed_p (cfun));
    4989              :     }
    4990              : 
    4991          367 :   unsigned int execute (function *) final override
    4992              :     {
    4993          367 :       return ix86_apx_nf_convert ();
    4994              :     }
    4995              : }; // class pass_apx_nf_convert
    4996              : 
    4997              : } // anon namespace
    4998              : /* Return a newly allocated pass_apx_nf_convert instance created for CTXT.  */
    4999              : rtl_opt_pass *
    5000       285722 : make_pass_apx_nf_convert (gcc::context *ctxt)
    5001              : {
    5002       285722 :   return new pass_apx_nf_convert (ctxt);
    5003              : }
    5004              : 
    5005              : /* When a hot loop can be fit into one cacheline (its summed minimum insn size does not exceed ix86_cost->prefetch_block),
    5006              :    force align the loop without considering the max skip, by emitting a max_skip_align insn before the label that heads its first block.  */
    5007              : static void
    5008       978644 : ix86_align_loops ()
    5009              : {
    5010       978644 :   basic_block bb;
    5011              : 
    5012              :   /* Don't do this when we don't know cache line size.  */
    5013       978644 :   if (ix86_cost->prefetch_block == 0)
    5014            9 :     return;
    5015              : 
    5016       978635 :   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
    5017       978635 :   profile_count count_threshold = cfun->cfg->count_max / param_align_threshold;
    5018     11466240 :   FOR_EACH_BB_FN (bb, cfun)
    5019              :     {
    5020     10487605 :       rtx_insn *label = BB_HEAD (bb);
    5021     10487605 :       bool has_fallthru = 0;
    5022     10487605 :       edge e;
    5023     10487605 :       edge_iterator ei;
    5024              : 
    5025     10487605 :       if (!LABEL_P (label))
    5026      5324228 :         continue;
    5027              : 
    5028      5168190 :       profile_count fallthru_count = profile_count::zero ();
    5029      5168190 :       profile_count branch_count = profile_count::zero ();
    5030              : 
    5031     15027324 :       FOR_EACH_EDGE (e, ei, bb->preds)
    5032              :         {
    5033      9859134 :           if (e->flags & EDGE_FALLTHRU)
    5034      2517073 :             has_fallthru = 1, fallthru_count += e->count ();
    5035              :           else
    5036      7342061 :             branch_count += e->count ();
    5037              :         }
    5038              : 
    5039      5168190 :       if (!fallthru_count.initialized_p () || !branch_count.initialized_p ())
    5040         4813 :         continue;
    5041              : 
    5042      5163377 :       if (bb->loop_father
    5043      5163377 :           && bb->loop_father->latch != EXIT_BLOCK_PTR_FOR_FN (cfun)
    5044      6511778 :           && (has_fallthru
    5045      1348401 :               ? (!(single_succ_p (bb)
    5046       146104 :                    && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun))
    5047       938933 :                  && optimize_bb_for_speed_p (bb)
    5048       857203 :                  && branch_count + fallthru_count > count_threshold
    5049       733852 :                  && (branch_count > fallthru_count * param_align_loop_iterations))
    5050              :               /* In case there's no fallthru for the loop.
    5051              :                  Nops inserted won't be executed.  */
    5052       409468 :               : (branch_count > count_threshold
    5053       141382 :                  || (bb->count > bb->prev_bb->count * 10
    5054        12798 :                      && (bb->prev_bb->count
    5055      4629996 :                          <= ENTRY_BLOCK_PTR_FOR_FN (cfun)->count / 2)))))
    5056              :         {
    5057       546179 :           rtx_insn* insn, *end_insn;
    5058       546179 :           HOST_WIDE_INT size = 0;
    5059       546179 :           bool padding_p = true;
    5060       546179 :           basic_block tbb = bb;
    5061       546179 :           unsigned cond_branch_num = 0;
    5062       546179 :           bool detect_tight_loop_p = false;
    5063              : 
    5064       862829 :           for (unsigned int i = 0; i != bb->loop_father->num_nodes;
    5065       316650 :                i++, tbb = tbb->next_bb)
    5066              :             {
    5067              :               /* Only handle contiguous cfg layout: every block reached via next_bb must have the same loop_father as BB.  */
    5068       862829 :               if (bb->loop_father != tbb->loop_father)
    5069              :                 {
    5070              :                   padding_p = false;
    5071              :                   break;
    5072              :                 }
    5073              : 
    5074     10161450 :               FOR_BB_INSNS (tbb, insn)
    5075              :                 {
    5076      9497895 :                   if (!NONDEBUG_INSN_P (insn))
    5077      5447853 :                     continue;
    5078      4050042 :                   size += ix86_min_insn_size (insn);
    5079              : 
    5080              :                   /* We don't know the size of inline asm.
    5081              :                      Don't align loop for call.  Either case is flagged with SIZE == -1.  */
    5082      4050042 :                   if (asm_noperands (PATTERN (insn)) >= 0
    5083      4050042 :                       || CALL_P (insn))
    5084              :                     {
    5085              :                       size = -1;
    5086              :                       break;
    5087              :                     }
    5088              :                 }
    5089              : 
    5090       821782 :               if (size == -1 || size > ix86_cost->prefetch_block)
    5091              :                 {
    5092              :                   padding_p = false;
    5093              :                   break;
    5094              :                 }
    5095              : 
    5096      1464466 :               FOR_EACH_EDGE (e, ei, tbb->succs)
    5097              :                 {
    5098              :                   /* An edge back to BB means a tight loop is found.  It could be part of the loop.  */
    5099      1010786 :                   if (e->dest == bb)
    5100              :                     {
    5101              :                       detect_tight_loop_p = true;
    5102              :                       break;
    5103              :                     }
    5104              :                 }
    5105              : 
    5106       638158 :               if (detect_tight_loop_p)
    5107              :                 break;
    5108              : 
    5109       453680 :               end_insn = BB_END (tbb);
    5110       453680 :               if (JUMP_P (end_insn))
    5111              :                 {
    5112              :                   /* For decoded icache:
    5113              :                      1. Up to two branches are allowed per Way.
    5114              :                      2. A non-conditional branch is the last micro-op in a Way.
    5115              :                   */
    5116       367537 :                   if (onlyjump_p (end_insn)
    5117       367537 :                       && (any_uncondjump_p (end_insn)
    5118       312313 :                           || single_succ_p (tbb)))
    5119              :                     {
    5120              :                       padding_p = false;
    5121              :                       break;
    5122              :                     }
    5123       312313 :                   else if (++cond_branch_num >= 2)
    5124              :                     {
    5125              :                       padding_p = false;
    5126              :                       break;
    5127              :                     }
    5128              :                 }
    5129              : 
    5130              :             }
    5131              : 
    5132       546179 :           if (padding_p && detect_tight_loop_p)
    5133              :             {
    5134       368956 :               emit_insn_before (gen_max_skip_align (GEN_INT (ceil_log2 (size)),
    5135              :                                                     GEN_INT (0)), label);
    5136              :               /* End of function.  */
    5137       184478 :               if (!tbb || tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
    5138              :                 break;
    5139              :               /* Skip bbs which already fit into one cacheline.  */
    5140              :               bb = tbb;
    5141              :             }
    5142              :         }
    5143              :     }
    5144              : 
    5145       978635 :   loop_optimizer_finalize ();
    5146       978635 :   free_dominance_info (CDI_DOMINATORS);
    5147              : }
    5148              : /* Pass descriptor and pass class for the "align_tight_loops" RTL pass, whose execute hook runs ix86_align_loops when ASM_OUTPUT_MAX_SKIP_ALIGN is defined.  */
    5149              : namespace {
    5150              : 
    5151              : const pass_data pass_data_align_tight_loops =
    5152              : {
    5153              :   RTL_PASS, /* type */
    5154              :   "align_tight_loops", /* name */
    5155              :   OPTGROUP_NONE, /* optinfo_flags */
    5156              :   TV_MACH_DEP, /* tv_id */
    5157              :   0, /* properties_required */
    5158              :   0, /* properties_provided */
    5159              :   0, /* properties_destroyed */
    5160              :   0, /* todo_flags_start */
    5161              :   0, /* todo_flags_finish */
    5162              : };
    5163              : 
    5164              : class pass_align_tight_loops : public rtl_opt_pass
    5165              : {
    5166              : public:
    5167       285722 :   pass_align_tight_loops (gcc::context *ctxt)
    5168       571444 :     : rtl_opt_pass (pass_data_align_tight_loops, ctxt)
    5169              :   {}
    5170              : 
    5171              :   /* opt_pass methods: the gate requires TARGET_ALIGN_TIGHT_LOOPS, a nonzero optimize, and optimize_function_for_speed_p (cfun).  */
    5172      1471370 :   bool gate (function *) final override
    5173              :     {
    5174      1471370 :       return TARGET_ALIGN_TIGHT_LOOPS
    5175      1470884 :              && optimize
    5176      2514573 :              && optimize_function_for_speed_p (cfun);
    5177              :     }
    5178              : 
    5179       978644 :   unsigned int execute (function *) final override
    5180              :     {
    5181       978644 :       timevar_push (TV_MACH_DEP);
    5182              : #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
    5183       978644 :       ix86_align_loops ();
    5184              : #endif
    5185       978644 :       timevar_pop (TV_MACH_DEP);
    5186       978644 :       return 0;
    5187              :     }
    5188              : }; // class pass_align_tight_loops
    5189              : 
    5190              : } // anon namespace
    5191              : /* Return a newly allocated pass_align_tight_loops instance created for CTXT.  */
    5192              : rtl_opt_pass *
    5193       285722 : make_pass_align_tight_loops (gcc::context *ctxt)
    5194              : {
    5195       285722 :   return new pass_align_tight_loops (ctxt);
    5196              : }
    5197              : 
    5198              : /* This compares the priority of target features in function DECL1
    5199              :    and DECL2.  It returns positive value if DECL1 is higher priority,
    5200              :    negative value if DECL2 is higher priority and 0 if they are the
    5201              :    same.  Each priority is obtained from get_builtin_code_for_version (decl, NULL) and the result is their difference computed as int.  */
    5202              : 
    5203              : int
    5204         5737 : ix86_compare_version_priority (tree decl1, tree decl2)
    5205              : {
    5206         5737 :   unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
    5207         5737 :   unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
    5208              : 
    5209         5737 :   return (int)priority1 - (int)priority2;
    5210              : }
    5211              : 
    5212              : /* V1 and V2 point to function versions with different priorities
    5213              :    based on the target ISA.  This function compares their priorities: it returns V2's dispatch_priority minus V1's, converted to int, so the result is negative when V1 has the higher priority.  NOTE(review): the subtraction is done in unsigned int and the local struct presumably mirrors the caller's element layout -- confirm both.  */
    5214              : 
    5215              : static int
    5216         6830 : feature_compare (const void *v1, const void *v2)
    5217              : {
    5218         6830 :   typedef struct _function_version_info
    5219              :     {
    5220              :       tree version_decl;
    5221              :       tree predicate_chain;
    5222              :       unsigned int dispatch_priority;
    5223              :     } function_version_info;
    5224              : 
    5225         6830 :   const function_version_info c1 = *(const function_version_info *)v1;
    5226         6830 :   const function_version_info c2 = *(const function_version_info *)v2;
    5227         6830 :   return (c2.dispatch_priority - c1.dispatch_priority);
    5228              : }
    5229              : 
    5230              : /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
    5231              :    to return a pointer to VERSION_DECL if the outcome of the expression
    5232              :    formed by PREDICATE_CHAIN is true.  It is called during version dispatch
    5233              :    to pick the version to execute.  It returns the basic block at the end, to
    5234              :    which more conditions can be added (NEW_BB itself if the chain is NULL).  */
    5235              : 
    5236              : static basic_block
    5237          818 : add_condition_to_bb (tree function_decl, tree version_decl,
    5238              :                      tree predicate_chain, basic_block new_bb)
    5239              : {
    5240          818 :   gimple *return_stmt;
    5241          818 :   tree convert_expr, result_var;
    5242          818 :   gimple *convert_stmt;
    5243          818 :   gimple *call_cond_stmt;
    5244          818 :   gimple *if_else_stmt;
    5245              : 
    5246          818 :   basic_block bb1, bb2, bb3;
    5247          818 :   edge e12, e23;
    5248              : 
    5249          818 :   tree cond_var, and_expr_var = NULL_TREE;
    5250          818 :   gimple_seq gseq;
    5251              : 
    5252          818 :   tree predicate_decl, predicate_arg;
    5253              : 
    5254          818 :   push_cfun (DECL_STRUCT_FUNCTION (function_decl));
    5255              : 
    5256          818 :   gcc_assert (new_bb != NULL);
    5257          818 :   gseq = bb_seq (new_bb);
    5258              : 
    5259              : 
    5260          818 :   convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
    5261              :                          build_fold_addr_expr (version_decl));
    5262          818 :   result_var = create_tmp_var (ptr_type_node);
    5263          818 :   convert_stmt = gimple_build_assign (result_var, convert_expr);
    5264          818 :   return_stmt = gimple_build_return (result_var);
    5265              : 
    5266          818 :   if (predicate_chain == NULL_TREE)
    5267              :     {
    5268          195 :       gimple_seq_add_stmt (&gseq, convert_stmt);
    5269          195 :       gimple_seq_add_stmt (&gseq, return_stmt);
    5270          195 :       set_bb_seq (new_bb, gseq);
    5271          195 :       gimple_set_bb (convert_stmt, new_bb);
    5272          195 :       gimple_set_bb (return_stmt, new_bb);
    5273          195 :       pop_cfun ();
    5274          195 :       return new_bb;
    5275              :     }
    5276              : 
    5277         1285 :   while (predicate_chain != NULL)
    5278              :     {
    5279          662 :       cond_var = create_tmp_var (integer_type_node);
    5280          662 :       predicate_decl = TREE_PURPOSE (predicate_chain);
    5281          662 :       predicate_arg = TREE_VALUE (predicate_chain);
    5282          662 :       call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
    5283          662 :       gimple_call_set_lhs (call_cond_stmt, cond_var);
    5284              : 
    5285          662 :       gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
    5286          662 :       gimple_set_bb (call_cond_stmt, new_bb);
    5287          662 :       gimple_seq_add_stmt (&gseq, call_cond_stmt);
    5288              : 
    5289          662 :       predicate_chain = TREE_CHAIN (predicate_chain);
    5290              : 
    5291          662 :       if (and_expr_var == NULL)
    5292              :         and_expr_var = cond_var;
    5293              :       else
    5294              :         {
    5295           39 :           gimple *assign_stmt;
    5296              :           /* Use MIN_EXPR to check whether any predicate result is zero:
    5297              :              and_expr_var = min_expr <cond_var, and_expr_var>  */
    5298           39 :           assign_stmt = gimple_build_assign (and_expr_var,
    5299              :                           build2 (MIN_EXPR, integer_type_node,
    5300              :                                   cond_var, and_expr_var));
    5301              : 
    5302           39 :           gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
    5303           39 :           gimple_set_bb (assign_stmt, new_bb);
    5304           39 :           gimple_seq_add_stmt (&gseq, assign_stmt);
    5305              :         }
    5306              :     }
    5307              : 
    5308          623 :   if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
    5309              :                                     integer_zero_node,
    5310              :                                     NULL_TREE, NULL_TREE);
    5311          623 :   gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
    5312          623 :   gimple_set_bb (if_else_stmt, new_bb);
    5313          623 :   gimple_seq_add_stmt (&gseq, if_else_stmt);
    5314              : 
    5315          623 :   gimple_seq_add_stmt (&gseq, convert_stmt);
    5316          623 :   gimple_seq_add_stmt (&gseq, return_stmt);
    5317          623 :   set_bb_seq (new_bb, gseq);
    5318              : 
    5319          623 :   bb1 = new_bb;
    5320          623 :   e12 = split_block (bb1, if_else_stmt);
    5321          623 :   bb2 = e12->dest;
    5322          623 :   e12->flags &= ~EDGE_FALLTHRU;
    5323          623 :   e12->flags |= EDGE_TRUE_VALUE;
    5324              : 
    5325          623 :   e23 = split_block (bb2, return_stmt);
    5326              : 
    5327          623 :   gimple_set_bb (convert_stmt, bb2);
    5328          623 :   gimple_set_bb (return_stmt, bb2);
    5329              : 
    5330          623 :   bb3 = e23->dest;
    5331          623 :   make_edge (bb1, bb3, EDGE_FALSE_VALUE);
    5332              : 
    5333          623 :   remove_edge (e23);
    5334          623 :   make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
    5335              : 
    5336          623 :   pop_cfun ();
    5337              : 
    5338          623 :   return bb3;
    5339              : }
    5340              : 
    5341              : /* This function generates the dispatch function for
    5342              :    multi-versioned functions.  DISPATCH_DECL is the function which will
    5343              :    contain the dispatch logic.  FNDECLS_P points to a vec<tree> of the function
    5344              :    choices for dispatch, default version first.  EMPTY_BB points to the basic
    5345              :    block in DISPATCH_DECL in which the dispatch code is generated.  Returns 0.  */
    5346              : 
    5347              : static int
    5348          195 : dispatch_function_versions (tree dispatch_decl,
    5349              :                             void *fndecls_p,
    5350              :                             basic_block *empty_bb)
    5351              : {
    5352          195 :   tree default_decl;
    5353          195 :   gimple *ifunc_cpu_init_stmt;
    5354          195 :   gimple_seq gseq;
    5355          195 :   int ix;
    5356          195 :   tree ele;
    5357          195 :   vec<tree> *fndecls;
    5358          195 :   unsigned int num_versions = 0;
    5359          195 :   unsigned int actual_versions = 0;
    5360          195 :   unsigned int i;
    5361              : 
    5362          195 :   struct _function_version_info
    5363              :     {
    5364              :       tree version_decl;
    5365              :       tree predicate_chain;
    5366              :       unsigned int dispatch_priority;
    5367              :     }*function_version_info;
    5368              : 
    5369          195 :   gcc_assert (dispatch_decl != NULL
    5370              :               && fndecls_p != NULL
    5371              :               && empty_bb != NULL);
    5372              : 
    5373              :   /* fndecls_p is actually a vec<tree>.  */
    5374          195 :   fndecls = static_cast<vec<tree> *> (fndecls_p);
    5375              : 
    5376              :   /* At least one more version other than the default.  */
    5377          195 :   num_versions = fndecls->length ();
    5378          195 :   gcc_assert (num_versions >= 2);
    5379              : 
    5380          195 :   function_version_info = (struct _function_version_info *)
    5381          195 :     XNEWVEC (struct _function_version_info, (num_versions - 1));
    5382              : 
    5383              :   /* The first version in the vector is the default decl.  */
    5384          195 :   default_decl = (*fndecls)[0];
    5385              : 
    5386          195 :   push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
    5387              : 
    5388          195 :   gseq = bb_seq (*empty_bb);
    5389              :   /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
    5390              :      constructors, so explicitly call __builtin_cpu_init here.  */
    5391          195 :   ifunc_cpu_init_stmt
    5392          195 :     = gimple_build_call_vec (get_ix86_builtin (IX86_BUILTIN_CPU_INIT), vNULL);
    5393          195 :   gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
    5394          195 :   gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
    5395          195 :   set_bb_seq (*empty_bb, gseq);
    5396              : 
    5397          195 :   pop_cfun ();
    5398              : 
    5399              : 
    5400          975 :   for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    5401              :     {
    5402          780 :       tree version_decl = ele;
    5403          780 :       tree predicate_chain = NULL_TREE;
    5404          780 :       unsigned int priority;
    5405              :       /* Get attribute string, parse it and find the right predicate decl.
    5406              :          The predicate function could be a lengthy combination of many
    5407              :          features, like arch-type and various isa-variants.  */
    5408          780 :       priority = get_builtin_code_for_version (version_decl,
    5409              :                                                &predicate_chain);
    5410              : 
    5411          780 :       if (predicate_chain == NULL_TREE)
    5412          157 :         continue;
    5413              : 
    5414          623 :       function_version_info [actual_versions].version_decl = version_decl;
    5415          623 :       function_version_info [actual_versions].predicate_chain
    5416          623 :          = predicate_chain;
    5417          623 :       function_version_info [actual_versions].dispatch_priority = priority;
    5418          623 :       actual_versions++;
    5419              :     }
    5420              : 
    5421              :   /* Sort the versions according to descending order of dispatch priority.  The
    5422              :      priority is based on the ISA.  This is not a perfect solution.  There
    5423              :      could still be ambiguity.  If more than one function version is suitable
    5424              :      to execute, which one should be dispatched?  In future, allow the user
    5425              :      to specify a dispatch priority next to the version.  */
    5426          195 :   qsort (function_version_info, actual_versions,
    5427              :          sizeof (struct _function_version_info), feature_compare);
    5428              : 
    5429         1013 :   for  (i = 0; i < actual_versions; ++i)
    5430          623 :     *empty_bb = add_condition_to_bb (dispatch_decl,
    5431              :                                      function_version_info[i].version_decl,
    5432          623 :                                      function_version_info[i].predicate_chain,
    5433              :                                      *empty_bb);
    5434              : 
    5435              :   /* Dispatch the default version at the end.  */
    5436          195 :   *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
    5437              :                                    NULL, *empty_bb);
    5438              : 
    5439          195 :   free (function_version_info);
    5440          195 :   return 0;
    5441              : }
    5442              : 
    5443              : /* This function computes the assembler name for functions that are
    5444              :    versions.  DECL must have a "target" attribute; the sorted attribute
    5445              :    string is appended to ID and the resulting identifier is returned.  */
    5446              : 
    5447              : static tree
    5448         1100 : ix86_mangle_function_version_assembler_name (tree decl, tree id)
    5449              : {
    5450         1100 :   tree version_attr;
    5451         1100 :   char *attr_str;
    5452              : 
    5453         1100 :   if (DECL_DECLARED_INLINE_P (decl)
    5454         1147 :       && lookup_attribute ("gnu_inline",
    5455           47 :                            DECL_ATTRIBUTES (decl)))
    5456            0 :     error_at (DECL_SOURCE_LOCATION (decl),
    5457              :               "function versions cannot be marked as %<gnu_inline%>,"
    5458              :               " bodies have to be generated");
    5459              : 
    5460         1100 :   if (DECL_VIRTUAL_P (decl)
    5461         2200 :       || DECL_VINDEX (decl))
    5462            0 :     sorry ("virtual function multiversioning not supported");
    5463              : 
    5464         1100 :   version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
    5465              : 
    5466              :   /* The "target" attribute must be present here.  */
    5467         1100 :   gcc_assert (version_attr != NULL_TREE);
    5468              : 
    5469         1100 :   attr_str = sorted_attr_string (TREE_VALUE (version_attr));
    5470              : 
    5471              :   /* Allow assembler name to be modified if already set.  */
    5472         1100 :   if (DECL_ASSEMBLER_NAME_SET_P (decl))
    5473         1085 :     SET_DECL_RTL (decl, NULL);
    5474              : 
    5475         1100 :   tree ret = clone_identifier (id, attr_str, true);
    5476              : 
    5477         1100 :   XDELETEVEC (attr_str);
    5478              : 
    5479         1100 :   return ret;
    5480              : }
    5481              : 
    5482              : tree
    5483    496779878 : ix86_mangle_decl_assembler_name (tree decl, tree id)
    5484              : {
    5485              :   /* For function versions, add the target suffix to the assembler name.  */
    5486    496779878 :   if (TREE_CODE (decl) == FUNCTION_DECL)
    5487              :     {
    5488    458096156 :       cgraph_node *node = cgraph_node::get (decl);
    5489              :       /* Mangle all target_clones versions, but only non-default versions for target
    5490              :          attributes.  NOTE(review): NODE is not null-checked here, unlike below.  */
    5491    458096156 :       if (DECL_FUNCTION_VERSIONED (decl)
    5492    458096156 :           && (node->is_target_clone
    5493         1077 :               || !is_function_default_version (node->decl)))
    5494         1100 :         id = ix86_mangle_function_version_assembler_name (decl, id);
    5495              :       /* Mangle the dispatcher symbol, but only when it is not a target clone.  */
    5496    458095056 :       else if (node && node->dispatcher_function && !node->is_target_clone)
    5497          114 :         id = clone_identifier (id, "ifunc");
    5498     73263234 :       else if (node && node->dispatcher_resolver_function)
    5499          195 :         id = clone_identifier (id, "resolver");
    5500              :     }
    5501              : #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
    5502              :   id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
    5503              : #endif
    5504              : 
    5505    496779878 :   return id;
    5506              : }
    5507              : 
    5508              : /* Make a dispatcher declaration for the multi-versioned function DECL.  Calls
    5509              :    to DECL are replaced with calls to the dispatcher by the front-end.  Returns
    5510              :    the dispatcher decl, or NULL if there is no default version or no ifunc.  */
    5511              : 
    5512              : tree
    5513          321 : ix86_get_function_versions_dispatcher (void *decl)
    5514              : {
    5515          321 :   tree fn = (tree) decl;
    5516          321 :   struct cgraph_node *node = NULL;
    5517          321 :   struct cgraph_node *default_node = NULL;
    5518          321 :   struct cgraph_function_version_info *node_v = NULL;
    5519              : 
    5520          321 :   tree dispatch_decl = NULL;
    5521              : 
    5522          321 :   struct cgraph_function_version_info *default_version_info = NULL;
    5523              : 
    5524          642 :   gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
    5525              : 
    5526          321 :   node = cgraph_node::get (fn);
    5527          321 :   gcc_assert (node != NULL);
    5528              : 
    5529          321 :   node_v = node->function_version ();
    5530          321 :   gcc_assert (node_v != NULL);
    5531              : 
    5532          321 :   if (node_v->dispatcher_resolver != NULL)
    5533              :     return node_v->dispatcher_resolver;
    5534              : 
    5535              :   /* The default node is always the beginning of the chain.  */
    5536              :   default_version_info = node_v;
    5537          660 :   while (default_version_info->prev != NULL)
    5538              :     default_version_info = default_version_info->prev;
    5539          207 :   default_node = default_version_info->this_node;
    5540              : 
    5541              :   /* If there is no default node, just return NULL.  */
    5542          207 :   if (!is_function_default_version (default_node->decl))
    5543              :     return NULL;
    5544              : 
    5545              : #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
    5546          198 :   if (targetm.has_ifunc_p ())
    5547              :     {
    5548          198 :       struct cgraph_function_version_info *it_v = NULL;
    5549              : 
    5550              :       /* Right now, the dispatching is done via ifunc.  */
    5551          198 :       dispatch_decl = make_dispatcher_decl (default_node->decl);
    5552              : 
    5553              :       /* Set the dispatcher for all the versions.  */
    5554          198 :       it_v = default_version_info;
    5555         1377 :       while (it_v != NULL)
    5556              :         {
    5557          981 :           it_v->dispatcher_resolver = dispatch_decl;
    5558          981 :           it_v = it_v->next;
    5559              :         }
    5560              :     }
    5561              :   else
    5562              : #endif
    5563              :     {
    5564            0 :       error_at (DECL_SOURCE_LOCATION (default_node->decl),
    5565              :                 "multiversioning needs %<ifunc%> which is not supported "
    5566              :                 "on this target");
    5567              :     }
    5568              : 
    5569              :   return dispatch_decl;
    5570              : }
    5571              : 
    5572              : /* Make the resolver function decl to dispatch the versions of
    5573              :    a multi-versioned function, DEFAULT_DECL.  IFUNC_ALIAS_DECL is the
    5574              :    ifunc alias that will point to the created resolver.  Create an
    5575              :    empty basic block in the resolver and store the pointer in
    5576              :    EMPTY_BB.  Return the decl of the resolver function.  */
    5577              : 
    5578              : static tree
    5579          195 : make_resolver_func (const tree default_decl,
    5580              :                     const tree ifunc_alias_decl,
    5581              :                     basic_block *empty_bb)
    5582              : {
    5583          195 :   tree decl, type, t;
    5584              : 
    5585              :   /* The resolver function should return a (void *). */
    5586          195 :   type = build_function_type_list (ptr_type_node, NULL_TREE);
    5587              : 
    5588          195 :   cgraph_node *node = cgraph_node::get (default_decl);
    5589          195 :   gcc_assert (node && node->function_version ());
    5590              : 
    5591          195 :   decl = build_fn_decl (IDENTIFIER_POINTER (DECL_NAME (default_decl)), type);
    5592              : 
    5593              :   /* Set the assembler name to prevent cgraph_node attempting to mangle.  */
    5594          195 :   SET_DECL_ASSEMBLER_NAME (decl, DECL_ASSEMBLER_NAME (default_decl));
    5595              : 
    5596          195 :   cgraph_node *resolver_node = cgraph_node::get_create (decl);
    5597          195 :   resolver_node->dispatcher_resolver_function = true;
    5598              : 
    5599          195 :   if (node->is_target_clone)
    5600           84 :     resolver_node->is_target_clone = true;
    5601              : 
    5602          195 :   tree id = ix86_mangle_decl_assembler_name
    5603          195 :     (decl, node->function_version ()->assembler_name);
    5604          195 :   SET_DECL_ASSEMBLER_NAME (decl, id);
    5605              : 
    5606          195 :   DECL_NAME (decl) = DECL_NAME (default_decl);
    5607          195 :   TREE_USED (decl) = 1;
    5608          195 :   DECL_ARTIFICIAL (decl) = 1;
    5609          195 :   DECL_IGNORED_P (decl) = 1;
    5610          195 :   TREE_PUBLIC (decl) = 0;
    5611          195 :   DECL_UNINLINABLE (decl) = 1;
    5612              : 
    5613              :   /* Resolver is not external, body is generated.  */
    5614          195 :   DECL_EXTERNAL (decl) = 0;
    5615          195 :   DECL_EXTERNAL (ifunc_alias_decl) = 0;
    5616              : 
    5617          195 :   DECL_CONTEXT (decl) = NULL_TREE;
    5618          195 :   DECL_INITIAL (decl) = make_node (BLOCK);
    5619          195 :   DECL_STATIC_CONSTRUCTOR (decl) = 0;
    5620              : 
    5621          195 :   if (DECL_COMDAT_GROUP (default_decl)
    5622          195 :       || TREE_PUBLIC (default_decl))
    5623              :     {
    5624              :       /* In this case, each translation unit with a call to this
    5625              :          versioned function will put out a resolver.  Ensure it
    5626              :          is comdat to keep just one copy.  */
    5627          171 :       DECL_COMDAT (decl) = 1;
    5628          171 :       make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    5629              :     }
    5630              :   else
    5631           24 :     TREE_PUBLIC (ifunc_alias_decl) = 0;
    5632              : 
    5633              :   /* Build result decl and add to function_decl. */
    5634          195 :   t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
    5635          195 :   DECL_CONTEXT (t) = decl;
    5636          195 :   DECL_ARTIFICIAL (t) = 1;
    5637          195 :   DECL_IGNORED_P (t) = 1;
    5638          195 :   DECL_RESULT (decl) = t;
    5639              : 
    5640          195 :   gimplify_function_tree (decl);
    5641          195 :   push_cfun (DECL_STRUCT_FUNCTION (decl));
    5642          195 :   *empty_bb = init_lowered_empty_function (decl, false,
    5643              :                                            profile_count::uninitialized ());
    5644              : 
    5645          195 :   cgraph_node::add_new_function (decl, true);
    5646          195 :   symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
    5647              : 
    5648          195 :   pop_cfun ();
    5649              : 
    5650          195 :   gcc_assert (ifunc_alias_decl != NULL);
    5651              :   /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name.  NOTE(review): the assert above runs after ifunc_alias_decl was already written.  */
    5652          195 :   DECL_ATTRIBUTES (ifunc_alias_decl)
    5653          195 :     = make_attribute ("ifunc", IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)),
    5654          195 :                       DECL_ATTRIBUTES (ifunc_alias_decl));
    5655              : 
    5656              :   /* Create the alias for dispatch to resolver here.  */
    5657          195 :   cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
    5658          195 :   return decl;
    5659              : }
    5660              : 
    5661              : /* Generate the dispatching code body to dispatch a multi-versioned function.
    5662              :    The target hook is called to process the "target" attributes and provide
    5663              :    the code to dispatch the right function at run-time.  NODE_P is the
    5664              :    cgraph_node of the dispatcher; the resolver decl is returned.  */
    5665              : 
    5666              : tree
    5667          195 : ix86_generate_version_dispatcher_body (void *node_p)
    5668              : {
    5669          195 :   tree resolver_decl;
    5670          195 :   basic_block empty_bb;
    5671          195 :   tree default_ver_decl;
    5672          195 :   struct cgraph_node *versn;
    5673          195 :   struct cgraph_node *node;
    5674              : 
    5675          195 :   struct cgraph_function_version_info *node_version_info = NULL;
    5676          195 :   struct cgraph_function_version_info *versn_info = NULL;
    5677              : 
    5678          195 :   node = (cgraph_node *)node_p;
    5679              : 
    5680          195 :   node_version_info = node->function_version ();
    5681          195 :   gcc_assert (node->dispatcher_function
    5682              :               && node_version_info != NULL);
    5683              : 
    5684          195 :   if (node_version_info->dispatcher_resolver)
    5685              :     return node_version_info->dispatcher_resolver;
    5686              : 
    5687              :   /* The first version in the chain corresponds to the default version.  */
    5688          195 :   default_ver_decl = node_version_info->next->this_node->decl;
    5689              : 
    5690              :   /* NODE is going to be an alias, so remove the finalized bit.  */
    5691          195 :   node->definition = false;
    5692              : 
    5693          195 :   resolver_decl = make_resolver_func (default_ver_decl,
    5694              :                                       node->decl, &empty_bb);
    5695              : 
    5696          195 :   node_version_info->dispatcher_resolver = resolver_decl;
    5697              : 
    5698          195 :   push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
    5699              : 
    5700          195 :   auto_vec<tree, 2> fn_ver_vec;
    5701              : 
    5702         1170 :   for (versn_info = node_version_info->next; versn_info;
    5703          975 :        versn_info = versn_info->next)
    5704              :     {
    5705          975 :       versn = versn_info->this_node;
    5706              :       /* Check for virtual functions here again, as by this time it should
    5707              :          have been determined if this function needs a vtable index or
    5708              :          not.  This happens for methods in derived classes that override
    5709              :          virtual methods in base classes but are not explicitly marked as
    5710              :          virtual.  */
    5711          975 :       if (DECL_VINDEX (versn->decl))
    5712            0 :         sorry ("virtual function multiversioning not supported");
    5713              : 
    5714          975 :       fn_ver_vec.safe_push (versn->decl);
    5715              :     }
    5716              : 
    5717          195 :   dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
    5718          195 :   cgraph_edge::rebuild_edges ();
    5719          195 :   pop_cfun ();
    5720          195 :   return resolver_decl;
    5721          195 : }
    5722              : 
    5723              : 
        

Generated by: LCOV version 2.4-beta

The LCOV profile was generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite was run with the built compiler.