LCOV - code coverage report
Current view: top level - gcc/config/i386 - i386-features.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 89.1 % 2798 2493
Test Date: 2026-05-30 15:37:04 Functions: 98.9 % 95 94
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Copyright (C) 1988-2026 Free Software Foundation, Inc.
       2              : 
       3              : This file is part of GCC.
       4              : 
       5              : GCC is free software; you can redistribute it and/or modify
       6              : it under the terms of the GNU General Public License as published by
       7              : the Free Software Foundation; either version 3, or (at your option)
       8              : any later version.
       9              : 
      10              : GCC is distributed in the hope that it will be useful,
      11              : but WITHOUT ANY WARRANTY; without even the implied warranty of
      12              : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13              : GNU General Public License for more details.
      14              : 
      15              : You should have received a copy of the GNU General Public License
      16              : along with GCC; see the file COPYING3.  If not see
      17              : <http://www.gnu.org/licenses/>.  */
      18              : 
      19              : #define IN_TARGET_CODE 1
      20              : 
      21              : #include "config.h"
      22              : #include "system.h"
      23              : #include "coretypes.h"
      24              : #include "backend.h"
      25              : #include "rtl.h"
      26              : #include "tree.h"
      27              : #include "memmodel.h"
      28              : #include "gimple.h"
      29              : #include "cfghooks.h"
      30              : #include "cfgloop.h"
      31              : #include "df.h"
      32              : #include "tm_p.h"
      33              : #include "stringpool.h"
      34              : #include "expmed.h"
      35              : #include "optabs.h"
      36              : #include "regs.h"
      37              : #include "emit-rtl.h"
      38              : #include "recog.h"
      39              : #include "cgraph.h"
      40              : #include "diagnostic.h"
      41              : #include "cfgbuild.h"
      42              : #include "alias.h"
      43              : #include "fold-const.h"
      44              : #include "attribs.h"
      45              : #include "calls.h"
      46              : #include "stor-layout.h"
      47              : #include "varasm.h"
      48              : #include "output.h"
      49              : #include "insn-attr.h"
      50              : #include "flags.h"
      51              : #include "except.h"
      52              : #include "explow.h"
      53              : #include "expr.h"
      54              : #include "cfgrtl.h"
      55              : #include "common/common-target.h"
      56              : #include "langhooks.h"
      57              : #include "reload.h"
      58              : #include "gimplify.h"
      59              : #include "dwarf2.h"
      60              : #include "tm-constrs.h"
      61              : #include "cselib.h"
      62              : #include "sched-int.h"
      63              : #include "opts.h"
      64              : #include "tree-pass.h"
      65              : #include "context.h"
      66              : #include "pass_manager.h"
      67              : #include "target-globals.h"
      68              : #include "gimple-iterator.h"
      69              : #include "shrink-wrap.h"
      70              : #include "builtins.h"
      71              : #include "rtl-iter.h"
      72              : #include "tree-iterator.h"
      73              : #include "dbgcnt.h"
      74              : #include "case-cfn-macros.h"
      75              : #include "dojump.h"
      76              : #include "fold-const-call.h"
      77              : #include "tree-vrp.h"
      78              : #include "tree-ssanames.h"
      79              : #include "selftest.h"
      80              : #include "selftest-rtl.h"
      81              : #include "print-rtl.h"
      82              : #include "intl.h"
      83              : #include "ifcvt.h"
      84              : #include "symbol-summary.h"
      85              : #include "sreal.h"
      86              : #include "ipa-cp.h"
      87              : #include "ipa-prop.h"
      88              : #include "ipa-fnsummary.h"
      89              : #include "wide-int-bitmask.h"
      90              : #include "tree-vector-builder.h"
      91              : #include "debug.h"
      92              : #include "dwarf2out.h"
      93              : #include "i386-builtins.h"
      94              : #include "i386-features.h"
      95              : #include "i386-expand.h"
      96              : 
      97              : const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
      98              :   "savms64",
      99              :   "resms64",
     100              :   "resms64x",
     101              :   "savms64f",
     102              :   "resms64f",
     103              :   "resms64fx"
     104              : };
     105              : 
     106              : const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
     107              : /* The below offset values are where each register is stored for the layout
     108              :    relative to incoming stack pointer.  The value of each m_regs[].offset will
     109              :    be relative to the incoming base pointer (rax or rsi) used by the stub.
     110              : 
     111              :     s_instances:   0            1               2               3
     112              :     Offset:                                     realigned or    aligned + 8
     113              :     Register       aligned      aligned + 8     aligned w/HFP   w/HFP   */
     114              :     XMM15_REG,  /* 0x10         0x18            0x10            0x18    */
     115              :     XMM14_REG,  /* 0x20         0x28            0x20            0x28    */
     116              :     XMM13_REG,  /* 0x30         0x38            0x30            0x38    */
     117              :     XMM12_REG,  /* 0x40         0x48            0x40            0x48    */
     118              :     XMM11_REG,  /* 0x50         0x58            0x50            0x58    */
     119              :     XMM10_REG,  /* 0x60         0x68            0x60            0x68    */
     120              :     XMM9_REG,   /* 0x70         0x78            0x70            0x78    */
     121              :     XMM8_REG,   /* 0x80         0x88            0x80            0x88    */
     122              :     XMM7_REG,   /* 0x90         0x98            0x90            0x98    */
     123              :     XMM6_REG,   /* 0xa0         0xa8            0xa0            0xa8    */
     124              :     SI_REG,     /* 0xa8         0xb0            0xa8            0xb0    */
     125              :     DI_REG,     /* 0xb0         0xb8            0xb0            0xb8    */
     126              :     BX_REG,     /* 0xb8         0xc0            0xb8            0xc0    */
     127              :     BP_REG,     /* 0xc0         0xc8            N/A             N/A     */
     128              :     R12_REG,    /* 0xc8         0xd0            0xc0            0xc8    */
     129              :     R13_REG,    /* 0xd0         0xd8            0xc8            0xd0    */
     130              :     R14_REG,    /* 0xd8         0xe0            0xd0            0xd8    */
     131              :     R15_REG,    /* 0xe0         0xe8            0xd8            0xe0    */
     132              : };
     133              : 
     134              : /* Instantiate static const values.  */
     135              : const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
     136              : const unsigned xlogue_layout::MIN_REGS;
     137              : const unsigned xlogue_layout::MAX_REGS;
     138              : const unsigned xlogue_layout::MAX_EXTRA_REGS;
     139              : const unsigned xlogue_layout::VARIANT_COUNT;
     140              : const unsigned xlogue_layout::STUB_NAME_MAX_LEN;
     141              : 
     142              : /* Initialize xlogue_layout::s_stub_names to zero.  */
     143              : char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
     144              :                                 [STUB_NAME_MAX_LEN];
     145              : 
     146              : /* Instantiates all xlogue_layout instances.  */
     147              : const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
     148              :   xlogue_layout (0, false),
     149              :   xlogue_layout (8, false),
     150              :   xlogue_layout (0, true),
     151              :   xlogue_layout (8, true)
     152              : };
     153              : 
     154              : /* Return an appropriate const instance of xlogue_layout based upon values
     155              :    in cfun->machine and crtl.  */
     156              : const class xlogue_layout &
     157        49891 : xlogue_layout::get_instance ()
     158              : {
     159        49891 :   enum xlogue_stub_sets stub_set;
     160        49891 :   bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;
     161              : 
     162        49891 :   if (stack_realign_fp)
     163              :     stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
     164        40910 :   else if (frame_pointer_needed)
     165        25246 :     stub_set = aligned_plus_8
     166        31552 :               ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
     167              :               : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
     168              :   else
     169         9358 :     stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;
     170              : 
     171        49891 :   return s_instances[stub_set];
     172              : }
     173              : 
     174              : /* Determine how many clobbered registers can be saved by the stub.
     175              :    Returns the count of registers the stub will save and restore.  */
     176              : unsigned
     177        35225 : xlogue_layout::count_stub_managed_regs ()
     178              : {
     179        35225 :   bool hfp = frame_pointer_needed || stack_realign_fp;
     180        35225 :   unsigned i, count;
     181        35225 :   unsigned regno;
     182              : 
     183        94890 :   for (count = i = MIN_REGS; i < MAX_REGS; ++i)
     184              :     {
     185        93670 :       regno = REG_ORDER[i];
     186        93670 :       if (regno == BP_REG && hfp)
     187        18200 :         continue;
     188        75470 :       if (!ix86_save_reg (regno, false, false))
     189              :         break;
     190        41465 :       ++count;
     191              :     }
     192        35225 :   return count;
     193              : }
     194              : 
     195              : /* Determine if register REGNO is a stub managed register given the
     196              :    total COUNT of stub managed registers.  */
     197              : bool
     198      2641728 : xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
     199              : {
     200      2641728 :   bool hfp = frame_pointer_needed || stack_realign_fp;
     201      2641728 :   unsigned i;
     202              : 
     203     34456982 :   for (i = 0; i < count; ++i)
     204              :     {
     205     32315123 :       gcc_assert (i < MAX_REGS);
     206     32315123 :       if (REG_ORDER[i] == BP_REG && hfp)
     207       519694 :         ++count;
     208     31795429 :       else if (REG_ORDER[i] == regno)
     209              :         return true;
     210              :     }
     211              :   return false;
     212              : }
     213              : 
     214              : /* Constructor for xlogue_layout.  */
     215      1150544 : xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
     216      1150544 :   : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),
     217      1150544 :     m_stack_align_off_in (stack_align_off_in)
     218              : {
     219      1150544 :   HOST_WIDE_INT offset = stack_align_off_in;
     220      1150544 :   unsigned i, j;
     221              : 
     222     21860336 :   for (i = j = 0; i < MAX_REGS; ++i)
     223              :     {
     224     20709792 :       unsigned regno = REG_ORDER[i];
     225              : 
     226     20709792 :       if (regno == BP_REG && hfp)
     227       575272 :         continue;
     228     20134520 :       if (SSE_REGNO_P (regno))
     229              :         {
     230     11505440 :           offset += 16;
     231              :           /* Verify that SSE regs are always aligned.  */
     232     11505440 :           gcc_assert (!((stack_align_off_in + offset) & 15));
     233              :         }
     234              :       else
     235      8629080 :         offset += 8;
     236              : 
     237     20134520 :       m_regs[j].regno    = regno;
     238     20134520 :       m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
     239              :     }
     240      1150544 :   gcc_assert (j == m_nregs);
     241      1150544 : }
     242              : 
     243              : const char *
     244        14666 : xlogue_layout::get_stub_name (enum xlogue_stub stub,
     245              :                               unsigned n_extra_regs)
     246              : {
     247        14666 :   const int have_avx = TARGET_AVX;
     248        14666 :   char *name = s_stub_names[!!have_avx][stub][n_extra_regs];
     249              : 
     250              :   /* Lazy init */
     251        14666 :   if (!*name)
     252              :     {
     253          362 :       int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
     254              :                           (have_avx ? "avx" : "sse"),
     255          181 :                           STUB_BASE_NAMES[stub],
     256              :                           MIN_REGS + n_extra_regs);
     257          181 :       gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN);
     258              :     }
     259              : 
     260        14666 :   return name;
     261              : }
     262              : 
     263              : /* Return rtx of a symbol ref for the entry point (based upon
     264              :    cfun->machine->call_ms2sysv_extra_regs) of the specified stub.  */
     265              : rtx
     266        14666 : xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
     267              : {
     268        14666 :   const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
     269        14666 :   gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
     270        14666 :   gcc_assert (stub < XLOGUE_STUB_COUNT);
     271        14666 :   gcc_assert (crtl->stack_realign_finalized);
     272              : 
     273        14666 :   return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
     274              : }
     275              : 
     276              : unsigned scalar_chain::max_id = 0;
     277              : 
     278              : namespace {
     279              : 
     280              : /* Initialize new chain.  */
     281              : 
     282      6389111 : scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
     283              : {
     284      6389111 :   smode = smode_;
     285      6389111 :   vmode = vmode_;
     286              : 
     287      6389111 :   chain_id = ++max_id;
     288              : 
     289      6389111 :    if (dump_file)
     290          136 :     fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
     291              : 
     292      6389111 :   bitmap_obstack_initialize (NULL);
     293      6389111 :   insns = BITMAP_ALLOC (NULL);
     294      6389111 :   defs = BITMAP_ALLOC (NULL);
     295      6389111 :   defs_conv = BITMAP_ALLOC (NULL);
     296      6389111 :   insns_conv = BITMAP_ALLOC (NULL);
     297      6389111 :   queue = NULL;
     298              : 
     299      6389111 :   cost_sse_integer = 0;
     300      6389111 :   weighted_cost_sse_integer = 0 ;
     301      6389111 :   max_visits = x86_stv_max_visits;
     302      6389111 : }
     303              : 
     304              : /* Free chain's data.  */
     305              : 
     306      6389111 : scalar_chain::~scalar_chain ()
     307              : {
     308      6389111 :   BITMAP_FREE (insns);
     309      6389111 :   BITMAP_FREE (defs);
     310      6389111 :   BITMAP_FREE (defs_conv);
     311      6389111 :   BITMAP_FREE (insns_conv);
     312      6389111 :   bitmap_obstack_release (NULL);
     313      6389111 : }
     314              : 
     315              : /* Add instruction into chains' queue.  */
     316              : 
     317              : void
     318      8232309 : scalar_chain::add_to_queue (unsigned insn_uid)
     319              : {
     320      8232309 :   if (!bitmap_set_bit (queue, insn_uid))
     321              :     return;
     322              : 
     323      6211958 :   if (dump_file)
     324          141 :     fprintf (dump_file, "  Adding insn %d into chain's #%d queue\n",
     325              :              insn_uid, chain_id);
     326              : }
     327              : 
     328              : /* For DImode conversion, mark register defined by DEF as requiring
     329              :    conversion.  */
     330              : 
     331              : void
     332      9317542 : scalar_chain::mark_dual_mode_def (df_ref def)
     333              : {
     334      9317542 :   gcc_assert (DF_REF_REG_DEF_P (def));
     335              : 
     336              :   /* Record the def/insn pair so we can later efficiently iterate over
     337              :      the defs to convert on insns not in the chain.  */
     338      9317542 :   bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
     339      9317542 :   basic_block bb = BLOCK_FOR_INSN (DF_REF_INSN (def));
     340      9317542 :   profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
     341      9317542 :   bool speed_p = optimize_bb_for_speed_p (bb);
     342      9317542 :   int cost = 0;
     343              : 
     344      9317542 :   if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def)))
     345              :     {
     346      2707235 :       if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def))
     347      2707235 :           && !reg_new)
     348      1386803 :         return;
     349              : 
     350              :       /* Cost integer to sse moves.  */
     351      2465013 :       if (speed_p)
     352      2183962 :         cost = COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
     353       281051 :       else if (TARGET_64BIT || smode == SImode)
     354              :         cost = COSTS_N_BYTES (4);
     355              :       /* vmovd (4 bytes) + vpinsrd (6 bytes).  */
     356        18650 :       else if (TARGET_SSE4_1)
     357              :         cost = COSTS_N_BYTES (10);
     358              :       /* movd (4 bytes) + movd (4 bytes) + unpckldq (4 bytes).  */
     359              :       else
     360      7930739 :         cost = COSTS_N_BYTES (12);
     361              :     }
     362              :   else
     363              :     {
     364      6610307 :       if (!reg_new)
     365              :         return;
     366              : 
     367              :       /* Cost sse to integer moves.  */
     368      5465726 :       if (speed_p)
     369      4907580 :         cost = COSTS_N_INSNS (ix86_cost->sse_to_integer) / 2;
     370       558146 :       else if (TARGET_64BIT || smode == SImode)
     371              :         cost = COSTS_N_BYTES (4);
     372              :       /* vmovd (4 bytes) + vpextrd (6 bytes).  */
     373         2973 :       else if (TARGET_SSE4_1)
     374              :         cost = COSTS_N_BYTES (10);
     375              :       /* movd (4 bytes) + psrlq (5 bytes) + movd (4 bytes).  */
     376              :       else
     377      7930739 :         cost = COSTS_N_BYTES (13);
     378              :     }
     379              : 
     380      7930739 :   if (speed_p)
     381      7091542 :     weighted_cost_sse_integer += bb->count.to_sreal_scale (entry_count) * cost;
     382              : 
     383      7930739 :   cost_sse_integer += cost;
     384              : 
     385      7930739 :   if (dump_file)
     386          240 :     fprintf (dump_file,
     387              :              "  Mark r%d def in insn %d as requiring both modes in chain #%d\n",
     388          240 :              DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
     389              : }
     390              : 
     391              : /* Check REF's chain to add new insns into a queue
     392              :    and find registers requiring conversion.  Return true if OK, false
     393              :    if the analysis was aborted.  */
     394              : 
     395              : bool
     396     17746546 : scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref,
     397              :                                       bitmap disallowed)
     398              : {
     399     17746546 :   df_link *chain;
     400     17746546 :   bool mark_def = false;
     401              : 
     402     17746546 :   gcc_checking_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)));
     403              : 
     404     61491507 :   for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
     405              :     {
     406     43748830 :       unsigned uid = DF_REF_INSN_UID (chain->ref);
     407              : 
     408     43748830 :       if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
     409      7851978 :         continue;
     410              : 
     411     35896852 :       if (--max_visits == 0)
     412              :         return false;
     413              : 
     414     35896292 :       if (!DF_REF_REG_MEM_P (chain->ref))
     415              :         {
     416     29965617 :           if (bitmap_bit_p (insns, uid))
     417      9540541 :             continue;
     418              : 
     419     20425076 :           if (bitmap_bit_p (candidates, uid))
     420              :             {
     421      8232309 :               add_to_queue (uid);
     422      8232309 :               continue;
     423              :             }
     424              : 
     425              :           /* If we run into parts of an aborted chain discovery abort.  */
     426     12192767 :           if (bitmap_bit_p (disallowed, uid))
     427              :             return false;
     428              :         }
     429              : 
     430     18120133 :       if (DF_REF_REG_DEF_P (chain->ref))
     431              :         {
     432      2707235 :           if (dump_file)
     433          125 :             fprintf (dump_file, "  r%d def in insn %d isn't convertible\n",
     434              :                      DF_REF_REGNO (chain->ref), uid);
     435      2707235 :           mark_dual_mode_def (chain->ref);
     436              :         }
     437              :       else
     438              :         {
     439     15412898 :           if (dump_file)
     440          524 :             fprintf (dump_file, "  r%d use in insn %d isn't convertible\n",
     441              :                      DF_REF_REGNO (chain->ref), uid);
     442              :           mark_def = true;
     443              :         }
     444              :     }
     445              : 
     446     17742677 :   if (mark_def)
     447      6610307 :     mark_dual_mode_def (ref);
     448              : 
     449              :   return true;
     450              : }
     451              : 
     452              : /* Check whether X is a convertible *concatditi_? variant.  X is known
     453              :    to be any_or_plus:TI, i.e. PLUS:TI, IOR:TI or XOR:TI.  */
     454              : 
     455              : static bool
     456        29932 : timode_concatdi_p (rtx x)
     457              : {
     458        29932 :   rtx op0 = XEXP (x, 0);
     459        29932 :   rtx op1 = XEXP (x, 1);
     460              : 
     461        29932 :   if (GET_CODE (op1) == ASHIFT)
     462          948 :     std::swap (op0, op1);
     463              : 
     464        29932 :   return GET_CODE (op0) == ASHIFT
     465        21050 :          && GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
     466        21050 :          && GET_MODE (XEXP (XEXP (op0, 0), 0)) == DImode
     467        21050 :          && REG_P (XEXP (XEXP (op0, 0), 0))
     468        20919 :          && CONST_INT_P (XEXP (op0, 1))
     469        20919 :          && INTVAL (XEXP (op0, 1)) == 64
     470        20919 :          && GET_CODE (op1) == ZERO_EXTEND
     471        19971 :          && GET_MODE (XEXP (op1, 0)) == DImode
     472        49903 :          && REG_P (XEXP (op1, 0));
     473              : }
     474              : 
     475              : 
     476              : /* Add instruction into a chain.  Return true if OK, false if the search
     477              :    was aborted.  */
     478              : 
     479              : bool
     480     12596937 : scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid,
     481              :                         bitmap disallowed)
     482              : {
     483     12596937 :   if (!bitmap_set_bit (insns, insn_uid))
     484              :     return true;
     485              : 
     486     12596937 :   if (dump_file)
     487          277 :     fprintf (dump_file, "  Adding insn %d to chain #%d\n", insn_uid, chain_id);
     488              : 
     489     12596937 :   rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
     490     12596937 :   rtx def_set = single_set (insn);
     491     12596937 :   if (def_set && REG_P (SET_DEST (def_set))
     492     22301691 :       && !HARD_REGISTER_P (SET_DEST (def_set)))
     493      9679858 :     bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
     494              : 
     495              :   /* ???  The following is quadratic since analyze_register_chain
     496              :      iterates over all refs to look for dual-mode regs.  Instead this
     497              :      should be done separately for all regs mentioned in the chain once.  */
     498     12596937 :   df_ref ref;
     499     25730377 :   for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
     500     13134837 :     if (!HARD_REGISTER_P (DF_REF_REG (ref)))
     501      9679858 :       if (!analyze_register_chain (candidates, ref, disallowed))
     502              :         return false;
     503              : 
     504              :   /* The operand(s) of VEC_SELECT, ZERO_EXTEND and similar ops don't need
     505              :      to be converted/convertible.  */
     506     12595540 :   if (def_set)
     507     12595540 :     switch (GET_CODE (SET_SRC (def_set)))
     508              :       {
     509      3749670 :       case REG:
     510      3749670 :         if (HARD_REGISTER_P (SET_SRC (def_set)))
     511              :           return true;
     512              :         break;
     513              :       case VEC_SELECT:
     514              :         return true;
     515          260 :       case ZERO_EXTEND:
     516          260 :         if (GET_MODE (XEXP (SET_SRC (def_set), 0)) == DImode)
     517              :           return true;
     518              :         break;
     519      2359271 :       case PLUS:
     520      2359271 :       case IOR:
     521      2359271 :       case XOR:
     522      2359271 :         if (smode == TImode && timode_concatdi_p (SET_SRC (def_set)))
     523              :           return true;
     524              :         break;
     525              :       default:
     526              :         break;
     527              :       }
     528              : 
     529     27550204 :   for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
     530     15023481 :     if (DF_REF_TYPE (ref) == DF_REF_REG_USE
     531      8066691 :         && !SUBREG_P (DF_REF_REG (ref)))
     532      8066688 :       if (!analyze_register_chain (candidates, ref, disallowed))
     533              :         return false;
     534              : 
     535              :   return true;
     536              : }
     537              : 
     538              : /* Build new chain starting from insn INSN_UID recursively
     539              :    adding all dependent uses and definitions.  Return true if OK, false
     540              :    if the chain discovery was aborted.  */
     541              : 
     542              : bool
     543      6389111 : scalar_chain::build (bitmap candidates, unsigned insn_uid, bitmap disallowed)
     544              : {
     545      6389111 :   queue = BITMAP_ALLOC (NULL);
     546      6389111 :   bitmap_set_bit (queue, insn_uid);
     547              : 
     548      6389111 :   if (dump_file)
     549          136 :     fprintf (dump_file, "Building chain #%d...\n", chain_id);
     550              : 
     551     18982179 :   while (!bitmap_empty_p (queue))
     552              :     {
     553     12596937 :       insn_uid = bitmap_first_set_bit (queue);
     554     12596937 :       bitmap_clear_bit (queue, insn_uid);
     555     12596937 :       bitmap_clear_bit (candidates, insn_uid);
     556     12596937 :       if (!add_insn (candidates, insn_uid, disallowed))
     557              :         {
     558              :           /* If we aborted the search put sofar found insn on the set of
     559              :              disallowed insns so that further searches reaching them also
     560              :              abort and thus we abort the whole but yet undiscovered chain.  */
     561         3869 :           bitmap_ior_into (disallowed, insns);
     562         3869 :           if (dump_file)
     563            0 :             fprintf (dump_file, "Aborted chain #%d discovery\n", chain_id);
     564         3869 :           BITMAP_FREE (queue);
     565         3869 :           return false;
     566              :         }
     567              :     }
     568              : 
     569      6385242 :   if (dump_file)
     570              :     {
     571          136 :       fprintf (dump_file, "Collected chain #%d...\n", chain_id);
     572          136 :       fprintf (dump_file, "  insns: ");
     573          136 :       dump_bitmap (dump_file, insns);
     574          136 :       if (!bitmap_empty_p (defs_conv))
     575              :         {
     576          136 :           bitmap_iterator bi;
     577          136 :           unsigned id;
     578          136 :           const char *comma = "";
     579          136 :           fprintf (dump_file, "  defs to convert: ");
     580          366 :           EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
     581              :             {
     582          230 :               fprintf (dump_file, "%sr%d", comma, id);
     583          230 :               comma = ", ";
     584              :             }
     585          136 :           fprintf (dump_file, "\n");
     586              :         }
     587              :     }
     588              : 
     589      6385242 :   BITMAP_FREE (queue);
     590              : 
     591      6385242 :   return true;
     592              : }
     593              : 
     594              : /* Return a cost of building a vector constant
     595              :    instead of using a scalar one.
                           EXP must be a CONST_INT (asserted below).  BB is only consulted
                           to choose between the size cost and the speed cost.  */
     596              : 
     597              : int
     598      2637382 : general_scalar_chain::vector_const_cost (rtx exp, basic_block bb)
     599              : {
     600      2637382 :   gcc_assert (CONST_INT_P (exp));
     601              : 
                          /* A constant accepted by standard_sse_constant_p for VMODE is
                             charged a single SSE operation.  */
     602      2637382 :   if (standard_sse_constant_p (exp, vmode))
     603       616063 :     return ix86_cost->sse_op;
                          /* Any other constant is charged a fixed 8 bytes when BB is
                             optimized for size.  */
     604      2021319 :   if (optimize_bb_for_size_p (bb))
     605              :     return COSTS_N_BYTES (8);
     606              :   /* We have separate costs for SImode and DImode, use SImode costs
     607              :      for smaller modes.  */
     608      2401649 :   return COSTS_N_INSNS (ix86_cost->sse_load[smode == DImode ? 1 : 0]) / 2;
     609              : }
     610              : 
     611              : /* Return true if it's cost profitable for chain conversion.
                           Every insn of the chain contributes an integer IGAIN built from
                           ix86_cost entries; in general integer-side costs are added and
                           SSE-side costs are subtracted.  IGAIN is summed into GAIN
                           unconditionally and, for insns in blocks optimized for speed,
                           into WEIGHTED_GAIN after multiplication by the block frequency.
                           The result is WEIGHTED_GAIN > weighted_cost_sse_integer, or
                           GAIN > cost_sse_integer when the two weighted values are equal.  */
     612              : 
     613              : bool
     614      5885375 : general_scalar_chain::compute_convert_gain ()
     615              : {
     616      5885375 :   bitmap_iterator bi;
     617      5885375 :   unsigned insn_uid;
     618      5885375 :   int gain = 0;
     619      5885375 :   sreal weighted_gain = 0;
     620              : 
     621      5885375 :   if (dump_file)
     622          136 :     fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
     623              : 
     624              :   /* SSE costs distinguish between SImode and DImode loads/stores, for
     625              :      int costs factor in the number of GPRs involved.  When supporting
     626              :      smaller modes than SImode the int load/store costs need to be
     627              :      adjusted as well.  */
                          /* SSE_COST_IDX selects the sse_load/sse_store table entry (1 for
                             DImode, 0 otherwise).  M is 2 only for DImode when !TARGET_64BIT
                             and 1 in every other case.  */
     628      5885375 :   unsigned sse_cost_idx = smode == DImode ? 1 : 0;
     629      5885375 :   int m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;
     630              : 
                          /* Accumulate the gain of each insn recorded in INSNS.  */
     631     17472900 :   EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
     632              :     {
     633     11587525 :       rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
                              /* NOTE(review): DEF_SET is used without a null check; presumably
                                 every insn admitted to a chain is a single set -- confirm
                                 against the candidate selection code.  */
     634     11587525 :       rtx def_set = single_set (insn);
     635     11587525 :       rtx src = SET_SRC (def_set);
     636     11587525 :       rtx dst = SET_DEST (def_set);
     637     11587525 :       basic_block bb = BLOCK_FOR_INSN (insn);
     638     11587525 :       int igain = 0;
     639     11587525 :       profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
                              /* SPEED_P picks between the size (COSTS_N_BYTES) and the speed
                                 (ix86_cost table) estimates below.  BB_FREQ is BB's count
                                 scaled against the entry block count.  */
     640     11587525 :       bool speed_p = optimize_bb_for_speed_p (bb);
     641     11587525 :       sreal bb_freq = bb->count.to_sreal_scale (entry_count);
     642              : 
     643     11587525 :       if (REG_P (src) && REG_P (dst))
     644              :         {
                                  /* Register to register copy.  */
     645       911982 :           if (!speed_p)
     646              :             /* reg-reg move is 2 bytes, while SSE 3.  */
     647       187201 :             igain += COSTS_N_BYTES (2 * m - 3);
     648              :           else
     649              :             /* Move costs are normalized to reg-reg move having cost 2.  */
     650       724781 :             igain += COSTS_N_INSNS (2 * m - ix86_cost->xmm_move) / 2;
     651              :         }
     652     10675543 :       else if (REG_P (src) && MEM_P (dst))
     653              :         {
                                  /* Store of a register to memory.  */
     654      2314578 :           if (!speed_p)
     655              :             /* Integer load/store is 3+ bytes and SSE 4+.  */
     656       192017 :             igain += COSTS_N_BYTES (3 * m - 4);
     657              :           else
     658      2122561 :             igain
     659      2122561 :               += COSTS_N_INSNS (m * ix86_cost->int_store[2]
     660              :                                 - ix86_cost->sse_store[sse_cost_idx]) / 2;
     661              :         }
     662      8360965 :       else if (MEM_P (src) && REG_P (dst))
     663              :         {
                                  /* Load from memory into a register; the size estimate is the
                                     same as for the store case above.  */
     664      3775094 :           if (!speed_p)
     665       360427 :             igain += COSTS_N_BYTES (3 * m - 4);
     666              :           else
     667      3414667 :             igain += COSTS_N_INSNS (m * ix86_cost->int_load[2]
     668              :                                     - ix86_cost->sse_load[sse_cost_idx]) / 2;
     669              :         }
     670              :       else
     671              :         {
     672              :           /* For operations on memory operands, include the overhead
     673              :              of explicit load and store instructions.  */
     674      4585871 :           if (MEM_P (dst))
     675              :             {
     676        66596 :               if (!speed_p)
     677              :                 /* ??? This probably should account size difference
     678              :                    of SSE and integer load rather than full SSE load.  */
     679              :                 igain -= COSTS_N_BYTES (8);
     680              :               else
     681              :                 {
     682        57321 :                   int cost = (m * (ix86_cost->int_load[2]
     683        57321 :                                    + ix86_cost->int_store[2])
     684        57321 :                              - (ix86_cost->sse_load[sse_cost_idx] +
     685        57321 :                                 ix86_cost->sse_store[sse_cost_idx]));
     686        57321 :                   igain += COSTS_N_INSNS (cost) / 2;
     687              :                 }
     688              :             }
     689              : 
                                  /* Cost the operation itself according to the rtx code of SRC.
                                     Any code not listed here is a hard error.  */
     690      4585871 :           switch (GET_CODE (src))
     691              :             {
     692       480092 :             case ASHIFT:
     693       480092 :             case ASHIFTRT:
     694       480092 :             case LSHIFTRT:
                                      /* With M == 2 an extra integer-side cost is credited first;
                                         which one depends on whether the shift count is at least
                                         32.  */
     695       480092 :               if (m == 2)
     696              :                 {
     697        16992 :                   if (INTVAL (XEXP (src, 1)) >= 32)
     698        11522 :                     igain += ix86_cost->add;
     699              :                   /* Gain for extend highpart case.  */
     700         5470 :                   else if (GET_CODE (XEXP (src, 0)) == ASHIFT)
     701            0 :                     igain += ix86_cost->shift_const - ix86_cost->sse_op;
     702              :                   else
     703         5470 :                     igain += ix86_cost->shift_const;
     704              :                 }
     705              : 
     706       480092 :               igain += ix86_cost->shift_const - ix86_cost->sse_op;
     707              : 
                                      /* A constant being shifted has to be built as a vector.  */
     708       480092 :               if (CONST_INT_P (XEXP (src, 0)))
     709            0 :                 igain -= vector_const_cost (XEXP (src, 0), bb);
     710              :               break;
     711              : 
     712         3646 :             case ROTATE:
     713         3646 :             case ROTATERT:
                                      /* Credit M integer shifts, then charge one to three SSE
                                         operations depending on the target, SMODE and the rotate
                                         count.  */
     714         3646 :               igain += m * ix86_cost->shift_const;
     715         3646 :               if (TARGET_AVX512VL)
     716          204 :                 igain -= ix86_cost->sse_op;
     717         3442 :               else if (smode == DImode)
     718              :                 {
     719          590 :                   int bits = INTVAL (XEXP (src, 1));
                                          /* Counts that are multiples of 16 are charged one SSE op,
                                             other multiples of 8 two, anything else three.  */
     720          590 :                   if ((bits & 0x0f) == 0)
     721          106 :                     igain -= ix86_cost->sse_op;
     722          484 :                   else if ((bits & 0x07) == 0)
     723           27 :                     igain -= 2 * ix86_cost->sse_op;
     724              :                   else
     725          457 :                     igain -= 3 * ix86_cost->sse_op;
     726              :                 }
     727         2852 :               else if (INTVAL (XEXP (src, 1)) == 16)
     728          139 :                 igain -= ix86_cost->sse_op;
     729              :               else
     730         2713 :                 igain -= 2 * ix86_cost->sse_op;
     731              :               break;
     732              : 
     733      2834700 :             case AND:
     734      2834700 :             case IOR:
     735      2834700 :             case XOR:
     736      2834700 :             case PLUS:
     737      2834700 :             case MINUS:
     738      2834700 :               igain += m * ix86_cost->add - ix86_cost->sse_op;
     739              :               /* Additional gain for andnot for targets without BMI.  */
     740      2834700 :               if (GET_CODE (XEXP (src, 0)) == NOT
     741         3598 :                   && !TARGET_BMI)
     742         3589 :                 igain += m * ix86_cost->add;
     743              : 
                                      /* Constant operands have to be built as vectors.  */
     744      2834700 :               if (CONST_INT_P (XEXP (src, 0)))
     745            0 :                 igain -= vector_const_cost (XEXP (src, 0), bb);
     746      2834700 :               if (CONST_INT_P (XEXP (src, 1)))
     747      1687550 :                 igain -= vector_const_cost (XEXP (src, 1), bb);
                                      /* A memory second operand is costed like a load when
                                         optimizing for speed, and as a fixed byte penalty when
                                         optimizing for size.  */
     748      2834700 :               if (MEM_P (XEXP (src, 1)))
     749              :                 {
     750        87376 :                   if (!speed_p)
     751        20625 :                     igain -= COSTS_N_BYTES (m == 2 ? 3 : 5);
     752              :                   else
     753        77059 :                     igain += COSTS_N_INSNS
     754              :                                (m * ix86_cost->int_load[2]
     755              :                                  - ix86_cost->sse_load[sse_cost_idx]) / 2;
     756              :                 }
     757              :               break;
     758              : 
     759        50935 :             case NEG:
     760        50935 :             case NOT:
                                      /* The vector form is charged one SSE op plus
                                         COSTS_N_INSNS (1).  Unless the operand is an ABS, credit M
                                         integer adds and stop; otherwise continue into the ABS
                                         costing below.  */
     761        50935 :               igain -= ix86_cost->sse_op + COSTS_N_INSNS (1);
     762              : 
     763        50935 :               if (GET_CODE (XEXP (src, 0)) != ABS)
     764              :                 {
     765        50935 :                   igain += m * ix86_cost->add;
     766        50935 :                   break;
     767              :                 }
     768              :               /* FALLTHRU */
     769              : 
     770          998 :             case ABS:
     771          998 :             case SMAX:
     772          998 :             case SMIN:
     773          998 :             case UMAX:
     774          998 :             case UMIN:
     775              :               /* We do not have any conditional move cost, estimate it as a
     776              :                  reg-reg move.  Comparisons are costed as adds.  */
     777          998 :               igain += m * (COSTS_N_INSNS (2) + ix86_cost->add);
     778              :               /* Integer SSE ops are all costed the same.  */
     779          998 :               igain -= ix86_cost->sse_op;
     780          998 :               break;
     781              : 
     782            0 :             case COMPARE:
                                      /* Each branch compares the scalar sequence against the
                                         vector sequence named in its comment, counted in insns.  */
     783            0 :               if (XEXP (src, 1) != const0_rtx)
     784              :                 {
     785              :                   /* cmp vs. pxor;pshufd;ptest.  */
     786            0 :                   igain += COSTS_N_INSNS (m - 3);
     787              :                 }
     788            0 :               else if (GET_CODE (XEXP (src, 0)) != AND)
     789              :                 {
     790              :                   /* test vs. pshufd;ptest.  */
     791            0 :                   igain += COSTS_N_INSNS (m - 2);
     792              :                 }
     793            0 :               else if (GET_CODE (XEXP (XEXP (src, 0), 0)) != NOT)
     794              :                 {
     795              :                   /* and;test vs. pshufd;ptest.  */
     796            0 :                   igain += COSTS_N_INSNS (2 * m - 2);
     797              :                 }
     798            0 :               else if (TARGET_BMI)
     799              :                 {
     800              :                   /* andn;test vs. pandn;pshufd;ptest.  */
     801            0 :                   igain += COSTS_N_INSNS (2 * m - 3);
     802              :                 }
     803              :               else
     804              :                 {
     805              :                   /* not;and;test vs. pandn;pshufd;ptest.  */
     806            0 :                   igain += COSTS_N_INSNS (3 * m - 3);
     807              :                 }
     808              :               break;
     809              : 
     810      1178536 :             case CONST_INT:
                                      /* Constant load into a register, or constant store to
                                         memory.  A destination that is neither leaves IGAIN
                                         untouched.  */
     811      1178536 :               if (REG_P (dst))
     812              :                 {
     813      1178536 :                   if (!speed_p)
     814              :                     {
     815              :                       /* xor (2 bytes) vs. xorps (3 bytes).  */
     816       228704 :                       if (src == const0_rtx)
     817       121416 :                         igain -= COSTS_N_BYTES (1);
     818              :                       /* movdi_internal vs. movv2di_internal.  */
     819              :                       /* => mov (5 bytes) vs. movaps (7 bytes).  */
     820       107288 :                       else if (x86_64_immediate_operand (src, SImode))
     821        95294 :                         igain -= COSTS_N_BYTES (2);
     822              :                       else
     823              :                         /* ??? Larger immediate constants are placed in the
     824              :                            constant pool, where the size benefit/impact of
     825              :                            STV conversion is affected by whether and how
     826              :                            often each constant pool entry is shared/reused.
     827              :                            The value below is empirically derived from the
     828              :                            CSiBE benchmark (and the optimal value may drift
     829              :                            over time).  */
     830              :                         igain += COSTS_N_BYTES (0);
     831              :                     }
     832              :                   else
     833              :                     {
     834              :                       /* DImode can be immediate for TARGET_64BIT
     835              :                          and SImode always.  */
     836       949832 :                       igain += m * COSTS_N_INSNS (1);
     837       949832 :                       igain -= vector_const_cost (src, bb);
     838              :                     }
     839              :                 }
     840            0 :               else if (MEM_P (dst))
     841              :                 {
                                          /* NOTE(review): unlike the other store costings above this
                                             difference is not wrapped in COSTS_N_INSNS (...) / 2 --
                                             confirm whether that is intended.  */
     842            0 :                   igain += (m * ix86_cost->int_store[2]
     843            0 :                             - ix86_cost->sse_store[sse_cost_idx]);
     844            0 :                   igain -= vector_const_cost (src, bb);
     845              :                 }
     846              :               break;
     847              : 
     848        36964 :             case VEC_SELECT:
                                      /* Distinguish selecting element 0 from selecting any other
                                         element.  */
     849        36964 :               if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx)
     850              :                 {
     851              :                   // movd (4 bytes) replaced with movdqa (4 bytes).
                                          // NOTE(review): `!!speed_p' is equivalent to `speed_p'.
     852        27088 :                   if (!!speed_p)
     853        25309 :                     igain += COSTS_N_INSNS (ix86_cost->sse_to_integer
     854              :                                             - ix86_cost->xmm_move) / 2;
     855              :                 }
     856              :               else
     857              :                 {
     858              :                   // pshufd; movd replaced with pshufd.
     859         9876 :                   if (!speed_p)
     860          648 :                     igain += COSTS_N_BYTES (4);
     861              :                   else
     862         9228 :                     igain += ix86_cost->sse_to_integer;
     863              :                 }
     864              :               break;
     865              : 
     866            0 :             default:
     867            0 :               gcc_unreachable ();
     868              :             }
     869              :         }
     870              : 
                              /* Only insns in blocks optimized for speed feed the
                                 frequency-weighted total; the plain total takes every insn.  */
     871     11585746 :       if (speed_p)
     872     10323843 :         weighted_gain += bb_freq * igain;
     873     11587525 :       gain += igain;
     874              : 
     875     11587525 :       if (igain != 0 && dump_file)
     876              :         {
     877           93 :           fprintf (dump_file, "  Instruction gain %d with bb_freq %.2f for",
     878              :                    igain, bb_freq.to_double ());
     879           93 :           dump_insn_slim (dump_file, insn);
     880              :         }
     881              :     }
     882              : 
     883      5885375 :   if (dump_file)
     884              :     {
     885          136 :       fprintf (dump_file, "  Instruction conversion gain: %d, \n",
     886              :                gain);
     887          136 :       fprintf (dump_file, "  Registers conversion cost: %d\n",
     888              :                cost_sse_integer);
     889          136 :       fprintf (dump_file, "  Weighted instruction conversion gain: %.2f, \n",
     890              :                weighted_gain.to_double ());
     891          136 :       fprintf (dump_file, "  Weighted registers conversion cost: %.2f\n",
     892              :                weighted_cost_sse_integer.to_double ());
     893              :     }
     894              : 
                          /* Decide on the weighted figures; fall back to the unweighted
                             ones only when the weighted gain and cost are equal.  */
     895      5885375 :   if (weighted_gain != weighted_cost_sse_integer)
     896      4750083 :     return weighted_gain > weighted_cost_sse_integer;
     897              :   else
                            /* NOTE(review): the second `;' below is a stray empty statement.  */
     898      1135292 :     return gain > cost_sse_integer;;
     899              : }
     900              : 
     901              : /* Insert generated conversion instruction sequence INSNS
     902              :    after instruction AFTER.  New BB may be required in case
     903              :    instruction has EH region attached.  */
     904              : 
     905              : void
     906        31208 : scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
     907              : {
                          /* Common case: AFTER is not a control flow insn, so INSNS can be
                             placed directly behind it.  */
     908        31208 :   if (!control_flow_insn_p (after))
     909              :     {
     910        30995 :       emit_insn_after (insns, after);
     911        30995 :       return;
     912              :     }
     913              : 
                          /* Otherwise split the fallthru successor edge of AFTER's block
                             (asserted to exist) and emit INSNS after the head of the new
                             block.  */
     914          213 :   basic_block bb = BLOCK_FOR_INSN (after);
     915          213 :   edge e = find_fallthru_edge (bb->succs);
     916          213 :   gcc_assert (e);
     917              : 
     918          213 :   basic_block new_bb = split_edge (e);
     919          213 :   emit_insn_after (insns, BB_HEAD (new_bb));
     920              : }
     921              : 
     922              : } // anon namespace
     923              : 
     924              : /* Generate the canonical SET_SRC to move GPR to a VMODE vector register,
     925              :    zeroing the upper parts.  The shape of the returned rtx depends only
                           on the number of units of VMODE; nothing is emitted here.  */
     926              : 
     927              : static rtx
     928       173164 : gen_gpr_to_xmm_move_src (enum machine_mode vmode, rtx gpr)
     929              : {
     930       346328 :   switch (GET_MODE_NUNITS (vmode))
     931              :     {
                            /* One unit: a plain SUBREG of GPR in VMODE.  */
     932           45 :     case 1:
     933           45 :       return gen_rtx_SUBREG (vmode, gpr, 0);
                            /* Two units: GPR concatenated with a zero of the inner mode.  */
     934       172557 :     case 2:
     935       345114 :       return gen_rtx_VEC_CONCAT (vmode, gpr,
     936              :                                  CONST0_RTX (GET_MODE_INNER (vmode)));
                            /* More units: merge a duplicate of GPR with the zero vector
                               using mask 1.  */
     937          562 :     default:
     938          562 :       return gen_rtx_VEC_MERGE (vmode, gen_rtx_VEC_DUPLICATE (vmode, gpr),
     939              :                                 CONST0_RTX (vmode), GEN_INT (HOST_WIDE_INT_1U));
     940              :     }
     941              : }
     942              : 
     943              : /* Make vector copies for all register REGNO definitions
     944              :    and replace its uses in a chain.
                           This function handles one definition: it builds a sequence that
                           copies scalar register REG into the vector register recorded for
                           it in DEFS_MAP and passes that sequence, together with INSN, to
                           emit_conversion_insns.  */
     945              : 
     946              : void
     947         8414 : scalar_chain::make_vector_copies (rtx_insn *insn, rtx reg)
     948              : {
                          /* NOTE(review): the lookup result is dereferenced unchecked;
                             presumably REG always has an entry in DEFS_MAP here -- confirm
                             against the callers.  */
     949         8414 :   rtx vreg = *defs_map.get (reg);
     950              : 
     951         8414 :   start_sequence ();
     952         8414 :   if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
     953              :     {
                              /* Go through the SLOT_STV_TEMP stack slot: store REG there
                                 (as two SImode halves for DImode on !TARGET_64BIT), then load
                                 the slot into VREG.  */
     954            0 :       rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
     955            0 :       if (smode == DImode && !TARGET_64BIT)
     956              :         {
     957            0 :           emit_move_insn (adjust_address (tmp, SImode, 0),
     958              :                           gen_rtx_SUBREG (SImode, reg, 0));
     959            0 :           emit_move_insn (adjust_address (tmp, SImode, 4),
     960              :                           gen_rtx_SUBREG (SImode, reg, 4));
     961              :         }
     962              :       else
     963            0 :         emit_move_insn (copy_rtx (tmp), reg);
     964            0 :       emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
     965              :                               gen_gpr_to_xmm_move_src (vmode, tmp)));
     966              :     }
     967         8414 :   else if (!TARGET_64BIT && smode == DImode)
     968              :     {
                              /* DImode REG on a 32-bit target: assemble VREG from the SImode
                                 subregs of REG at byte offsets 0 and 4.  */
     969         8278 :       if (TARGET_SSE4_1)
     970              :         {
                                  /* Load the offset-0 half, then insert the offset-4 half with
                                     pinsrd.  */
     971          356 :           emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
     972              :                                       CONST0_RTX (V4SImode),
     973              :                                       gen_rtx_SUBREG (SImode, reg, 0)));
     974          356 :           emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
     975              :                                         gen_rtx_SUBREG (V4SImode, vreg, 0),
     976              :                                         gen_rtx_SUBREG (SImode, reg, 4),
     977              :                                         GEN_INT (2)));
     978              :         }
     979              :       else
     980              :         {
                                  /* Load each half into its own register (VREG and a fresh
                                     pseudo TMP) and combine them with an interleave-low.  */
     981         7922 :           rtx tmp = gen_reg_rtx (DImode);
     982         7922 :           emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
     983              :                                       CONST0_RTX (V4SImode),
     984              :                                       gen_rtx_SUBREG (SImode, reg, 0)));
     985         7922 :           emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
     986              :                                       CONST0_RTX (V4SImode),
     987              :                                       gen_rtx_SUBREG (SImode, reg, 4)));
     988         7922 :           emit_insn (gen_vec_interleave_lowv4si
     989              :                      (gen_rtx_SUBREG (V4SImode, vreg, 0),
     990              :                       gen_rtx_SUBREG (V4SImode, vreg, 0),
     991              :                       gen_rtx_SUBREG (V4SImode, tmp, 0)));
     992              :         }
     993              :     }
     994              :   else
                            /* Otherwise a single set using the canonical GPR-to-vector
                               source.  */
     995          136 :     emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
     996              :                             gen_gpr_to_xmm_move_src (vmode, reg)));
     997         8414 :   rtx_insn *seq = end_sequence ();
     998         8414 :   emit_conversion_insns (seq, insn);
     999              : 
    1000         8414 :   if (dump_file)
    1001            0 :     fprintf (dump_file,
    1002              :              "  Copied r%d to a vector register r%d for insn %d\n",
    1003            0 :              REGNO (reg), REGNO (vreg), INSN_UID (insn));
    1004         8414 : }
    1005              : 
    1006              : /* Copy the definition SRC of INSN inside the chain to DST for
    1007              :    scalar uses outside of the chain.  The copy sequence is built here
                           and passed, together with INSN, to emit_conversion_insns.  */
    1008              : 
    1009              : void
    1010        22036 : scalar_chain::convert_reg (rtx_insn *insn, rtx dst, rtx src)
    1011              : {
    1012        22036 :   start_sequence ();
    1013        22036 :   if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
    1014              :     {
                              /* Go through the SLOT_STV_TEMP stack slot: store SRC there, then
                                 load it into DST (as two SImode halves for DImode on
                                 !TARGET_64BIT).  */
    1015            0 :       rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
    1016            0 :       emit_move_insn (tmp, src);
    1017            0 :       if (!TARGET_64BIT && smode == DImode)
    1018              :         {
    1019            0 :           emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
    1020              :                           adjust_address (tmp, SImode, 0));
    1021            0 :           emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
    1022              :                           adjust_address (tmp, SImode, 4));
    1023              :         }
    1024              :       else
    1025            0 :         emit_move_insn (dst, copy_rtx (tmp));
    1026              :     }
    1027        22036 :   else if (!TARGET_64BIT && smode == DImode)
    1028              :     {
                              /* DImode value on a 32-bit target: fill the SImode subregs of DST
                                 at byte offsets 0 and 4 separately.  */
    1029        21135 :       if (TARGET_SSE4_1)
    1030              :         {
                                  /* Select element 0, then element 1, of SRC viewed as
                                     V4SImode.  */
    1031            0 :           rtx tmp = gen_rtx_PARALLEL (VOIDmode,
    1032              :                                       gen_rtvec (1, const0_rtx));
    1033            0 :           emit_insn
    1034            0 :               (gen_rtx_SET
    1035              :                (gen_rtx_SUBREG (SImode, dst, 0),
    1036              :                 gen_rtx_VEC_SELECT (SImode,
    1037              :                                     gen_rtx_SUBREG (V4SImode, src, 0),
    1038              :                                     tmp)));
    1039              : 
    1040            0 :           tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
    1041            0 :           emit_insn
    1042            0 :               (gen_rtx_SET
    1043              :                (gen_rtx_SUBREG (SImode, dst, 4),
    1044              :                 gen_rtx_VEC_SELECT (SImode,
    1045              :                                     gen_rtx_SUBREG (V4SImode, src, 0),
    1046              :                                     tmp)));
    1047              :         }
    1048              :       else
    1049              :         {
                                  /* Work on a V2DImode copy of SRC: move its low SImode part to
                                     the offset-0 half of DST, shift the copy right by 32 bits,
                                     then move the low SImode part again to the offset-4 half.  */
    1050        21135 :           rtx vcopy = gen_reg_rtx (V2DImode);
    1051        21135 :           emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, src, 0));
    1052        21135 :           emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
    1053              :                           gen_rtx_SUBREG (SImode, vcopy, 0));
    1054        21135 :           emit_move_insn (vcopy,
    1055              :                           gen_rtx_LSHIFTRT (V2DImode,
    1056              :                                             vcopy, GEN_INT (32)));
    1057        21135 :           emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
    1058              :                           gen_rtx_SUBREG (SImode, vcopy, 0));
    1059              :         }
    1060              :     }
    1061              :   else
                            /* Otherwise a plain move is enough.  */
    1062          901 :     emit_move_insn (dst, src);
    1063              : 
    1064        22036 :   rtx_insn *seq = end_sequence ();
    1065        22036 :   emit_conversion_insns (seq, insn);
    1066              : 
    1067        22036 :   if (dump_file)
    1068            0 :     fprintf (dump_file,
    1069              :              "  Copied r%d to a scalar register r%d for insn %d\n",
    1070            0 :              REGNO (src), REGNO (dst), INSN_UID (insn));
    1071        22036 : }
    1072              : 
    1073              : /* Helper function to convert immediate constant X to vmode.
                           Returns CONSTM1_RTX (VMODE) when X is minus one; otherwise a
                           CONST_VECTOR of VMODE whose element 0 is X and whose remaining
                           elements are const0_rtx.  */
    1074              : static rtx
    1075        36105 : smode_convert_cst (rtx x, enum machine_mode vmode)
    1076              : {
    1077              :   /* Prefer all ones vector in case of -1.  */
    1078        36105 :   if (constm1_operand (x, GET_MODE (x)))
    1079          625 :     return CONSTM1_RTX (vmode);
    1080              : 
                          /* Build the element array: X first, zeros in every other slot.  */
    1081        35480 :   unsigned n = GET_MODE_NUNITS (vmode);
    1082        35480 :   rtx *v = XALLOCAVEC (rtx, n);
    1083        35480 :   v[0] = x;
    1084        41240 :   for (unsigned i = 1; i < n; ++i)
    1085         5760 :     v[i] = const0_rtx;
    1086        35480 :   return gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
    1087              : }
    1088              : 
    1089              : /* Convert operand OP in INSN.  We should handle
    1090              :    memory operands and uninitialized registers.
    1091              :    All other register uses are converted during
    1092              :    registers conversion.
                           *OP is rewritten in place; any helper insns this needs are emitted
                           before INSN.  */
    1093              : 
    1094              : void
    1095       247574 : scalar_chain::convert_op (rtx *op, rtx_insn *insn)
    1096              : {
    1097       247574 :   rtx tmp;
    1098              : 
                          /* A V1TImode operand is left untouched.  */
    1099       247574 :   if (GET_MODE (*op) == V1TImode)
    1100              :     return;
    1101              : 
    1102       247391 :   *op = copy_rtx_if_shared (*op);
    1103              : 
    1104       247391 :   if (GET_CODE (*op) == NOT
    1105       247391 :       || GET_CODE (*op) == ASHIFT)
    1106              :     {
                              /* Convert the first operand recursively, then retag the
                                 operation itself with VMODE.  */
    1107         3493 :       convert_op (&XEXP (*op, 0), insn);
    1108         3493 :       PUT_MODE (*op, vmode);
    1109              :     }
    1110              :   else if (MEM_P (*op))
    1111              :     {
    1112       173028 :       rtx_insn *movabs = NULL;
    1113              : 
    1114              :       /* Emit MOVABS to load from a 64-bit absolute address to a GPR.  */
    1115       173028 :       if (!memory_operand (*op, GET_MODE (*op)))
    1116              :         {
    1117            0 :           tmp = gen_reg_rtx (GET_MODE (*op));
    1118            0 :           movabs = emit_insn_before (gen_rtx_SET (tmp, *op), insn);
    1119              : 
    1120            0 :           *op = tmp;
    1121              :         }
    1122              : 
                              /* Preload the operand into a fresh pseudo, viewed in VMODE, just
                                 before INSN.  */
    1123       173028 :       tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (GET_MODE (*op)), 0);
    1124              : 
    1125       173028 :       rtx_insn *eh_insn
    1126       173028 :         = emit_insn_before (gen_rtx_SET (copy_rtx (tmp),
    1127              :                                          gen_gpr_to_xmm_move_src (vmode, *op)),
    1128       173028 :                             insn);
    1129              : 
    1130       173028 :       if (cfun->can_throw_non_call_exceptions)
    1131              :         {
    1132              :           /* Handle REG_EH_REGION note.  */
                                  /* If INSN carries one, attach the same region to the new load
                                     (or to the MOVABS insn when one was emitted) and record that
                                     insn in CONTROL_FLOW_INSNS.  */
    1133       168761 :           rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
    1134       168761 :           if (note)
    1135              :             {
    1136         3588 :               if (movabs)
    1137            0 :                 eh_insn = movabs;
    1138         3588 :               control_flow_insns.safe_push (eh_insn);
    1139         3588 :               add_reg_note (eh_insn, REG_EH_REGION, XEXP (note, 0));
    1140              :             }
    1141              :         }
    1142              : 
    1143       173028 :       *op = tmp;
    1144              : 
    1145       173028 :       if (dump_file)
    1146            0 :         fprintf (dump_file, "  Preloading operand for insn %d into r%d\n",
    1147            0 :                  INSN_UID (insn), reg_or_subregno (tmp));
    1148              :     }
    1149              :   else if (REG_P (*op))
                            /* A register is simply viewed in VMODE.  */
    1150        64750 :     *op = gen_rtx_SUBREG (vmode, *op, 0);
    1151              :   else if (CONST_SCALAR_INT_P (*op))
    1152              :     {
                              /* Turn the scalar constant into a vector constant.  Unless it is
                                 a standard SSE constant it is forced into the constant pool,
                                 with any insns that requires emitted before INSN.  The constant
                                 is then loaded into a fresh pseudo before INSN.  */
    1153         6117 :       rtx vec_cst = smode_convert_cst (*op, vmode);
    1154              : 
    1155         6117 :       if (!standard_sse_constant_p (vec_cst, vmode))
    1156              :         {
    1157         2718 :           start_sequence ();
    1158         2718 :           vec_cst = validize_mem (force_const_mem (vmode, vec_cst));
    1159         2718 :           rtx_insn *seq = end_sequence ();
    1160         2718 :           emit_insn_before (seq, insn);
    1161              :         }
    1162              : 
    1163         6117 :       tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (smode), 0);
    1164              : 
    1165         6117 :       emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
    1166         6117 :       *op = tmp;
    1167              :     }
    1168              :   else
    1169              :     {
                              /* Anything else must already be a SUBREG; take its VMODE
                                 lowpart unless it is in VMODE already.  */
    1170            0 :       gcc_assert (SUBREG_P (*op));
    1171            3 :       if (GET_MODE (*op) != vmode)
    1172            3 :         *op = gen_lowpart (vmode, *op);
    1173              :     }
    1174              : }
    1175              : 
    1176              : /* Convert CCZmode COMPARE to vector mode.
                      :
                      :    OP1 and OP2 are the operands of the COMPARE in INSN.  Any insns
                      :    needed to set up the vector operands are emitted before INSN.
                      :    Returns a CCZmode UNSPEC_PTEST rtx: either a test of the two
                      :    converted AND operands against each other, or a test of a single
                      :    vector register against itself.  */
    1177              : 
    1178              : rtx
    1179           12 : scalar_chain::convert_compare (rtx op1, rtx op2, rtx_insn *insn)
    1180              : {
    1181           12 :   rtx src, tmp;
    1182              : 
    1183              :   /* Handle any REG_EQUAL notes.  A note of the form
                      :      (compare:CCZ (reg) (const)) has its constant rewritten in place
                      :      as a VMODE constant: all-ones for -1, otherwise the constant in
                      :      element 0 followed by zeros.  Such a note, and one comparing
                      :      against a register, is kept; any other note is removed.  */
    1184           12 :   tmp = find_reg_equal_equiv_note (insn);
    1185           12 :   if (tmp)
    1186              :     {
    1187            1 :       if (GET_CODE (XEXP (tmp, 0)) == COMPARE
    1188            1 :           && GET_MODE (XEXP (tmp, 0)) == CCZmode
    1189            1 :           && REG_P (XEXP (XEXP (tmp, 0), 0)))
    1190              :         {
    1191            1 :           rtx *op = &XEXP (XEXP (tmp, 0), 1);
    1192            1 :           if (CONST_SCALAR_INT_P (*op))
    1193              :             {
    1194            1 :               if (constm1_operand (*op, GET_MODE (*op)))
    1195            0 :                 *op = CONSTM1_RTX (vmode);
    1196              :               else
    1197              :                 {
    1198            1 :                   unsigned n = GET_MODE_NUNITS (vmode);
    1199            1 :                   rtx *v = XALLOCAVEC (rtx, n);
    1200            1 :                   v[0] = *op;
    1201            1 :                   for (unsigned i = 1; i < n; ++i)
    1202            0 :                     v[i] = const0_rtx;
    1203            1 :                   *op = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
    1204              :                 }
                                      /* TMP doubles as the "remove this note" flag below;
                                         clearing it keeps the updated note.  */
    1205              :               tmp = NULL_RTX;
    1206              :             }
    1207            0 :           else if (REG_P (*op))
    1208              :             tmp = NULL_RTX;
    1209              :         }
    1210              : 
    1211              :       if (tmp)
    1212            0 :         remove_note (insn, tmp);
    1213              :     }
    1214              : 
    1215              :   /* Comparison against anything other than zero, requires an XOR.  */
    1216           12 :   if (op2 != const0_rtx)
    1217              :     {
    1218            6 :       convert_op (&op1, insn);
    1219            6 :       convert_op (&op2, insn);
    1220              :       /* If both operands are MEMs, explicitly load the OP1 into TMP.  */
    1221            6 :       if (MEM_P (op1) && MEM_P (op2))
    1222              :         {
    1223            0 :           tmp = gen_reg_rtx (vmode);
    1224            0 :           emit_insn_before (gen_rtx_SET (tmp, op1), insn);
    1225            0 :           src = tmp;
    1226              :         }
    1227              :       else
    1228              :         src = op1;
    1229            6 :       src = gen_rtx_XOR (vmode, src, op2);
    1230              :     }
    1231            6 :   else if (GET_CODE (op1) == AND
    1232            0 :            && GET_CODE (XEXP (op1, 0)) == NOT)
    1233              :     {
                              /* (and (not A) B) compared with zero: convert both inputs,
                                 force A into a register and let the common tail below test
                                 the vector (and (not A) B).  */
    1234            0 :       rtx op11 = XEXP (XEXP (op1, 0), 0);
    1235            0 :       rtx op12 = XEXP (op1, 1);
    1236            0 :       convert_op (&op11, insn);
    1237            0 :       convert_op (&op12, insn);
    1238            0 :       if (!REG_P (op11))
    1239              :         {
    1240            0 :           tmp = gen_reg_rtx (vmode);
    1241            0 :           emit_insn_before (gen_rtx_SET (tmp, op11), insn);
    1242            0 :           op11 = tmp;
    1243              :         }
    1244            0 :       src = gen_rtx_AND (vmode, gen_rtx_NOT (vmode, op11), op12);
    1245            0 :     }
    1246            6 :   else if (GET_CODE (op1) == AND)
    1247              :     {
                              /* (and A B) compared with zero: return a PTEST of the two
                                 converted operands directly, with A forced into a register.
                                 This path skips the common tail below.  */
    1248            0 :       rtx op11 = XEXP (op1, 0);
    1249            0 :       rtx op12 = XEXP (op1, 1);
    1250            0 :       convert_op (&op11, insn);
    1251            0 :       convert_op (&op12, insn);
    1252            0 :       if (!REG_P (op11))
    1253              :         {
    1254            0 :           tmp = gen_reg_rtx (vmode);
    1255            0 :           emit_insn_before (gen_rtx_SET (tmp, op11), insn);
    1256            0 :           op11 = tmp;
    1257              :         }
    1258            0 :       return gen_rtx_UNSPEC (CCZmode, gen_rtvec (2, op11, op12),
    1259              :                              UNSPEC_PTEST);
    1260              :     }
    1261              :   else
    1262              :     {
    1263            6 :       convert_op (&op1, insn);
    1264            6 :       src = op1;
    1265              :     }
    1266              : 
                          /* The final PTEST uses SRC for both operands; materialize it in a
                             fresh vector register if it is not already a REG.  */
    1267           12 :   if (!REG_P (src))
    1268              :     {
    1269            8 :       tmp = gen_reg_rtx (vmode);
    1270            8 :       emit_insn_before (gen_rtx_SET (tmp, src), insn);
    1271            8 :       src = tmp;
    1272              :     }
    1273              : 
                          /* NOTE(review): both shuffles below take SRC as their only data
                             input (interleave-low of SRC with itself, PSHUFD with selector
                             0).  Presumably this replicates the low element so that the
                             other elements of the vector cannot affect the PTEST result --
                             confirm against the insn patterns.  For any other VMODE, SRC is
                             tested unchanged.  */
    1274           12 :   if (vmode == V2DImode)
    1275              :     {
    1276            0 :       tmp = gen_reg_rtx (vmode);
    1277            0 :       emit_insn_before (gen_vec_interleave_lowv2di (tmp, src, src), insn);
    1278            0 :       src = tmp;
    1279              :     }
    1280           12 :   else if (vmode == V4SImode)
    1281              :     {
    1282            0 :       tmp = gen_reg_rtx (vmode);
    1283            0 :       emit_insn_before (gen_sse2_pshufd (tmp, src, const0_rtx), insn);
    1284            0 :       src = tmp;
    1285              :     }
    1286              : 
    1287           12 :   return gen_rtx_UNSPEC (CCZmode, gen_rtvec (2, src, src), UNSPEC_PTEST);
    1288              : }
    1289              : 
    1290              : /* Helper function for converting INSN to vector mode.
                      :
                      :    First, for every register defined by INSN that is in DEFS_CONV:
                      :    if some non-debug use is a DF_REF_REG_MEM_P ref or sits in an insn
                      :    outside the chain's INSNS bitmap, call convert_reg with the
                      :    register recorded in DEFS_MAP; otherwise, when debug bind insns
                      :    may exist, redirect or reset the debug uses.
                      :
                      :    Then every register use in INSN that is not a DF_REF_REG_MEM_P ref
                      :    and has an entry in DEFS_MAP is replaced by the mapped register,
                      :    together with a matching REG_DEAD note.  */
    1291              : 
    1292              : void
    1293      1318701 : scalar_chain::convert_insn_common (rtx_insn *insn)
    1294              : {
    1295              :   /* Generate copies for out-of-chain uses of defs and adjust debug uses.  */
    1296      2019780 :   for (df_ref ref = DF_INSN_DEFS (insn); ref; ref = DF_REF_NEXT_LOC (ref))
    1297       701079 :     if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
    1298              :       {
    1299        23468 :         df_link *use;
                                /* Stop at the first non-debug use that forces a copy; USE
                                   is null afterwards iff no such use exists.  */
    1300        44151 :         for (use = DF_REF_CHAIN (ref); use; use = use->next)
    1301        42719 :           if (NONDEBUG_INSN_P (DF_REF_INSN (use->ref))
    1302        42719 :               && (DF_REF_REG_MEM_P (use->ref)
    1303        38640 :                   || !bitmap_bit_p (insns, DF_REF_INSN_UID (use->ref))))
    1304              :             break;
    1305        23468 :         if (use)
    1306        22036 :           convert_reg (insn, DF_REF_REG (ref),
    1307        22036 :                        *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]));
    1308         1432 :         else if (MAY_HAVE_DEBUG_BIND_INSNS)
    1309              :           {
    1310              :             /* If we generated a scalar copy we can leave debug-insns
    1311              :                as-is, if not, we have to adjust them.  */
    1312         1310 :             auto_vec<rtx_insn *, 5> to_reset_debug_insns;
    1313         3877 :             for (use = DF_REF_CHAIN (ref); use; use = use->next)
    1314         2567 :               if (DEBUG_INSN_P (DF_REF_INSN (use->ref)))
    1315              :                 {
    1316          825 :                   rtx_insn *debug_insn = DF_REF_INSN (use->ref);
    1317              :                   /* If there's a reaching definition outside of the
    1318              :                      chain we have to reset.  */
    1319          825 :                   df_link *def;
    1320         2934 :                   for (def = DF_REF_CHAIN (use->ref); def; def = def->next)
    1321         2292 :                     if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def->ref)))
    1322              :                       break;
    1323          825 :                   if (def)
    1324          183 :                     to_reset_debug_insns.safe_push (debug_insn);
    1325              :                   else
    1326              :                     {
                                              /* All reaching definitions are inside the
                                                 chain: point the debug use at the register
                                                 recorded in DEFS_MAP.  */
    1327          642 :                       *DF_REF_REAL_LOC (use->ref)
    1328          642 :                         = *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]);
    1329          642 :                       df_insn_rescan (debug_insn);
    1330              :                     }
    1331              :                 }
    1332              :             /* Have to do the reset outside of the DF_CHAIN walk to not
    1333              :                disrupt it.  */
    1334         2803 :             while (!to_reset_debug_insns.is_empty ())
    1335              :               {
    1336          183 :                 rtx_insn *debug_insn = to_reset_debug_insns.pop ();
    1337          183 :                 INSN_VAR_LOCATION_LOC (debug_insn) = gen_rtx_UNKNOWN_VAR_LOC ();
    1338          183 :                 df_insn_rescan_debug_internal (debug_insn);
    1339              :               }
    1340         1310 :           }
    1341              :       }
    1342              : 
    1343              :   /* Replace uses in this insn with the defs we use in the chain.  */
    1344      3293523 :   for (df_ref ref = DF_INSN_USES (insn); ref; ref = DF_REF_NEXT_LOC (ref))
    1345      1974822 :     if (!DF_REF_REG_MEM_P (ref))
    1346       711677 :       if (rtx *vreg = defs_map.get (regno_reg_rtx[DF_REF_REGNO (ref)]))
    1347              :         {
    1348              :           /* Also update a corresponding REG_DEAD note.  */
    1349        35367 :           rtx note = find_reg_note (insn, REG_DEAD, DF_REF_REG (ref));
    1350        35367 :           if (note)
    1351        23379 :             XEXP (note, 0) = *vreg;
    1352        35367 :           *DF_REF_REAL_LOC (ref) = *vreg;
    1353              :         }
    1354      1318701 : }
    1355              : 
    1356              : /* Convert INSN which is an SImode or DImode rotation by a constant
    1357              :    to vector mode.  CODE is either ROTATE or ROTATERT with operands
    1358              :    OP0 and OP1.  Returns the SET_SRC of the last instruction in the
    1359              :    resulting sequence, which is emitted before INSN.
                      :
                      :    OP1 is read with INTVAL, so it must be a CONST_INT.  A rotation
                      :    by zero returns the converted OP0 without emitting a shuffle.
                      :    When TARGET_AVX512VL is set (and the count is not the half-width
                      :    special case) the rotate is kept as a vector-mode rotate rtx;
                      :    otherwise it is synthesized from shuffles and a logical right
                      :    shift.  */
    1360              : 
    1361              : rtx
    1362           92 : general_scalar_chain::convert_rotate (enum rtx_code code, rtx op0, rtx op1,
    1363              :                                       rtx_insn *insn)
    1364              : {
    1365           92 :   int bits = INTVAL (op1);
    1366           92 :   rtx pat, result;
    1367              : 
    1368           92 :   convert_op (&op0, insn);
    1369           92 :   if (bits == 0)
    1370            0 :     return op0;
    1371              : 
    1372           92 :   if (smode == DImode)
    1373              :     {
                              /* A left rotate by N equals a right rotate by 64 - N; from
                                 here on BITS is a right-rotate count.  (The AVX512VL path
                                 still uses the original CODE and OP1.)  */
    1374           92 :       if (code == ROTATE)
    1375           45 :         bits = 64 - bits;
    1376           92 :       if (bits == 32)
    1377              :         {
                                  /* Selector 225 (0b11100001) picks dwords {1,0,2,3}: swap
                                     the two halves of the low quadword.  */
    1378            0 :           rtx tmp1 = gen_reg_rtx (V4SImode);
    1379            0 :           pat = gen_sse2_pshufd (tmp1, gen_lowpart (V4SImode, op0),
    1380              :                                  GEN_INT (225));
    1381            0 :           emit_insn_before (pat, insn);
    1382            0 :           result = gen_lowpart (V2DImode, tmp1);
    1383              :         }
    1384           92 :       else if (TARGET_AVX512VL)
    1385            0 :         result = simplify_gen_binary (code, V2DImode, op0, op1);
    1386           92 :       else if (bits == 16 || bits == 48)
    1387              :         {
                                  /* Rotate the four low words: selector 57 (0b00111001)
                                     picks words {1,2,3,0}, selector 147 (0b10010011) picks
                                     words {3,0,1,2}.  */
    1388            0 :           rtx tmp1 = gen_reg_rtx (V8HImode);
    1389            0 :           pat = gen_sse2_pshuflw (tmp1, gen_lowpart (V8HImode, op0),
    1390              :                                   GEN_INT (bits == 16 ? 57 : 147));
    1391            0 :           emit_insn_before (pat, insn);
    1392            0 :           result = gen_lowpart (V2DImode, tmp1);
    1393              :         }
    1394           92 :       else if ((bits & 0x07) == 0)
    1395              :         {
                                  /* Whole-byte count: selector 68 (0b01000100) picks dwords
                                     {0,1,0,1}, duplicating the low quadword into the high
                                     one, then the 128-bit value is shifted right by BITS so
                                     the low quadword holds the rotated value.  */
    1396            0 :           rtx tmp1 = gen_reg_rtx (V4SImode);
    1397            0 :           pat = gen_sse2_pshufd (tmp1, gen_lowpart (V4SImode, op0),
    1398              :                                  GEN_INT (68));
    1399            0 :           emit_insn_before (pat, insn);
    1400            0 :           rtx tmp2 = gen_reg_rtx (V1TImode);
    1401            0 :           pat = gen_sse2_lshrv1ti3 (tmp2, gen_lowpart (V1TImode, tmp1),
    1402              :                                     GEN_INT (bits));
    1403            0 :           emit_insn_before (pat, insn);
    1404            0 :           result = gen_lowpart (V2DImode, tmp2);
    1405              :         }
    1406              :       else
    1407              :         {
                                  /* General count: selector 20 (0b00010100) picks dwords
                                     {0,1,1,0}, i.e. the value in the low quadword and its
                                     halves swapped in the high quadword.  Both quadwords
                                     are shifted right by BITS mod 32, and the low dwords of
                                     the two results are then combined: selector 136 picks
                                     dwords {0,2,0,2}, selector 34 picks {2,0,2,0}.  */
    1408           92 :           rtx tmp1 = gen_reg_rtx (V4SImode);
    1409           92 :           pat = gen_sse2_pshufd (tmp1, gen_lowpart (V4SImode, op0),
    1410              :                                  GEN_INT (20));
    1411           92 :           emit_insn_before (pat, insn);
    1412           92 :           rtx tmp2 = gen_reg_rtx (V2DImode);
    1413           92 :           pat = gen_lshrv2di3 (tmp2, gen_lowpart (V2DImode, tmp1),
    1414              :                                GEN_INT (bits & 31));
    1415           92 :           emit_insn_before (pat, insn);
    1416           92 :           rtx tmp3 = gen_reg_rtx (V4SImode);
    1417          139 :           pat = gen_sse2_pshufd (tmp3, gen_lowpart (V4SImode, tmp2),
    1418              :                                  GEN_INT (bits > 32 ? 34 : 136));
    1419           92 :           emit_insn_before (pat, insn);
    1420           92 :           result = gen_lowpart (V2DImode, tmp3);
    1421              :         }
    1422              :     }
    1423            0 :   else if (bits == 16)
    1424              :     {
                              /* SImode rotate by half the width (same for either
                                 direction): selector 225 picks words {1,0,2,3}, swapping
                                 the two low words.  */
    1425            0 :       rtx tmp1 = gen_reg_rtx (V8HImode);
    1426            0 :       pat = gen_sse2_pshuflw (tmp1, gen_lowpart (V8HImode, op0), GEN_INT (225));
    1427            0 :       emit_insn_before (pat, insn);
    1428            0 :       result = gen_lowpart (V4SImode, tmp1);
    1429              :     }
    1430            0 :   else if (TARGET_AVX512VL)
    1431            0 :     result = simplify_gen_binary (code, V4SImode, op0, op1);
    1432              :   else
    1433              :     {
                              /* As above, turn a left rotate into a right-rotate count.  */
    1434            0 :       if (code == ROTATE)
    1435            0 :         bits = 32 - bits;
    1436              : 
                              /* Selector 224 (0b11100000) picks dwords {0,0,2,3}: the low
                                 quadword becomes two copies of the value, so a 64-bit
                                 logical right shift leaves the rotated value in the low
                                 dword.  */
    1437            0 :       rtx tmp1 = gen_reg_rtx (V4SImode);
    1438            0 :       emit_insn_before (gen_sse2_pshufd (tmp1, op0, GEN_INT (224)), insn);
    1439            0 :       rtx tmp2 = gen_reg_rtx (V2DImode);
    1440            0 :       pat = gen_lshrv2di3 (tmp2, gen_lowpart (V2DImode, tmp1),
    1441              :                            GEN_INT (bits));
    1442            0 :       emit_insn_before (pat, insn);
    1443            0 :       result = gen_lowpart (V4SImode, tmp2);
    1444              :     }
    1445              : 
    1446              :   return result;
    1447              : }
    1448              : 
    1449              : /* Convert INSN to vector mode.
                      :
                      :    INSN is expected to be a single set: the result of single_set is
                      :    dereferenced without a null check.  The destination is rewritten
                      :    first, then the source according to its rtx code (any other code
                      :    than those handled below is gcc_unreachable).  Finally the
                      :    pattern of INSN is replaced by the rewritten SET, the insn is
                      :    re-recognized (failure is fatal) and rescanned by DF.  */
    1450              : 
    1451              : void
    1452       411616 : general_scalar_chain::convert_insn (rtx_insn *insn)
    1453              : {
    1454       411616 :   rtx def_set = single_set (insn);
    1455       411616 :   rtx src = SET_SRC (def_set);
    1456       411616 :   rtx dst = SET_DEST (def_set);
    1457       411616 :   rtx subreg;
    1458              : 
    1459       411616 :   if (MEM_P (dst) && !REG_P (src))
    1460              :     {
    1461              :       /* There are no scalar integer instructions and therefore
    1462              :          temporary register usage is required.  */
    1463          758 :       rtx tmp = gen_reg_rtx (smode);
    1464          758 :       emit_conversion_insns (gen_move_insn (dst, tmp), insn);
    1465          758 :       dst = gen_rtx_SUBREG (vmode, tmp, 0);
    1466          758 :     }
    1467       410858 :   else if (REG_P (dst) && GET_MODE (dst) == smode)
    1468              :     {
    1469              :       /* Replace the definition with a SUBREG to the definition we
    1470              :          use inside the chain.  */
    1471       215545 :       rtx *vdef = defs_map.get (dst);
    1472       215545 :       if (vdef)
    1473        23468 :         dst = *vdef;
    1474       215545 :       dst = gen_rtx_SUBREG (vmode, dst, 0);
    1475              :       /* IRA doesn't like to have REG_EQUAL/EQUIV notes when the SET_DEST
    1476              :          is a non-REG_P.  So kill those off.  */
    1477       215545 :       rtx note = find_reg_equal_equiv_note (insn);
    1478       215545 :       if (note)
    1479         9727 :         remove_note (insn, note);
    1480              :     }
    1481              : 
    1482       411616 :   switch (GET_CODE (src))
    1483              :     {
                            /* Binary operations: convert the second operand here, then
                               fall through to convert the first and retag SRC as VMODE.  */
    1484        30092 :     case PLUS:
    1485        30092 :     case MINUS:
    1486        30092 :     case IOR:
    1487        30092 :     case XOR:
    1488        30092 :     case AND:
    1489        30092 :     case SMAX:
    1490        30092 :     case SMIN:
    1491        30092 :     case UMAX:
    1492        30092 :     case UMIN:
    1493        30092 :       convert_op (&XEXP (src, 1), insn);
    1494              :       /* FALLTHRU */
    1495              : 
                            /* For these codes only operand 0 is converted; operand 1 of
                               the shifts is left as is.  */
    1496        37480 :     case ABS:
    1497        37480 :     case ASHIFT:
    1498        37480 :     case ASHIFTRT:
    1499        37480 :     case LSHIFTRT:
    1500        37480 :       convert_op (&XEXP (src, 0), insn);
    1501        37480 :       PUT_MODE (src, vmode);
    1502        37480 :       break;
    1503              : 
    1504           92 :     case ROTATE:
    1505           92 :     case ROTATERT:
    1506           92 :       src = convert_rotate (GET_CODE (src), XEXP (src, 0), XEXP (src, 1),
    1507              :                             insn);
    1508           92 :       break;
    1509              : 
                            /* Negation is emitted as (0 - x).  For -(abs x) the ABS is
                               first computed into a fresh vector register.  */
    1510          391 :     case NEG:
    1511          391 :       src = XEXP (src, 0);
    1512              : 
    1513          391 :       if (GET_CODE (src) == ABS)
    1514              :         {
    1515            0 :           src = XEXP (src, 0);
    1516            0 :           convert_op (&src, insn);
    1517            0 :           subreg = gen_reg_rtx (vmode);
    1518            0 :           emit_insn_before (gen_rtx_SET (subreg,
    1519              :                                          gen_rtx_ABS (vmode, src)), insn);
    1520            0 :           src = subreg;
    1521              :         }
    1522              :       else
    1523          391 :         convert_op (&src, insn);
    1524              : 
    1525          391 :       subreg = gen_reg_rtx (vmode);
    1526          391 :       emit_insn_before (gen_move_insn (subreg, CONST0_RTX (vmode)), insn);
    1527          391 :       src = gen_rtx_MINUS (vmode, subreg, src);
    1528          391 :       break;
    1529              : 
                            /* Bitwise NOT is emitted as an XOR with an all-ones vector
                               loaded into a fresh register.  */
    1530          250 :     case NOT:
    1531          250 :       src = XEXP (src, 0);
    1532          250 :       convert_op (&src, insn);
    1533          250 :       subreg = gen_reg_rtx (vmode);
    1534          250 :       emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (vmode)), insn);
    1535          250 :       src = gen_rtx_XOR (vmode, src, subreg);
    1536          250 :       break;
    1537              : 
                            /* DST was already rewritten above, so a register destination
                               of mode SMODE is a SUBREG by now and passes the !REG_P test;
                               the test only excludes a destination left as a plain REG.  */
    1538       170860 :     case MEM:
    1539       170860 :       if (!REG_P (dst))
    1540       170860 :         convert_op (&src, insn);
    1541              :       break;
    1542              : 
                            /* A register stored to memory is left for
                               convert_insn_common's use replacement; otherwise convert it
                               here.  NOTE(review): division of labour inferred from the
                               two functions -- confirm.  */
    1543       196623 :     case REG:
    1544       196623 :       if (!MEM_P (dst))
    1545         1310 :         convert_op (&src, insn);
    1546              :       break;
    1547              : 
    1548            0 :     case SUBREG:
    1549            0 :       gcc_assert (GET_MODE (src) == vmode);
    1550              :       break;
    1551              : 
                            /* The destination becomes the flags register in CCZmode and
                               the source the PTEST built by convert_compare.  */
    1552            0 :     case COMPARE:
    1553            0 :       dst = gen_rtx_REG (CCZmode, FLAGS_REG);
    1554            0 :       src = convert_compare (XEXP (src, 0), XEXP (src, 1), insn);
    1555            0 :       break;
    1556              : 
    1557         3362 :     case CONST_INT:
    1558         3362 :       convert_op (&src, insn);
    1559         3362 :       break;
    1560              : 
                            /* Element extraction.  Element 0: use the vector operand
                               itself.  Another element with SMODE == DImode: a V1TImode
                               logical right shift by 64 (DST is viewed as V1TImode too).
                               Otherwise: a VMODE VEC_SELECT whose four-entry PARALLEL
                               repeats the selected index.  */
    1561         2558 :     case VEC_SELECT:
    1562         2558 :       if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx)
    1563         1565 :         src = XEXP (src, 0);
    1564          993 :       else if (smode == DImode)
    1565              :         {
    1566          748 :           rtx tmp = gen_lowpart (V1TImode, XEXP (src, 0));
    1567          748 :           dst = gen_lowpart (V1TImode, dst);
    1568          748 :           src = gen_rtx_LSHIFTRT (V1TImode, tmp, GEN_INT (64));
    1569              :         }
    1570              :       else
    1571              :         {
    1572          245 :           rtx tmp = XVECEXP (XEXP (src, 1), 0, 0);
    1573          245 :           rtvec vec = gen_rtvec (4, tmp, tmp, tmp, tmp);
    1574          245 :           rtx par = gen_rtx_PARALLEL (VOIDmode, vec);
    1575          245 :           src = gen_rtx_VEC_SELECT (vmode, XEXP (src, 0), par);
    1576              :         }
    1577              :       break;
    1578              : 
    1579            0 :     default:
    1580            0 :       gcc_unreachable ();
    1581              :     }
    1582              : 
    1583       411616 :   SET_SRC (def_set) = src;
    1584       411616 :   SET_DEST (def_set) = dst;
    1585              : 
    1586              :   /* Drop possible dead definitions.  */
    1587       411616 :   PATTERN (insn) = def_set;
    1588              : 
                          /* Reset the cached insn code so the rewritten pattern is
                             recognized afresh; an unrecognizable result is fatal.  */
    1589       411616 :   INSN_CODE (insn) = -1;
    1590       411616 :   int patt = recog_memoized (insn);
    1591       411616 :   if  (patt == -1)
    1592            0 :     fatal_insn_not_found (insn);
    1593       411616 :   df_insn_rescan (insn);
    1594       411616 : }
    1595              : 
    1596              : /* Helper function to compute gain for loading an immediate constant.
    1597              :    Typically, two movabsq for TImode vs. vmovdqa for V1TImode, but
    1598              :    with numerous special cases.
                      :
                      :    Returns 0 unless CST is a CONST_WIDE_INT with exactly two
                      :    elements that are equal to each other.  In that case the result
                      :    is negative (a cost): -COSTS_N_BYTES (9) when BB is optimized
                      :    for size, -COSTS_N_INSNS (2) otherwise.  */
    1599              : 
    1600              : static int
    1601           19 : timode_immed_const_gain (rtx cst, basic_block bb)
    1602              : {
    1603              :   /* movabsq vs. movabsq+vmovq+vunpacklqdq.  */
    1604           19 :   if (CONST_WIDE_INT_P (cst)
    1605            7 :       && CONST_WIDE_INT_NUNITS (cst) == 2
    1606           26 :       && CONST_WIDE_INT_ELT (cst, 0) == CONST_WIDE_INT_ELT (cst, 1))
    1607            0 :     return optimize_bb_for_size_p (bb) ? -COSTS_N_BYTES (9)
    1608              :                                        : -COSTS_N_INSNS (2);
    1609              :   /* 2x movabsq ~ vmovdqa.  */
    1610              :   return 0;
    1611              : }
    1612              : 
    1613              : /* Return true if converting the chain is profitable.  */
    1614              : 
    1615              : bool
    1616       499867 : timode_scalar_chain::compute_convert_gain ()
    1617              : {
    1618              :   /* Assume that if we have to move TImode values between units,
    1619              :      then transforming this chain isn't worth it.  */
    1620       499867 :   if (cost_sse_integer)
    1621              :     return false;
    1622              : 
    1623       499867 :   bitmap_iterator bi;
    1624       499867 :   unsigned insn_uid;
    1625              : 
    1626              :   /* Split ties to prefer V1TImode when not optimizing for size.  */
    1627       499867 :   int gain = optimize_size ? 0 : 1;
    1628       499867 :   sreal weighted_gain  = 0;
    1629              : 
    1630       499867 :   if (dump_file)
    1631            0 :     fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
    1632              : 
    1633      1495389 :   EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
    1634              :     {
    1635       995522 :       rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
    1636       995522 :       rtx def_set = single_set (insn);
    1637       995522 :       rtx src = SET_SRC (def_set);
    1638       995522 :       rtx dst = SET_DEST (def_set);
    1639       995522 :       HOST_WIDE_INT op1val;
    1640       995522 :       basic_block bb = BLOCK_FOR_INSN (insn);
    1641       995522 :       int scost, vcost;
    1642       995522 :       int igain = 0;
    1643       995522 :       profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
    1644       995522 :       bool speed_p = optimize_bb_for_speed_p (bb);
    1645       995522 :       sreal bb_freq = bb->count.to_sreal_scale (entry_count);
    1646              : 
    1647       995522 :       switch (GET_CODE (src))
    1648              :         {
    1649       519981 :         case REG:
    1650       519981 :           if (GENERAL_REGNO_P (REGNO (src)))
    1651              :             {
    1652        24979 :               if (TARGET_AVX)
    1653              :                 /* vmovq + vpinsrq */
    1654           26 :                 igain = speed_p ? -ix86_cost->integer_to_sse
    1655              :                                   - COSTS_N_INSNS (1)
    1656              :                                 : -COSTS_N_BYTES (11);
    1657              :               else
    1658              :                 /* movq + movq + punpcklqdq */
    1659        24953 :                 igain = speed_p ? -ix86_cost->integer_to_sse
    1660              :                                   - COSTS_N_INSNS (2)
    1661              :                                 : -COSTS_N_BYTES (14);
    1662              :             }
    1663       495002 :           else if (GENERAL_REG_P (dst))
    1664              :             {
    1665        24503 :               if (TARGET_AVX)
    1666              :                 /* vpextrq + vmovq */
    1667           26 :                 igain = speed_p ? -ix86_cost->sse_to_integer
    1668              :                                   - COSTS_N_INSNS (1)
    1669              :                                 : -COSTS_N_BYTES (11);
    1670              :               else
    1671              :                 /* movhlps + movq + movq */
    1672        24477 :                 igain = speed_p ? -ix86_cost->sse_to_integer
    1673              :                                   - COSTS_N_INSNS (2)
    1674              :                                 : -COSTS_N_BYTES (13);
    1675              :             }
    1676       470499 :           else if (!speed_p)
    1677        14482 :             igain = MEM_P (dst) ? COSTS_N_BYTES (6) : COSTS_N_BYTES (3);
    1678              :           else
    1679              :             igain = COSTS_N_INSNS (1);
    1680              :           break;
    1681              : 
    1682       429606 :         case MEM:
    1683       429606 :           igain = !speed_p ? COSTS_N_BYTES (7) : COSTS_N_INSNS (1);
    1684              :           break;
    1685              : 
    1686        10593 :         case CONST_INT:
    1687        10593 :           if (MEM_P (dst)
    1688        10593 :               && standard_sse_constant_p (src, V1TImode))
    1689        10058 :             igain = !speed_p ? COSTS_N_BYTES (11) : 1;
    1690              :           break;
    1691              : 
    1692        30350 :         case CONST_WIDE_INT:
    1693              :           /* 2 x mov vs. vmovdqa.  */
    1694        30350 :           if (MEM_P (dst))
    1695        29878 :             igain = !speed_p ? COSTS_N_BYTES (3) : COSTS_N_INSNS (1);
    1696              :           break;
    1697              : 
    1698           78 :         case NOT:
    1699           78 :           if (MEM_P (dst))
    1700        66323 :             igain = -COSTS_N_INSNS (1);
    1701              :           break;
    1702              : 
    1703           38 :         case AND:
    1704           38 :           if (!MEM_P (dst))
    1705           27 :             igain = COSTS_N_INSNS (1);
    1706           38 :           if (CONST_SCALAR_INT_P (XEXP (src, 1)))
    1707           10 :             igain += timode_immed_const_gain (XEXP (src, 1), bb);
    1708              :           break;
    1709              : 
    1710         4262 :         case XOR:
    1711         4262 :         case IOR:
    1712         4262 :           if (timode_concatdi_p (src))
    1713              :             {
    1714              :               /* vmovq;vpinsrq (11 bytes).  */
    1715         4136 :               igain = speed_p ? -ix86_cost->integer_to_sse - COSTS_N_INSNS (1)
    1716              :                               : -COSTS_N_BYTES (11);
    1717              :               break;
    1718              :             }
    1719          126 :           if (!MEM_P (dst))
    1720          118 :             igain = COSTS_N_INSNS (1);
    1721          126 :           if (CONST_SCALAR_INT_P (XEXP (src, 1)))
    1722            9 :             igain += timode_immed_const_gain (XEXP (src, 1), bb);
    1723              :           break;
    1724              : 
    1725            0 :         case PLUS:
    1726            0 :           if (timode_concatdi_p (src))
    1727              :             /* vmovq;vpinsrq (11 bytes).  */
    1728            0 :             igain = speed_p ? -ix86_cost->integer_to_sse - COSTS_N_INSNS (1)
    1729              :                             : -COSTS_N_BYTES (11);
    1730              :           break;
    1731              : 
    1732          206 :         case ASHIFT:
    1733          206 :         case LSHIFTRT:
    1734              :           /* See ix86_expand_v1ti_shift.  */
    1735          206 :           op1val = INTVAL (XEXP (src, 1));
    1736          206 :           if (!speed_p)
    1737              :             {
    1738           19 :               if (op1val == 64 || op1val == 65)
    1739              :                 scost = COSTS_N_BYTES (5);
    1740           13 :               else if (op1val >= 66)
    1741              :                 scost = COSTS_N_BYTES (6);
    1742           13 :               else if (op1val == 1)
    1743              :                 scost = COSTS_N_BYTES (8);
    1744              :               else
    1745              :                 scost = COSTS_N_BYTES (9);
    1746              : 
    1747           17 :               if ((op1val & 7) == 0)
    1748              :                 vcost = COSTS_N_BYTES (5);
    1749           13 :               else if (op1val > 64)
    1750              :                 vcost = COSTS_N_BYTES (10);
    1751              :               else
    1752           13 :                 vcost = TARGET_AVX ? COSTS_N_BYTES (19) : COSTS_N_BYTES (23);
    1753              :             }
    1754              :           else
    1755              :             {
    1756          187 :               scost = COSTS_N_INSNS (2);
    1757          187 :               if ((op1val & 7) == 0)
    1758              :                 vcost = COSTS_N_INSNS (1);
    1759          133 :               else if (op1val > 64)
    1760              :                 vcost = COSTS_N_INSNS (2);
    1761              :               else
    1762          133 :                 vcost = TARGET_AVX ? COSTS_N_INSNS (4) : COSTS_N_INSNS (5);
    1763              :             }
    1764          206 :           igain = scost - vcost;
    1765          206 :           break;
    1766              : 
    1767          123 :         case ASHIFTRT:
    1768              :           /* See ix86_expand_v1ti_ashiftrt.  */
    1769          123 :           op1val = INTVAL (XEXP (src, 1));
    1770          123 :           if (!speed_p)
    1771              :             {
    1772            9 :               if (op1val == 64 || op1val == 127)
    1773              :                 scost = COSTS_N_BYTES (7);
    1774            9 :               else if (op1val == 1)
    1775              :                 scost = COSTS_N_BYTES (8);
    1776            8 :               else if (op1val == 65)
    1777              :                 scost = COSTS_N_BYTES (10);
    1778            8 :               else if (op1val >= 66)
    1779              :                 scost = COSTS_N_BYTES (11);
    1780              :               else
    1781              :                 scost = COSTS_N_BYTES (9);
    1782              : 
    1783            0 :               if (op1val == 127)
    1784              :                 vcost = COSTS_N_BYTES (10);
    1785            9 :               else if (op1val == 64)
    1786              :                 vcost = COSTS_N_BYTES (14);
    1787            9 :               else if (op1val == 96)
    1788              :                 vcost = COSTS_N_BYTES (18);
    1789            9 :               else if (op1val >= 111)
    1790              :                 vcost = COSTS_N_BYTES (15);
    1791            9 :               else if (TARGET_AVX2 && op1val == 32)
    1792              :                 vcost = COSTS_N_BYTES (16);
    1793            9 :               else if (TARGET_SSE4_1 && op1val == 32)
    1794              :                 vcost = COSTS_N_BYTES (20);
    1795            9 :               else if (op1val >= 96)
    1796              :                 vcost = COSTS_N_BYTES (23);
    1797            9 :               else if ((op1val & 7) == 0)
    1798              :                 vcost = COSTS_N_BYTES (28);
    1799            9 :               else if (TARGET_AVX2 && op1val < 32)
    1800              :                 vcost = COSTS_N_BYTES (30);
    1801            9 :               else if (op1val == 1 || op1val >= 64)
    1802              :                 vcost = COSTS_N_BYTES (42);
    1803              :               else
    1804            8 :                 vcost = COSTS_N_BYTES (47);
    1805              :             }
    1806              :           else
    1807              :             {
    1808          114 :               if (op1val >= 65 && op1val <= 126)
    1809              :                 scost = COSTS_N_INSNS (3);
    1810              :               else
    1811          114 :                 scost = COSTS_N_INSNS (2);
    1812              : 
    1813          114 :               if (op1val == 127)
    1814              :                 vcost = COSTS_N_INSNS (2);
    1815          113 :               else if (op1val == 64)
    1816              :                 vcost = COSTS_N_INSNS (3);
    1817          113 :               else if (op1val == 96)
    1818              :                 vcost = COSTS_N_INSNS (3);
    1819          113 :               else if (op1val >= 111)
    1820              :                 vcost = COSTS_N_INSNS (3);
    1821          113 :               else if (TARGET_SSE4_1 && op1val == 32)
    1822              :                 vcost = COSTS_N_INSNS (3);
    1823          113 :               else if (TARGET_SSE4_1
    1824            0 :                        && (op1val == 8 || op1val == 16 || op1val == 24))
    1825              :                 vcost = COSTS_N_INSNS (3);
    1826          113 :               else if (op1val >= 96)
    1827              :                 vcost = COSTS_N_INSNS (4);
    1828          113 :               else if (TARGET_SSE4_1 && (op1val == 28 || op1val == 80))
    1829              :                 vcost = COSTS_N_INSNS (4);
    1830          113 :               else if ((op1val & 7) == 0)
    1831              :                 vcost = COSTS_N_INSNS (5);
    1832          113 :               else if (TARGET_AVX2 && op1val < 32)
    1833              :                 vcost = COSTS_N_INSNS (6);
    1834          113 :               else if (TARGET_SSE4_1 && op1val < 15)
    1835              :                 vcost = COSTS_N_INSNS (6);
    1836          113 :               else if (op1val == 1 || op1val >= 64)
    1837              :                 vcost = COSTS_N_INSNS (8);
    1838              :               else
    1839           16 :                 vcost = COSTS_N_INSNS (9);
    1840              :             }
    1841          123 :           igain = scost - vcost;
    1842          123 :           break;
    1843              : 
    1844            6 :         case ROTATE:
    1845            6 :         case ROTATERT:
    1846              :           /* See ix86_expand_v1ti_rotate.  */
    1847            6 :           op1val = INTVAL (XEXP (src, 1));
    1848            6 :           if (!speed_p)
    1849              :             {
    1850            0 :               scost = COSTS_N_BYTES (13);
    1851            0 :               if ((op1val & 31) == 0)
    1852              :                 vcost = COSTS_N_BYTES (5);
    1853            0 :               else if ((op1val & 7) == 0)
    1854            0 :                 vcost = TARGET_AVX ? COSTS_N_BYTES (13) : COSTS_N_BYTES (18);
    1855            0 :               else if (op1val > 32 && op1val < 96)
    1856              :                 vcost = COSTS_N_BYTES (24);
    1857              :               else
    1858            0 :                 vcost = COSTS_N_BYTES (19);
    1859              :             }
    1860              :           else
    1861              :             {
    1862            6 :               scost = COSTS_N_INSNS (3);
    1863            6 :               if ((op1val & 31) == 0)
    1864              :                 vcost = COSTS_N_INSNS (1);
    1865            4 :               else if ((op1val & 7) == 0)
    1866            1 :                 vcost = TARGET_AVX ? COSTS_N_INSNS (3) : COSTS_N_INSNS (4);
    1867            3 :               else if (op1val > 32 && op1val < 96)
    1868              :                 vcost = COSTS_N_INSNS (5);
    1869              :               else
    1870            3 :                 vcost = COSTS_N_INSNS (4);
    1871              :             }
    1872            6 :           igain = scost - vcost;
    1873            6 :           break;
    1874              : 
    1875           19 :         case COMPARE:
    1876           19 :           if (XEXP (src, 1) == const0_rtx)
    1877              :             {
    1878            8 :               if (GET_CODE (XEXP (src, 0)) == AND)
    1879              :                 /* and;and;or (9 bytes) vs. ptest (5 bytes).  */
    1880              :                 igain = !speed_p ? COSTS_N_BYTES (4) : COSTS_N_INSNS (2);
    1881              :               /* or (3 bytes) vs. ptest (5 bytes).  */
    1882            8 :               else if (!speed_p)
    1883            0 :                 igain = -COSTS_N_BYTES (2);
    1884              :             }
    1885           11 :           else if (XEXP (src, 1) == const1_rtx)
    1886              :             /* and;cmp -1 (7 bytes) vs. pcmpeqd;pxor;ptest (13 bytes).  */
    1887            0 :             igain = !speed_p ? -COSTS_N_BYTES (6) : -COSTS_N_INSNS (1);
    1888              :           break;
    1889              : 
    1890          260 :         case ZERO_EXTEND:
    1891          260 :           if (GET_MODE (XEXP (src, 0)) == DImode)
    1892              :             /* xor (2 bytes) vs. vmovq (5 bytes).  */
    1893          260 :             igain = speed_p ? COSTS_N_INSNS (1) - ix86_cost->sse_to_integer
    1894              :                             : -COSTS_N_BYTES (3);
    1895              :           break;
    1896              : 
    1897              :         default:
    1898              :           break;
    1899              :         }
    1900              : 
    1901      1959692 :       gain += igain;
    1902       995514 :       if (speed_p)
    1903       964178 :         weighted_gain += bb_freq * igain;
    1904              : 
    1905       995522 :       if (igain != 0 && dump_file)
    1906              :         {
    1907            0 :           fprintf (dump_file, "  Instruction gain %d with bb_freq %.2f for ",
    1908              :                    igain, bb_freq.to_double ());
    1909            0 :           dump_insn_slim (dump_file, insn);
    1910              :         }
    1911              :     }
    1912              : 
    1913       499867 :   if (dump_file)
    1914            0 :     fprintf (dump_file, "  Total gain: %d, weighted gain %.2f\n",
    1915              :              gain, weighted_gain.to_double ());
    1916              : 
    1917       499867 :   if (weighted_gain > (sreal) 0)
    1918              :     return true;
    1919              :   else
    1920        54212 :     return gain > 0;
    1921              : }
    1922              : 
    1923              : /* Fix uses of converted REG in debug insns.  Does nothing unless flag_var_tracking is set; otherwise every V1TImode use of REG found in a debug insn is wrapped in a TImode SUBREG and that insn is rescanned.  */
    1924              : 
    1925              : void
    1926       419412 : timode_scalar_chain::fix_debug_reg_uses (rtx reg)
    1927              : {
    1928       419412 :   if (!flag_var_tracking)
    1929              :     return;
    1930              : 
    1931       370692 :   df_ref ref, next;
    1932       760135 :   for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
    1933              :     {
    1934       389443 :       rtx_insn *insn = DF_REF_INSN (ref);
    1935              :       /* Make sure the next ref is for a different instruction,
    1936              :          so that we're not affected by the rescan.  */
    1937       389443 :       next = DF_REF_NEXT_REG (ref);
    1938       389443 :       while (next && DF_REF_INSN (next) == insn)
    1939            0 :         next = DF_REF_NEXT_REG (next);
    1940              : 
    1941       389443 :       if (DEBUG_INSN_P (insn))
    1942              :         {
    1943              :           /* It may be a debug insn with a TImode variable in
    1944              :              register.  Walk every use of REG in this insn (REF up to, but not including, NEXT).  */
    1945              :           bool changed = false;
    1946          228 :           for (; ref != next; ref = DF_REF_NEXT_REG (ref))
    1947              :             {
    1948          114 :               rtx *loc = DF_REF_LOC (ref);
    1949          114 :               if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
    1950              :                 {
    1951          105 :                   *loc = gen_rtx_SUBREG (TImode, *loc, 0);  /* Present the V1TImode register as TImode again.  */
    1952          105 :                   changed = true;
    1953              :                 }
    1954              :             }
    1955          114 :           if (changed)  /* Rescan only when at least one use was rewritten.  */
    1956          105 :             df_insn_rescan (insn);
    1957              :         }
    1958              :     }
    1959              : }
    1960              : 
    1961              : /* Convert SRC, a *concatditi3 pattern, into a vec_concatv2di instruction.
    1962              :    Insert this before INSN, and return the result as a V1TImode subreg.  The ASHIFT holding the high half may be either operand 0 or operand 1 of SRC.  */
    1963              : 
    1964              : static rtx
    1965          266 : timode_convert_concatdi (rtx src, rtx_insn *insn)
    1966              : {
    1967          266 :   rtx hi, lo;
    1968          266 :   rtx tmp = gen_reg_rtx (V2DImode);
    1969          266 :   if (GET_CODE (XEXP (src, 0)) == ASHIFT)
    1970              :     {
    1971          266 :       hi = XEXP (XEXP (XEXP (src, 0), 0), 0);  /* Operand under the ASHIFT and its inner wrapper (presumably a ZERO_EXTEND).  */
    1972          266 :       lo = XEXP (XEXP (src, 1), 0);
    1973              :     }
    1974              :   else
    1975              :     {
    1976            0 :       hi = XEXP (XEXP (XEXP (src, 1), 0), 0);  /* Same shape with the operands of SRC swapped.  */
    1977            0 :       lo = XEXP (XEXP (src, 0), 0);
    1978              :     }
    1979          266 :   emit_insn_before (gen_vec_concatv2di (tmp, lo, hi), insn);
    1980          266 :   return gen_rtx_SUBREG (V1TImode, tmp, 0);
    1981              : }
    1982              : 
    1983              : /* Convert INSN from TImode to V1TImode.  The destination and source of INSN's single set are rewritten, any helper insns are emitted before INSN, and INSN is then re-recognized and rescanned.  */
    1984              : 
    1985              : void
    1986       907085 : timode_scalar_chain::convert_insn (rtx_insn *insn)
    1987              : {
    1988       907085 :   rtx def_set = single_set (insn);
    1989       907085 :   rtx src = SET_SRC (def_set);
    1990       907085 :   rtx dst = SET_DEST (def_set);
    1991       907085 :   rtx tmp;
    1992              : 
    1993       907085 :   switch (GET_CODE (dst))
    1994              :     {
    1995       419929 :     case REG:
    1996       419929 :       if (GET_MODE (dst) == TImode)
    1997              :         {
    1998       419300 :           if (!HARD_REGISTER_NUM_P (REGNO (dst)))
    1999              :             {
    2000       418795 :               PUT_MODE (dst, V1TImode);
    2001       418795 :               fix_debug_reg_uses (dst);
    2002              :             }
    2003          505 :           else if (!GENERAL_REGNO_P (REGNO (dst)))
    2004          359 :             dst = gen_raw_REG (V1TImode, REGNO (dst));
    2005              :         }
    2006       419929 :       if (GET_MODE (dst) == V1TImode)
    2007              :         {
    2008              :           /* It might potentially be helpful to convert REG_EQUAL notes,
    2009              :              but for now we just remove them.  */
    2010       419771 :           rtx note = find_reg_equal_equiv_note (insn);
    2011       419771 :           if (note)
    2012          470 :             remove_note (insn, note);
    2013              :         }
    2014              :       break;
    2015       487156 :     case MEM:
    2016       487156 :       PUT_MODE (dst, V1TImode);
    2017       487156 :       break;
    2018              : 
    2019            0 :     default:
    2020            0 :       gcc_unreachable ();
    2021              :     }
    2022              : 
    2023       907085 :   switch (GET_CODE (src))
    2024              :     {
    2025       448038 :     case REG:
    2026       448038 :       if (GET_MODE (src) == TImode)
    2027              :         {
    2028          823 :           if (GENERAL_REGNO_P (REGNO (src)))
    2029              :             {
    2030          201 :               rtx lo = gen_reg_rtx (DImode);
    2031          201 :               rtx hi = gen_reg_rtx (DImode);
    2032          201 :               emit_insn_before (gen_rtx_SET (lo, gen_lowpart (DImode, src)),
    2033              :                                 insn);
    2034          201 :               emit_insn_before (gen_rtx_SET (hi, gen_highpart (DImode, src)),
    2035              :                                 insn);
    2036          201 :               src = gen_reg_rtx (V2DImode);
    2037          201 :               emit_insn_before (gen_vec_concatv2di (src, lo, hi), insn);
    2038          201 :               src = gen_lowpart (V1TImode, src);
    2039              :             }
    2040          622 :           else if (!HARD_REGISTER_NUM_P (REGNO (src)))
    2041              :             {
    2042          617 :               PUT_MODE (src, V1TImode);
    2043          617 :               fix_debug_reg_uses (src);
    2044              :             }
    2045              :           else
    2046            5 :             src = gen_raw_REG (V1TImode, REGNO (src));
    2047              :         }
    2048       448038 :       if (GENERAL_REG_P (dst))  /* Destination is a general register: extract both DImode halves of the vector source.  */
    2049              :         {
    2050          146 :           rtx tmp = gen_reg_rtx (V2DImode);
    2051          146 :           src = gen_lowpart (V2DImode, src);
    2052          146 :           emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2053              :           /* Extracting hi before lo helps register allocation.  */
    2054          146 :           rtx hi = gen_reg_rtx (DImode);
    2055          146 :           rtx lo = gen_reg_rtx (DImode);
    2056          146 :           emit_insn_before (gen_vec_extractv2didi (hi, tmp, const1_rtx), insn);
    2057          146 :           emit_insn_before (gen_vec_extractv2didi (lo, tmp, const0_rtx), insn);
    2058              : 
    2059              :           /* Construct *concatditi3 pattern from lo and hi.  */
    2060          146 :           hi = gen_rtx_ZERO_EXTEND (TImode, hi);
    2061          146 :           hi = gen_rtx_ASHIFT (TImode, hi, GEN_INT (64));
    2062          146 :           lo = gen_rtx_ZERO_EXTEND (TImode, lo);
    2063          146 :           src = gen_rtx_PLUS (TImode, hi, lo);
    2064              :         }
    2065              :       break;
    2066              : 
    2067       417759 :     case MEM:
    2068       417759 :       PUT_MODE (src, V1TImode);
    2069       417759 :       break;
    2070              : 
    2071        30182 :     case CONST_WIDE_INT:
    2072        30182 :       if (NONDEBUG_INSN_P (insn))
    2073              :         {
    2074              :           /* Since there are no instructions to store 128-bit constant,
    2075              :              temporary register usage is required.  */
    2076        30182 :           bool use_move;
    2077        30182 :           start_sequence ();
    2078        30182 :           tmp = ix86_convert_const_wide_int_to_broadcast (TImode, src);
    2079        30182 :           if (tmp)
    2080              :             {
    2081          194 :               src = lowpart_subreg (V1TImode, tmp, TImode);
    2082          194 :               use_move = true;
    2083              :             }
    2084              :           else
    2085              :             {
    2086        29988 :               src = smode_convert_cst (src, V1TImode);
    2087        29988 :               src = validize_mem (force_const_mem (V1TImode, src));
    2088        29988 :               use_move = MEM_P (dst);
    2089              :             }
    2090        30182 :           rtx_insn *seq = end_sequence ();
    2091        30182 :           if (seq)
    2092          195 :             emit_insn_before (seq, insn);
    2093        30182 :           if (use_move)
    2094              :             {
    2095        29879 :               tmp = gen_reg_rtx (V1TImode);
    2096        29879 :               emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2097        29879 :               src = tmp;
    2098              :             }
    2099              :         }
    2100              :       break;
    2101              : 
    2102        10593 :     case CONST_INT:
    2103        10593 :       switch (standard_sse_constant_p (src, TImode))
    2104              :         {
    2105        10370 :         case 1:
    2106        10370 :           src = CONST0_RTX (GET_MODE (dst));
    2107        10370 :           break;
    2108          223 :         case 2:
    2109          223 :           src = CONSTM1_RTX (GET_MODE (dst));
    2110          223 :           break;
    2111            0 :         default:
    2112            0 :           gcc_unreachable ();
    2113              :         }
    2114        10593 :       if (MEM_P (dst))
    2115              :         {
    2116        10058 :           tmp = gen_reg_rtx (V1TImode);
    2117        10058 :           emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2118        10058 :           src = tmp;
    2119              :         }
    2120              :       break;
    2121              : 
    2122           13 :     case AND:
    2123           13 :       if (GET_CODE (XEXP (src, 0)) == NOT)  /* (and (not X) Y): convert X and Y, then retag both the NOT and the AND.  */
    2124              :         {
    2125            0 :           convert_op (&XEXP (XEXP (src, 0), 0), insn);
    2126            0 :           convert_op (&XEXP (src, 1), insn);
    2127            0 :           PUT_MODE (XEXP (src, 0), V1TImode);
    2128            0 :           PUT_MODE (src, V1TImode);
    2129            0 :           break;
    2130              :         }
    2131           13 :       convert_op (&XEXP (src, 0), insn);
    2132           13 :       convert_op (&XEXP (src, 1), insn);
    2133           13 :       PUT_MODE (src, V1TImode);
    2134           13 :       if (MEM_P (dst))
    2135              :         {
    2136           10 :           tmp = gen_reg_rtx (V1TImode);
    2137           10 :           emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2138           10 :           src = tmp;
    2139              :         }
    2140              :       break;
    2141              : 
    2142          343 :     case XOR:
    2143          343 :     case IOR:
    2144          343 :       if (timode_concatdi_p (src))
    2145              :         {
    2146          266 :           src = timode_convert_concatdi (src, insn);
    2147          266 :           break;
    2148              :         }
    2149           77 :       convert_op (&XEXP (src, 0), insn);
    2150           77 :       convert_op (&XEXP (src, 1), insn);
    2151           77 :       PUT_MODE (src, V1TImode);
    2152           77 :       if (MEM_P (dst))
    2153              :         {
    2154            8 :           tmp = gen_reg_rtx (V1TImode);
    2155            8 :           emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2156            8 :           src = tmp;
    2157              :         }
    2158              :       break;
    2159              : 
    2160            3 :     case NOT:  /* Rewritten as an XOR with a register loaded with CONSTM1_RTX (V1TImode).  */
    2161            3 :       src = XEXP (src, 0);
    2162            3 :       convert_op (&src, insn);
    2163            3 :       tmp = gen_reg_rtx (V1TImode);
    2164            3 :       emit_insn_before (gen_move_insn (tmp, CONSTM1_RTX (V1TImode)), insn);
    2165            3 :       src = gen_rtx_XOR (V1TImode, src, tmp);
    2166            3 :       if (MEM_P (dst))
    2167              :         {
    2168            0 :           tmp = gen_reg_rtx (V1TImode);
    2169            0 :           emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2170            0 :           src = tmp;
    2171              :         }
    2172              :       break;
    2173              : 
    2174           12 :     case COMPARE:  /* The destination is replaced by FLAGS_REG in CCZmode.  */
    2175           12 :       dst = gen_rtx_REG (CCZmode, FLAGS_REG);
    2176           12 :       src = convert_compare (XEXP (src, 0), XEXP (src, 1), insn);
    2177           12 :       break;
    2178              : 
    2179           43 :     case ASHIFT:
    2180           43 :     case LSHIFTRT:
    2181           43 :     case ASHIFTRT:
    2182           43 :     case ROTATERT:
    2183           43 :     case ROTATE:
    2184           43 :       convert_op (&XEXP (src, 0), insn);  /* Only operand 0 is converted; operand 1 (the count) is left as is.  */
    2185           43 :       PUT_MODE (src, V1TImode);
    2186           43 :       break;
    2187              : 
    2188           99 :     case ZERO_EXTEND:
    2189           99 :       if (GET_MODE (XEXP (src, 0)) == DImode)
    2190              :         {
    2191              :           /* Convert to *vec_concatv2di_0.  */
    2192           99 :           rtx tmp = gen_reg_rtx (V2DImode);
    2193           99 :           rtx pat = gen_rtx_VEC_CONCAT (V2DImode, XEXP (src, 0), const0_rtx);
    2194           99 :           emit_insn_before (gen_move_insn (tmp, pat), insn);
    2195           99 :           src = gen_rtx_SUBREG (vmode, tmp, 0);
    2196              :         }
    2197              :       else
    2198            0 :         gcc_unreachable ();
    2199           99 :       break;
    2200              : 
    2201            0 :     case PLUS:
    2202            0 :       if (timode_concatdi_p (src))
    2203            0 :         src = timode_convert_concatdi (src, insn);
    2204              :       else
    2205            0 :         gcc_unreachable ();
    2206            0 :       break;
    2207              : 
    2208            0 :     default:
    2209            0 :       gcc_unreachable ();
    2210              :     }
    2211              : 
    2212       907085 :   SET_SRC (def_set) = src;
    2213       907085 :   SET_DEST (def_set) = dst;
    2214              : 
    2215              :   /* Drop possible dead definitions.  */
    2216       907085 :   PATTERN (insn) = def_set;
    2217              : 
    2218       907085 :   INSN_CODE (insn) = -1;  /* Reset the cached insn code so that recog_memoized recognizes the new pattern.  */
    2219       907085 :   recog_memoized (insn);
    2220       907085 :   df_insn_rescan (insn);
    2221       907085 : }
    2222              : 
    2223              : /* Generate copies from defs used by the chain but not defined therein.
    2224              :    Also populates defs_map which is used later by convert_insn.  */
    2225              : 
    2226              : void
    2227       631957 : scalar_chain::convert_registers ()
    2228              : {
    2229       631957 :   bitmap_iterator bi;
    2230       631957 :   unsigned id;
    2231       658127 :   EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)  /* Map every register in defs_conv to a fresh smode pseudo.  */
    2232              :     {
    2233        26170 :       rtx chain_reg = gen_reg_rtx (smode);
    2234        26170 :       defs_map.put (regno_reg_rtx[id], chain_reg);
    2235              :     }
    2236       640371 :   EXECUTE_IF_SET_IN_BITMAP (insns_conv, 0, id, bi)  /* For each insn in insns_conv, make vector copies of its defs that are in defs_conv.  */
    2237        21067 :     for (df_ref ref = DF_INSN_UID_DEFS (id); ref; ref = DF_REF_NEXT_LOC (ref))
    2238        12653 :       if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
    2239         8414 :         make_vector_copies (DF_REF_INSN (ref), DF_REF_REAL_REG (ref));
    2240       631957 : }
    2241              : 
    2242              : /* Convert whole chain creating required register
    2243              :    conversions and copies.  Return the number of insns converted, or 0 when dbg_cnt (stv_conversion) is false.  */
    2244              : 
    2245              : int
    2246       631957 : scalar_chain::convert ()
    2247              : {
    2248       631957 :   bitmap_iterator bi;
    2249       631957 :   unsigned id;
    2250       631957 :   int converted_insns = 0;
    2251              : 
    2252       631957 :   if (!dbg_cnt (stv_conversion))
    2253              :     return 0;
    2254              : 
    2255       631957 :   if (dump_file)
    2256            0 :     fprintf (dump_file, "Converting chain #%d...\n", chain_id);
    2257              : 
    2258       631957 :   convert_registers ();  /* Set up register copies before any insn of the chain is rewritten.  */
    2259              : 
    2260      1950658 :   EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
    2261              :     {
    2262      1318701 :       rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
    2263      1318701 :       convert_insn_common (insn);
    2264      1318701 :       convert_insn (insn);
    2265      1318701 :       converted_insns++;
    2266              :     }
    2267              : 
    2268              :   return converted_insns;
    2269              : }
    2270              : 
    2271              : /* Return the SET expression if INSN doesn't reference hard register.
    2272              :    Return NULL if INSN uses or defines a hard register, excluding
    2273              :    pseudo register pushes, hard register uses in a memory address,
    2274              :    clobbers and flags definitions.  Also return NULL if INSN is not a single set.  */
    2275              : 
    2276              : static rtx
    2277    338790339 : pseudo_reg_set (rtx_insn *insn)
    2278              : {
    2279    338790339 :   rtx set = single_set (insn);
    2280    338790339 :   if (!set)
    2281              :     return NULL;
    2282              : 
    2283              :   /* Check pseudo register push first.  The push operand is checked in TImode for TARGET_64BIT and in DImode otherwise.  */
    2284    134994096 :   machine_mode mode = TARGET_64BIT ? TImode : DImode;
    2285    134994096 :   if (REG_P (SET_SRC (set))
    2286     38213537 :       && !HARD_REGISTER_P (SET_SRC (set))
    2287    164804080 :       && push_operand (SET_DEST (set), mode))
    2288              :     return set;
    2289              : 
    2290    134740953 :   df_ref ref;
    2291    218062626 :   FOR_EACH_INSN_DEF (ref, insn)  /* Reject hard register defs other than must-clobbers and FLAGS_REG.  */
    2292    120178038 :     if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
    2293     64697182 :         && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
    2294    170433143 :         && DF_REF_REGNO (ref) != FLAGS_REG)
    2295              :       return NULL;
    2296              : 
    2297    187740355 :   FOR_EACH_INSN_USE (ref, insn)  /* Reject hard register uses unless DF_REF_REG_MEM_P holds for the ref.  */
    2298    115178667 :     if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
    2299              :       return NULL;
    2300              : 
    2301              :   return set;
    2302              : }
    2303              : 
    2304              : /* Return true if the register REG is defined in a single DEF chain.
    2305              :    If it is defined in more than one DEF chains, we may not be able
    2306              :    to convert it in all chains.  Return false if REG has no def at all.  */
    2307              : 
    2308              : static bool
    2309      1240630 : single_def_chain_p (rtx reg)
    2310              : {
    2311      1240630 :   df_ref ref = DF_REG_DEF_CHAIN (REGNO (reg));
    2312      1240630 :   if (!ref)  /* No def recorded for REG.  */
    2313              :     return false;
    2314      1240610 :   return DF_REF_NEXT_REG (ref) == nullptr;  /* True only when the first def is also the last one.  */
    2315              : }
    2316              : 
    2317              : /* Check if comparison INSN may be transformed into vector comparison.
    2318              :    Currently we transform equality/inequality checks which look like:
    2319              :    (set (reg:CCZ 17 flags) (compare:CCZ (reg:TI x) (reg:TI y)))
                      :
                      :    MODE must be TImode for 64-bit targets and DImode otherwise, and
                      :    SSE4.1 must be enabled; false is returned if either does not
                      :    hold.  INSN must be a single set whose source is a COMPARE; both
                      :    properties are asserted.  Besides the plain comparison above, an
                      :    AND compared against zero is accepted, optionally with a NOT as
                      :    its first operand.  */
    2320              : 
    2321              : static bool
    2322     12785335 : convertible_comparison_p (rtx_insn *insn, enum machine_mode mode)
    2323              : {
    2324     14195677 :   if (mode != (TARGET_64BIT ? TImode : DImode))
    2325              :     return false;
    2326              : 
    2327      4672554 :   if (!TARGET_SSE4_1)
    2328              :     return false;
    2329              : 
    2330       164004 :   rtx def_set = single_set (insn);
    2331              : 
    2332       164004 :   gcc_assert (def_set);
    2333              : 
    2334       164004 :   rtx src = SET_SRC (def_set);
    2335       164004 :   rtx dst = SET_DEST (def_set);
    2336              : 
    2337       164004 :   gcc_assert (GET_CODE (src) == COMPARE);
    2338              :   /* The destination must be the flags register in CCZmode.  */
    2339       164004 :   if (!REG_P (dst)
    2340       164004 :       || REGNO (dst) != FLAGS_REG
    2341       328008 :       || GET_MODE (dst) != CCZmode)
    2342              :     return false;
    2343              : 
    2344       114364 :   rtx op1 = XEXP (src, 0);
    2345       114364 :   rtx op2 = XEXP (src, 1);
    2346              : 
    2347              :   /* *cmp<dwi>_doubleword: both operands are general operands of
                      :      MODE.  */
    2348       114364 :   if (general_operand (op1, mode)
    2349       114364 :       && general_operand (op2, mode))
    2350              :     return true;
    2351              : 
    2352              :   /* *testti_doubleword: (and REG X) compared against zero.  Note that
                      :      this branch checks for TImode rather than MODE: REG must be in
                      :      TImode, and X must be a scalar integer constant or a TImode
                      :      register or memory.  */
    2353       114308 :   if (op2 == const0_rtx
    2354        38179 :       && GET_CODE (op1) == AND
    2355          142 :       && REG_P (XEXP (op1, 0)))
    2356              :     {
    2357          142 :       rtx op12 = XEXP (op1, 1);
    2358          142 :       return GET_MODE (XEXP (op1, 0)) == TImode
    2359          142 :              && (CONST_SCALAR_INT_P (op12)
    2360            0 :                  || ((REG_P (op12) || MEM_P (op12))
    2361            0 :                      && GET_MODE (op12) == TImode));
    2362              :     }
    2363              : 
    2364              :   /* *test<dwi>_not_doubleword: (and (not X) Y) compared against zero,
                      :      where X and Y are each a register or memory in MODE.  */
    2365       114166 :   if (op2 == const0_rtx
    2366        38037 :       && GET_CODE (op1) == AND
    2367            0 :       && GET_CODE (XEXP (op1, 0)) == NOT)
    2368              :     {
    2369            0 :       rtx op11 = XEXP (XEXP (op1, 0), 0);
    2370            0 :       rtx op12 = XEXP (op1, 1);
    2371            0 :       return (REG_P (op11) || MEM_P (op11))
    2372            0 :              && (REG_P (op12) || MEM_P (op12))
    2373            0 :              && GET_MODE (op11) == mode
    2374            0 :              && GET_MODE (op12) == mode;
    2375              :     }
    2376              : 
    2377              :   return false;
    2378              : }
    2379              : 
    2380              : /* The general version of scalar_to_vector_candidate_p.  Return true
                      :    if INSN is a candidate for conversion of its MODE scalar
                      :    operation to vector mode.  INSN must be accepted by
                      :    pseudo_reg_set.  Comparisons are delegated to
                      :    convertible_comparison_p; any other set must have a MODE register
                      :    or memory destination and a source of one of the forms handled
                      :    in the switch below.  */
    2381              : 
    2382              : static bool
    2383    236964414 : general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
    2384              : {
    2385    236964414 :   rtx def_set = pseudo_reg_set (insn);
    2386              : 
    2387    236964414 :   if (!def_set)
    2388              :     return false;
    2389              : 
    2390     49246740 :   rtx src = SET_SRC (def_set);
    2391     49246740 :   rtx dst = SET_DEST (def_set);
    2392              : 
    2393     49246740 :   if (GET_CODE (src) == COMPARE)
    2394      8817952 :     return convertible_comparison_p (insn, mode);
    2395              : 
    2396              :   /* We are interested in "mode" only.  A CONST_INT source is exempt
                      :      from the source mode check; the destination never is.  */
    2397     40428788 :   if ((GET_MODE (src) != mode
    2398     27618172 :        && !CONST_INT_P (src))
    2399     17909006 :       || GET_MODE (dst) != mode)
    2400              :     return false;
    2401              : 
    2402     15069205 :   if (!REG_P (dst) && !MEM_P (dst))
    2403              :     return false;
    2404              : 
    2405     14812226 :   switch (GET_CODE (src))
    2406              :     {
    2407       530461 :     case ASHIFT:
    2408       530461 :     case LSHIFTRT:
    2409       530461 :     case ASHIFTRT:
    2410       530461 :     case ROTATE:
    2411       530461 :     case ROTATERT:
    2412       530461 :       if (!CONST_INT_P (XEXP (src, 1))
    2413      1024985 :           || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, GET_MODE_BITSIZE (mode)-1))
    2414              :         return false;
    2415              : 
    2416              :       /* Check for extend highpart case: for a DImode ASHIFTRT of an
                      :          ASHIFT, the operand checks after the switch are applied to
                      :          the inner ASHIFT.  */
    2417       494520 :       if (mode != DImode
    2418       361009 :           || GET_CODE (src) != ASHIFTRT
    2419        81904 :           || GET_CODE (XEXP (src, 0)) != ASHIFT)
    2420              :         break;
    2421              : 
    2422      3682276 :       src = XEXP (src, 0);
    2423              :       break;
    2424              : 
    2425        87166 :     case SMAX:
    2426        87166 :     case SMIN:
    2427        87166 :     case UMAX:
    2428        87166 :     case UMIN:
    2429        87166 :       if ((mode == DImode && !TARGET_AVX512VL)
    2430        17860 :           || (mode == SImode && !TARGET_SSE4_1))
    2431              :         return false;
    2432              :       /* Fallthru.  */
    2433              : 
    2434      3226132 :     case AND:
    2435      3226132 :     case IOR:
    2436      3226132 :     case XOR:
    2437      3226132 :     case PLUS:
    2438      3226132 :     case MINUS:
    2439      3226132 :       if (!REG_P (XEXP (src, 1))
    2440              :           && !MEM_P (XEXP (src, 1))
    2441              :           && !CONST_INT_P (XEXP (src, 1)))
    2442              :         return false;
    2443              : 
    2444      3135024 :       if (GET_MODE (XEXP (src, 1)) != mode
    2445      1826635 :           && !CONST_INT_P (XEXP (src, 1)))
    2446              :         return false;
    2447              : 
    2448              :       /* Check for andnot case: for (and (not X) Y), the operand checks
                      :          after the switch are applied to X.  */
    2449      3135024 :       if (GET_CODE (src) != AND
    2450       177746 :           || GET_CODE (XEXP (src, 0)) != NOT)
    2451              :         break;
    2452              : 
    2453      3682276 :       src = XEXP (src, 0);
    2454              :       /* FALLTHRU */
    2455              : 
    2456              :     case NOT:
    2457              :       break;
    2458              : 
    2459        24839 :     case NEG:
    2460              :       /* Check for nabs case: (neg (abs X)) is handled like (abs X).  */
    2461        24839 :       if (GET_CODE (XEXP (src, 0)) != ABS)
    2462              :         break;
    2463              : 
    2464              :       src = XEXP (src, 0);
    2465              :       /* FALLTHRU */
    2466              : 
    2467         3793 :     case ABS:
    2468         3793 :       if ((mode == DImode && !TARGET_AVX512VL)
    2469         1431 :           || (mode == SImode && !TARGET_SSSE3))
    2470              :         return false;
    2471              :       break;
    2472              : 
    2473              :     case REG:
    2474              :       return true;
    2475              : 
    2476      5962011 :     case MEM:
    2477      5962011 :     case CONST_INT:
    2478      5962011 :       return REG_P (dst);
    2479              : 
    2480        57285 :     case VEC_SELECT:
    2481              :       /* Excluding MEM_P (dst) avoids interfering with vpextr[dq].  */
    2482        57285 :       return REG_P (dst)
    2483        46815 :              && REG_P (XEXP (src, 0))
    2484        53404 :              && GET_MODE (XEXP (src, 0)) == (mode == DImode ? V2DImode
    2485              :                                                             : V4SImode)
    2486        36964 :              && GET_CODE (XEXP (src, 1)) == PARALLEL
    2487        36964 :              && XVECLEN (XEXP (src, 1), 0) == 1
    2488        94249 :              && CONST_INT_P (XVECEXP (XEXP (src, 1), 0, 0));
    2489              : 
    2490              :     default:
    2491              :       return false;
    2492              :     }
    2493              :   /* The first operand of SRC (or of the inner rtx selected above) must
                      :      be a register, memory or CONST_INT, and must be in MODE unless
                      :      it is a CONST_INT.  */
    2494      3682276 :   if (!REG_P (XEXP (src, 0))
    2495              :       && !MEM_P (XEXP (src, 0))
    2496              :       && !CONST_INT_P (XEXP (src, 0)))
    2497              :     return false;
    2498              : 
    2499      3376138 :   if (GET_MODE (XEXP (src, 0)) != mode
    2500            0 :       && !CONST_INT_P (XEXP (src, 0)))
    2501              :     return false;
    2502              : 
    2503              :   return true;
    2504              : }
    2505              : 
    2506              : /* Check for a suitable TImode memory operand.  Return true if X is a
                      :    MEM and either TARGET_SSE_UNALIGNED_LOAD_OPTIMAL holds or X is
                      :    not a misaligned operand for TImode.  */
    2507              : 
    2508              : static bool
    2509         1582 : timode_mem_p (rtx x)
    2510              : {
    2511         1582 :   return MEM_P (x)
    2512         1582 :          && (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
    2513            0 :              || !misaligned_operand (x, TImode));
    2514              : }
    2515              : 
    2516              : /* The TImode version of scalar_to_vector_candidate_p.  Return true if
                      :    INSN is a candidate for conversion of its TImode operation to
                      :    vector mode.  Unlike the general version, a TImode register copy
                      :    between a hard register and a pseudo is also accepted.  A
                      :    register destination, and the source of a register copy, must
                      :    have a single definition (see single_def_chain_p).  */
    2517              : 
    2518              : static bool
    2519    101825925 : timode_scalar_to_vector_candidate_p (rtx_insn *insn)
    2520              : {
    2521    101825925 :   rtx def_set = pseudo_reg_set (insn);
    2522              : 
    2523              :   /* We allow two exceptions to the pseudo registers only rule.
    2524              :      Setting a hard register from a pseudo, and setting a pseudo
    2525              :      from a hard register.  In both cases the destination must be
                      :      in TImode and the pseudo must have a single definition.  */
    2526    101825925 :   if (!def_set)
    2527              :     {
    2528     78257834 :       def_set = single_set (insn);
    2529     78257834 :       if (def_set)
    2530              :         {
    2531     17876965 :           rtx src = SET_SRC (def_set);
    2532     17876965 :           rtx dst = SET_DEST (def_set);
    2533     17876965 :           if (GET_MODE (dst) == TImode
    2534       220539 :               && REG_P (src) && REG_P (dst))
    2535              :             {
    2536       101506 :               if (HARD_REGISTER_P (dst)
    2537        52167 :                   && !HARD_REGISTER_P (src)
    2538       153673 :                   && single_def_chain_p (src))
    2539              :                 return true;
    2540        72881 :               if (HARD_REGISTER_P (src)
    2541        49339 :                   && !HARD_REGISTER_P (dst)
    2542       122220 :                   && single_def_chain_p (dst))
    2543              :                 return true;
    2544              :             }
    2545              :         }
    2546              :       return false;
    2547              :     }
    2548              : 
    2549     23568091 :   rtx src = SET_SRC (def_set);
    2550     23568091 :   rtx dst = SET_DEST (def_set);
    2551              : 
    2552     23568091 :   if (GET_CODE (src) == COMPARE)
    2553      3967383 :     return convertible_comparison_p (insn, TImode);
    2554              :   /* Only TImode sets are of interest; a scalar integer constant
                      :      source is exempt from the source mode check.  */
    2555     19600708 :   if (GET_MODE (dst) != TImode
    2556      1182149 :       || (GET_MODE (src) != TImode
    2557        59090 :           && !CONST_SCALAR_INT_P (src)))
    2558              :     return false;
    2559              : 
    2560      1182149 :   if (!REG_P (dst) && !MEM_P (dst))
    2561              :     return false;
    2562              :   /* A misaligned memory destination is rejected unless
                      :      TARGET_SSE_UNALIGNED_STORE_OPTIMAL holds.  */
    2563      1180696 :   if (MEM_P (dst)
    2564       523673 :       && misaligned_operand (dst, TImode)
    2565      1487671 :       && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
    2566              :     return false;
    2567              :   /* A register destination must have a single definition.  */
    2568      1180691 :   if (REG_P (dst) && !single_def_chain_p (dst))
    2569              :     return false;
    2570              : 
    2571      1029417 :   switch (GET_CODE (src))
    2572              :     {
    2573       482101 :     case REG:
    2574       482101 :       return single_def_chain_p (src);
    2575              : 
    2576              :     case CONST_WIDE_INT:
    2577              :       return true;
    2578              : 
    2579        12482 :     case CONST_INT:
    2580              :       /* ??? Verify performance impact before enabling CONST_INT for
    2581              :          __int128 store.  */
    2582        12482 :       return standard_sse_constant_p (src, TImode);
    2583              : 
    2584       439787 :     case MEM:
    2585              :       /* Memory must be aligned or unaligned load is optimal.  Only
                      :          loads into a register are accepted.  */
    2586       439787 :       return (REG_P (dst)
    2587       439787 :               && (!misaligned_operand (src, TImode)
    2588       141320 :                   || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
    2589              :     /* Accept (and (not REG) Y) into a non-memory destination, or a
                      :        plain (and X Y).  X may be a register or a suitable TImode
                      :        memory (see timode_mem_p); Y may additionally be a scalar
                      :        integer constant.  */
    2590         3109 :     case AND:
    2591         3109 :       if (!MEM_P (dst)
    2592         3068 :           && GET_CODE (XEXP (src, 0)) == NOT
    2593            0 :           && REG_P (XEXP (XEXP (src, 0), 0))
    2594         3109 :           && (REG_P (XEXP (src, 1))
    2595            0 :               || CONST_SCALAR_INT_P (XEXP (src, 1))
    2596            0 :               || timode_mem_p (XEXP (src, 1))))
    2597            0 :         return true;
    2598         3109 :       return (REG_P (XEXP (src, 0))
    2599           46 :               || timode_mem_p (XEXP (src, 0)))
    2600         3155 :              && (REG_P (XEXP (src, 1))
    2601         1280 :                  || CONST_SCALAR_INT_P (XEXP (src, 1))
    2602           35 :                  || timode_mem_p (XEXP (src, 1)));
    2603              :     /* Anything accepted by timode_concatdi_p is a candidate.
                      :        Otherwise the first operand must be a register or a suitable
                      :        TImode memory, and the second may additionally be a scalar
                      :        integer constant.  */
    2604        14048 :     case IOR:
    2605        14048 :     case XOR:
    2606        14048 :       if (timode_concatdi_p (src))
    2607              :         return true;
    2608         2722 :       return (REG_P (XEXP (src, 0))
    2609         1431 :               || timode_mem_p (XEXP (src, 0)))
    2610         2739 :              && (REG_P (XEXP (src, 1))
    2611          290 :                  || CONST_SCALAR_INT_P (XEXP (src, 1))
    2612           54 :                  || timode_mem_p (XEXP (src, 1)));
    2613              : 
    2614          509 :     case NOT:
    2615          509 :       return REG_P (XEXP (src, 0)) || timode_mem_p (XEXP (src, 0));
    2616              : 
    2617        11664 :     case ASHIFT:
    2618        11664 :     case LSHIFTRT:
    2619        11664 :     case ASHIFTRT:
    2620        11664 :     case ROTATERT:
    2621        11664 :     case ROTATE:
    2622              :       /* Handle shifts/rotates by integer constants between 0 and 127.
                      :          The shifted operand must be a register.  */
    2623        11664 :       return REG_P (XEXP (src, 0))
    2624        11632 :              && CONST_INT_P (XEXP (src, 1))
    2625        22936 :              && (INTVAL (XEXP (src, 1)) & ~0x7f) == 0;
    2626              :     /* A PLUS is a candidate only if timode_concatdi_p accepts it.  */
    2627         7017 :     case PLUS:
    2628         7017 :       return timode_concatdi_p (src);
    2629              :     /* Zero extension is accepted from a DImode register only.  */
    2630         3751 :     case ZERO_EXTEND:
    2631         3751 :       return REG_P (XEXP (src, 0))
    2632         3751 :              && GET_MODE (XEXP (src, 0)) == DImode;
    2633              : 
    2634              :     default:
    2635              :       return false;
    2636              :     }
    2637              : }
    2638              : 
    2639              : /* For a register REGNO, scan instructions for its defs and uses.
    2640              :    Put REGNO in REGS if a def or a non-debug use isn't in CANDIDATES.
                      :    Nothing is done if REGNO is a hard register or is already in
                      :    REGS.  The first offending def and the first offending use are
                      :    reported to dump_file when dumping is enabled.  */
    2641              : 
    2642              : static void
    2643      1221400 : timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
    2644              :                                    unsigned int regno)
    2645              : {
    2646              :   /* Do nothing if REGNO is already in REGS or is a hard reg.  */
    2647      1221400 :   if (bitmap_bit_p (regs, regno)
    2648      1221400 :       || HARD_REGISTER_NUM_P (regno))
    2649              :     return;
    2650              :   /* Look for a definition in an insn that is not a candidate; the
                      :      scan stops at the first one found.  */
    2651      1213358 :   for (df_ref def = DF_REG_DEF_CHAIN (regno);
    2652      2416377 :        def;
    2653      1203019 :        def = DF_REF_NEXT_REG (def))
    2654              :     {
    2655      1213338 :       if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
    2656              :         {
    2657        10319 :           if (dump_file)
    2658            0 :             fprintf (dump_file,
    2659              :                      "r%d has non convertible def in insn %d\n",
    2660            0 :                      regno, DF_REF_INSN_UID (def));
    2661              : 
    2662        10319 :           bitmap_set_bit (regs, regno);
    2663        10319 :           break;
    2664              :         }
    2665              :     }
    2666              :   /* Likewise for uses.  This scan runs even if a non convertible
                      :      def was already found above.  */
    2667      1213358 :   for (df_ref ref = DF_REG_USE_CHAIN (regno);
    2668      2687554 :        ref;
    2669      1474196 :        ref = DF_REF_NEXT_REG (ref))
    2670              :     {
    2671              :       /* Debug instructions are skipped.  */
    2672      1543826 :       if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
    2673      1543826 :           && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
    2674              :         {
    2675        69630 :           if (dump_file)
    2676            0 :             fprintf (dump_file,
    2677              :                      "r%d has non convertible use in insn %d\n",
    2678            0 :                      regno, DF_REF_INSN_UID (ref));
    2679              : 
    2680        69630 :           bitmap_set_bit (regs, regno);
    2681        69630 :           break;
    2682              :         }
    2683              :     }
    2684              : }
    2685              : 
    2686              : /* For a given bitmap of insn UIDs, scan all instructions and
    2687              :    remove an insn from CANDIDATES if it defines or uses a TImode
    2688              :    register that also has a def or a non-debug use in an insn that
                      :    is not in CANDIDATES.  Removing an insn can make further
                      :    registers non convertible, so the scan is repeated until no
                      :    insn is removed.
    2689              : 
    2690              :    All insns in a bitmap are conversion candidates according to
    2691              :    scalar_to_vector_candidate_p.  Currently it implies all insns
    2692              :    are single_set.  */
    2693              : 
    2694              : static void
    2695       834120 : timode_remove_non_convertible_regs (bitmap candidates)
    2696              : {
    2697       834120 :   bitmap_iterator bi;
    2698       834120 :   unsigned id;
    2699       834120 :   bitmap regs = BITMAP_ALLOC (NULL);
    2700       855231 :   bool changed;
    2701              : 
    2702       855231 :   do {
    2703       855231 :     changed = false;
    2704      2099843 :     EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
    2705              :       {
    2706      1244612 :         rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
    2707      1244612 :         df_ref ref;
    2708              :         /* Check the TImode registers defined by the candidate (memory
                      :            references excluded) and then those it uses as plain,
                      :            non-SUBREG register uses.  Non convertible ones are
                      :            accumulated in REGS, which is not cleared between
                      :            iterations of the outer loop.  */
    2709      1946193 :         FOR_EACH_INSN_DEF (ref, insn)
    2710       701581 :           if (!DF_REF_REG_MEM_P (ref)
    2711       701581 :               && GET_MODE (DF_REF_REG (ref)) == TImode)
    2712       613896 :             timode_check_non_convertible_regs (candidates, regs,
    2713              :                                                DF_REF_REGNO (ref));
    2714              : 
    2715      3069629 :         FOR_EACH_INSN_USE (ref, insn)
    2716      1825017 :           if (DF_REF_TYPE (ref) == DF_REF_REG_USE
    2717       743614 :               && GET_MODE (DF_REF_REG (ref)) == TImode
    2718       607509 :               && !SUBREG_P (DF_REF_REG (ref)))
    2719       607504 :             timode_check_non_convertible_regs (candidates, regs,
    2720              :                                                DF_REF_REGNO (ref));
    2721              :       }
    2722              :     /* Drop every candidate that defines or uses a register in REGS.  */
    2723      1030662 :     EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
    2724              :       {
    2725       175431 :         for (df_ref def = DF_REG_DEF_CHAIN (id);
    2726       356686 :              def;
    2727       181255 :              def = DF_REF_NEXT_REG (def))
    2728       181255 :           if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
    2729              :             {
    2730        56075 :               if (dump_file)
    2731            0 :                 fprintf (dump_file, "Removing insn %d from candidates list\n",
    2732            0 :                          DF_REF_INSN_UID (def));
    2733              : 
    2734        56075 :               bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
    2735        56075 :               changed = true;
    2736              :             }
    2737              : 
    2738       175431 :         for (df_ref ref = DF_REG_USE_CHAIN (id);
    2739       521357 :              ref;
    2740       345926 :              ref = DF_REF_NEXT_REG (ref))
    2741       345926 :           if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
    2742              :             {
    2743        16235 :               if (dump_file)
    2744            0 :                 fprintf (dump_file, "Removing insn %d from candidates list\n",
    2745            0 :                          DF_REF_INSN_UID (ref));
    2746              : 
    2747        16235 :               bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
    2748        16235 :               changed = true;
    2749              :             }
    2750              :       }
    2751              :   } while (changed);
    2752              : 
    2753       834120 :   BITMAP_FREE (regs);
    2754       834120 : }
    2755              : 
    2756              : /* Main STV pass function.  Find and convert scalar
    2757              :    instructions into vector mode when profitable.
                      :
                      :    If TIMODE_P, only TImode candidates are collected; otherwise each
                      :    insn is tried as an SImode and then as a DImode candidate.
                      :    Candidates are grouped into chains, and a chain is converted
                      :    when its compute_convert_gain returns a true value.  If any insn
                      :    was converted, the stack alignment fields of crtl are raised to
                      :    at least 128, V1TImode register DECL_RTL/DECL_INCOMING_RTL of
                      :    TImode parameters are wrapped in TImode SUBREGs on 64-bit
                      :    targets, and blocks are split after the recorded control flow
                      :    insns.  Always returns 0.  */
    2758              : 
    2759              : static unsigned int
    2760      1794454 : convert_scalars_to_vector (bool timode_p)
    2761              : {
    2762      1794454 :   basic_block bb;
    2763      1794454 :   int converted_insns = 0;
    2764      1794454 :   auto_vec<rtx_insn *> control_flow_insns;
    2765              : 
    2766      1794454 :   bitmap_obstack_initialize (NULL);
    2767      1794454 :   const machine_mode cand_mode[3] = { SImode, DImode, TImode };
    2768      1794454 :   const machine_mode cand_vmode[3] = { V4SImode, V2DImode, V1TImode };
    2769      5383362 :   bitmap_head candidates[3];  /* { SImode, DImode, TImode } */
    2770      7177816 :   for (unsigned i = 0; i < 3; ++i)
    2771      5383362 :     bitmap_initialize (&candidates[i], &bitmap_default_obstack);
    2772              : 
    2773      1794454 :   calculate_dominance_info (CDI_DOMINATORS);
    2774      1794454 :   df_set_flags (DF_DEFER_INSN_RESCAN | DF_RD_PRUNE_DEAD_DEFS);
    2775      1794454 :   df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
    2776      1794454 :   df_analyze ();
    2777              : 
    2778              :   /* Find all instructions we want to convert into vector mode.  An
                      :      insn is recorded in at most one candidate set: the TImode set
                      :      if TIMODE_P, else the first of SImode and DImode that accepts
                      :      it.  */
    2779      1794454 :   if (dump_file)
    2780           44 :     fprintf (dump_file, "Searching for mode conversion candidates...\n");
    2781              : 
    2782     19693860 :   FOR_EACH_BB_FN (bb, cfun)
    2783              :     {
    2784     17899406 :       rtx_insn *insn;
    2785    239966144 :       FOR_BB_INSNS (bb, insn)
    2786    222066738 :         if (timode_p
    2787    222066738 :             && timode_scalar_to_vector_candidate_p (insn))
    2788              :           {
    2789      1067832 :             if (dump_file)
    2790            0 :               fprintf (dump_file, "  insn %d is marked as a TImode candidate\n",
    2791            0 :                        INSN_UID (insn));
    2792              : 
    2793      1067832 :             bitmap_set_bit (&candidates[2], INSN_UID (insn));
    2794              :           }
    2795    220998906 :         else if (!timode_p)
    2796              :           {
    2797              :             /* Check {SI,DI}mode.  */
    2798    345603812 :             for (unsigned i = 0; i <= 1; ++i)
    2799    236964414 :               if (general_scalar_to_vector_candidate_p (insn, cand_mode[i]))
    2800              :                 {
    2801     11601415 :                   if (dump_file)
    2802          554 :                     fprintf (dump_file, "  insn %d is marked as a %s candidate\n",
    2803          277 :                              INSN_UID (insn), i == 0 ? "SImode" : "DImode");
    2804              : 
    2805     11601415 :                   bitmap_set_bit (&candidates[i], INSN_UID (insn));
    2806     11601415 :                   break;
    2807              :                 }
    2808              :           }
    2809              :     }
    2810              : 
    2811      1794454 :   if (timode_p)
    2812       834120 :     timode_remove_non_convertible_regs (&candidates[2]);
    2813              :   /* Report to the dump file when all three candidate sets are
                      :      empty.  */
    2814      5685229 :   for (unsigned i = 0; i <= 2; ++i)
    2815      4530779 :     if (!bitmap_empty_p (&candidates[i]))
    2816              :       break;
    2817      3890775 :     else if (i == 2 && dump_file)
    2818           23 :       fprintf (dump_file, "There are no candidates for optimization.\n");
    2819              :   /* Process the candidate set of each mode until it is empty, one
                      :      chain at a time.  */
    2820      7177816 :   for (unsigned i = 0; i <= 2; ++i)
    2821              :     {
    2822      5383362 :       auto_bitmap disallowed;
    2823      5383362 :       bitmap_tree_view (&candidates[i]);
    2824     17155835 :       while (!bitmap_empty_p (&candidates[i]))
    2825              :         {
    2826      6389111 :           unsigned uid = bitmap_first_set_bit (&candidates[i]);
    2827      6389111 :           scalar_chain *chain;
    2828              : 
    2829      6389111 :           if (cand_mode[i] == TImode)
    2830       499867 :             chain = new timode_scalar_chain;
    2831              :           else
    2832      5889244 :             chain = new general_scalar_chain (cand_mode[i], cand_vmode[i]);
    2833              : 
    2834              :           /* Find instructions chain we want to convert to vector mode.
    2835              :              Check all uses and definitions to estimate all required
    2836              :              conversions.  */
    2837      6389111 :           if (chain->build (&candidates[i], uid, disallowed))
    2838              :             {
    2839      6385242 :               if (chain->compute_convert_gain ())
    2840       631957 :                 converted_insns += chain->convert ();
    2841      5753285 :               else if (dump_file)
    2842          136 :                 fprintf (dump_file, "Chain #%d conversion is not profitable\n",
    2843              :                          chain->chain_id);
    2844              :             }
    2845              :           /* Save the chain's control flow insns before the chain is
                      :              deleted; they are processed at the end of the pass.  */
    2846      6389111 :           rtx_insn* iter_insn;
    2847      6389111 :           unsigned int ii;
    2848      6392699 :           FOR_EACH_VEC_ELT (chain->control_flow_insns, ii, iter_insn)
    2849         3588 :             control_flow_insns.safe_push (iter_insn);
    2850              : 
    2851      6389111 :           delete chain;
    2852              :         }
    2853      5383362 :     }
    2854              : 
    2855      1794454 :   if (dump_file)
    2856           44 :     fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
    2857              : 
    2858      7177816 :   for (unsigned i = 0; i <= 2; ++i)
    2859      5383362 :     bitmap_release (&candidates[i]);
    2860      1794454 :   bitmap_obstack_release (NULL);
    2861      1794454 :   df_process_deferred_rescans ();
    2862              : 
    2863              :   /* Conversion means we may have 128bit register spills/fills
    2864              :      which require aligned stack.  */
    2865      1794454 :   if (converted_insns)
    2866              :     {
    2867       104035 :       if (crtl->stack_alignment_needed < 128)
    2868         2324 :         crtl->stack_alignment_needed = 128;
    2869       104035 :       if (crtl->stack_alignment_estimated < 128)
    2870          221 :         crtl->stack_alignment_estimated = 128;
    2871              : 
    2872       104035 :       crtl->stack_realign_needed
    2873       104035 :         = INCOMING_STACK_BOUNDARY < crtl->stack_alignment_estimated;
    2874       104035 :       crtl->stack_realign_tried = crtl->stack_realign_needed;
    2875              : 
    2876       104035 :       crtl->stack_realign_processed = true;
    2877              : 
    2878       104035 :       if (!crtl->drap_reg)
    2879              :         {
    2880       103858 :           rtx drap_rtx = targetm.calls.get_drap_rtx ();
    2881              : 
    2882              :           /* stack_realign_drap and drap_rtx must match.  */
    2883       103858 :           gcc_assert ((stack_realign_drap != 0) == (drap_rtx != NULL));
    2884              : 
    2885              :           /* Do nothing if NULL is returned,
    2886              :              which means DRAP is not needed.  */
    2887       103858 :           if (drap_rtx != NULL)
    2888              :             {
    2889            0 :               crtl->args.internal_arg_pointer = drap_rtx;
    2890              : 
    2891              :               /* Call fixup_tail_calls to clean up
    2892              :                  REG_EQUIV note if DRAP is needed. */
    2893            0 :               fixup_tail_calls ();
    2894              :             }
    2895              :         }
    2896              : 
    2897              :       /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments.  For each
                      :          TImode parameter whose rtx is a V1TImode register, replace
                      :          it with a TImode SUBREG of that register.  */
    2898       104035 :       if (TARGET_64BIT)
    2899        65538 :         for (tree parm = DECL_ARGUMENTS (current_function_decl);
    2900       179247 :              parm; parm = DECL_CHAIN (parm))
    2901              :           {
    2902       113709 :             if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
    2903        98034 :               continue;
    2904        15675 :             if (DECL_RTL_SET_P (parm)
    2905        31350 :                 && GET_MODE (DECL_RTL (parm)) == V1TImode)
    2906              :               {
    2907          611 :                 rtx r = DECL_RTL (parm);
    2908          611 :                 if (REG_P (r))
    2909          611 :                   SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
    2910              :               }
    2911        15675 :             if (DECL_INCOMING_RTL (parm)
    2912        15675 :                 && GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
    2913              :               {
    2914            0 :                 rtx r = DECL_INCOMING_RTL (parm);
    2915            0 :                 if (REG_P (r))
    2916            0 :                   DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
    2917              :               }
    2918              :           }
    2919              :       /* Dominance info is freed before the blocks containing the saved
                      :          control flow insns are split.  */
    2920       104035 :       if (!control_flow_insns.is_empty ())
    2921              :         {
    2922         1130 :           free_dominance_info (CDI_DOMINATORS);
    2923              : 
    2924         1130 :           unsigned int i;
    2925         1130 :           rtx_insn* insn;
    2926         5848 :           FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
    2927         3588 :             if (control_flow_insn_p (insn))
    2928              :               {
    2929              :                 /* Split the block after insn.  There will be a fallthru
    2930              :                    edge, which is OK so we keep it.  We have to create
    2931              :                    the exception edges ourselves.  */
    2932         3588 :                 bb = BLOCK_FOR_INSN (insn);
    2933         3588 :                 split_block (bb, insn);
    2934         3588 :                 rtl_make_eh_edge (NULL, bb, BB_END (bb));
    2935              :               }
    2936              :         }
    2937              :     }
    2938              : 
    2939      1794454 :   return 0;
    2940      1794454 : }
    2941              : 
    2942              : static unsigned int
    2943        74570 : rest_of_handle_insert_vzeroupper (void)
    2944              : {
    2945              :   /* vzeroupper instructions are inserted immediately after reload and
    2946              :      postreload_cse to clean up after it a little bit to account for possible
    2947              :      spills from 256bit or 512bit registers.  The pass reuses mode switching
    2948              :      infrastructure by re-running mode insertion pass, so disable entities
    2949              :      that have already been processed.  */
    2950       521990 :   for (int i = 0; i < MAX_386_ENTITIES; i++)
    2951       447420 :     ix86_optimize_mode_switching[i] = 0;
    2952              : 
    2953        74570 :   ix86_optimize_mode_switching[AVX_U128] = 1;
    2954              : 
    2955              :   /* Call optimize_mode_switching.  */
    2956        74570 :   g->get_passes ()->execute_pass_mode_switching ();
    2957              : 
    2958              :   /* LRA removes all REG_DEAD/REG_UNUSED notes and normally they
    2959              :      reappear in the IL only at the start of pass_rtl_dse2, which does
    2960              :      df_note_add_problem (); df_analyze ();
    2961              :      The vzeroupper is scheduled after postreload_cse pass and mode
    2962              :      switching computes the notes as well, the problem is that e.g.
    2963              :      pass_gcse2 doesn't maintain the notes, see PR113059 and
    2964              :      PR112760.  Remove the notes now to restore status quo ante
    2965              :      until we figure out how to maintain the notes or what else
    2966              :      to do.  */
    2967        74570 :   basic_block bb;
    2968        74570 :   rtx_insn *insn;
    2969       407172 :   FOR_EACH_BB_FN (bb, cfun)
    2970      4262225 :     FOR_BB_INSNS (bb, insn)
    2971      3929623 :       if (NONDEBUG_INSN_P (insn))
    2972              :         {
    2973      2090925 :           rtx *pnote = &REG_NOTES (insn);
    2974      3881958 :           while (*pnote != 0)
    2975              :             {
    2976      1791033 :               if (REG_NOTE_KIND (*pnote) == REG_DEAD
    2977       818796 :                   || REG_NOTE_KIND (*pnote) == REG_UNUSED)
    2978      1283144 :                 *pnote = XEXP (*pnote, 1);
    2979              :               else
    2980       507889 :                 pnote = &XEXP (*pnote, 1);
    2981              :             }
    2982              :         }
    2983              : 
    2984        74570 :   df_remove_problem (df_note);
    2985        74570 :   df_analyze ();
    2986        74570 :   return 0;
    2987              : }
    2988              : 
    2989              : namespace {
    2990              : 
    2991              : const pass_data pass_data_insert_vzeroupper =
    2992              : {
    2993              :   RTL_PASS, /* type */
    2994              :   "vzeroupper", /* name */
    2995              :   OPTGROUP_NONE, /* optinfo_flags */
    2996              :   TV_MACH_DEP, /* tv_id */
    2997              :   0, /* properties_required */
    2998              :   0, /* properties_provided */
    2999              :   0, /* properties_destroyed */
    3000              :   0, /* todo_flags_start */
    3001              :   TODO_df_finish, /* todo_flags_finish */
    3002              : };
    3003              : 
    3004              : class pass_insert_vzeroupper : public rtl_opt_pass
    3005              : {
    3006              : public:
    3007       288767 :   pass_insert_vzeroupper(gcc::context *ctxt)
    3008       577534 :     : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
    3009              :   {}
    3010              : 
    3011              :   /* opt_pass methods: */
    3012      1481491 :   bool gate (function *) final override
    3013              :     {
    3014      1481491 :       return TARGET_AVX && TARGET_VZEROUPPER;
    3015              :     }
    3016              : 
    3017        74570 :   unsigned int execute (function *) final override
    3018              :     {
    3019        74570 :       return rest_of_handle_insert_vzeroupper ();
    3020              :     }
    3021              : 
    3022              : }; // class pass_insert_vzeroupper
    3023              : 
    3024              : const pass_data pass_data_stv =
    3025              : {
    3026              :   RTL_PASS, /* type */
    3027              :   "stv", /* name */
    3028              :   OPTGROUP_NONE, /* optinfo_flags */
    3029              :   TV_MACH_DEP, /* tv_id */
    3030              :   0, /* properties_required */
    3031              :   0, /* properties_provided */
    3032              :   0, /* properties_destroyed */
    3033              :   0, /* todo_flags_start */
    3034              :   TODO_df_finish, /* todo_flags_finish */
    3035              : };
    3036              : 
    3037              : class pass_stv : public rtl_opt_pass
    3038              : {
    3039              : public:
    3040       577534 :   pass_stv (gcc::context *ctxt)
    3041       577534 :     : rtl_opt_pass (pass_data_stv, ctxt),
    3042      1155068 :       timode_p (false)
    3043              :   {}
    3044              : 
    3045              :   /* opt_pass methods: */
    3046      2962982 :   bool gate (function *) final override
    3047              :     {
    3048      1481491 :       return ((!timode_p || TARGET_64BIT)
    3049      4317938 :               && TARGET_STV && TARGET_SSE2 && optimize > 1);
    3050              :     }
    3051              : 
    3052      1794454 :   unsigned int execute (function *) final override
    3053              :     {
    3054      1794454 :       return convert_scalars_to_vector (timode_p);
    3055              :     }
    3056              : 
    3057       288767 :   opt_pass *clone () final override
    3058              :     {
    3059       288767 :       return new pass_stv (m_ctxt);
    3060              :     }
    3061              : 
    3062       577534 :   void set_pass_param (unsigned int n, bool param) final override
    3063              :     {
    3064       577534 :       gcc_assert (n == 0);
    3065       577534 :       timode_p = param;
    3066       577534 :     }
    3067              : 
    3068              : private:
    3069              :   bool timode_p;
    3070              : }; // class pass_stv
    3071              : 
    3072              : } // anon namespace
    3073              : 
    3074              : rtl_opt_pass *
    3075       288767 : make_pass_insert_vzeroupper (gcc::context *ctxt)
    3076              : {
    3077       288767 :   return new pass_insert_vzeroupper (ctxt);
    3078              : }
    3079              : 
    3080              : rtl_opt_pass *
    3081       288767 : make_pass_stv (gcc::context *ctxt)
    3082              : {
    3083       288767 :   return new pass_stv (ctxt);
    3084              : }
    3085              : 
    3086              : /* Inserting ENDBR and pseudo patchable-area instructions.  */
    3087              : 
    3088              : static void
    3089       197428 : rest_of_insert_endbr_and_patchable_area (bool need_endbr,
    3090              :                                          unsigned int patchable_area_size)
    3091              : {
    3092       197428 :   rtx endbr;
    3093       197428 :   rtx_insn *insn;
    3094       197428 :   rtx_insn *endbr_insn = NULL;
    3095       197428 :   basic_block bb;
    3096              : 
    3097       197428 :   if (need_endbr)
    3098              :     {
    3099              :       /* Currently emit ENDBR if it's a tracking function, i.e. 'nocf_check'
    3100              :          is absent among function attributes.  Later an optimization will
    3101              :          be introduced to analyze whether the address of a static function
    3102              :          is taken.  A static function whose address is not taken will get
    3103              :          a nocf_check attribute.  This will allow reducing the number of
    3104              :          ENDBR.  */
    3105       197383 :       if (!lookup_attribute ("nocf_check",
    3106       197383 :                              TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
    3107       197365 :           && (!flag_manual_endbr
    3108            8 :               || lookup_attribute ("cf_check",
    3109            8 :                                    DECL_ATTRIBUTES (cfun->decl)))
    3110       394747 :           && (!cgraph_node::get (cfun->decl)->only_called_directly_p ()
    3111        28556 :               || ix86_cmodel == CM_LARGE
    3112        28555 :               || ix86_cmodel == CM_LARGE_PIC
    3113        28554 :               || flag_force_indirect_call
    3114        28554 :               || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
    3115              :                   && DECL_DLLIMPORT_P (cfun->decl))))
    3116              :         {
    3117       168811 :           if (crtl->profile && flag_fentry)
    3118              :             {
    3119              :               /* Queue ENDBR insertion to x86_function_profiler.
    3120              :                  NB: Any patchable-area insn will be inserted after
    3121              :                  ENDBR.  */
    3122            6 :               cfun->machine->insn_queued_at_entrance = TYPE_ENDBR;
    3123              :             }
    3124              :           else
    3125              :             {
    3126       168805 :               endbr = gen_nop_endbr ();
    3127       168805 :               bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
    3128       168805 :               rtx_insn *insn = BB_HEAD (bb);
    3129       168805 :               endbr_insn = emit_insn_before (endbr, insn);
    3130              :             }
    3131              :         }
    3132              :     }
    3133              : 
    3134       197428 :   if (patchable_area_size)
    3135              :     {
    3136           51 :       if (crtl->profile && flag_fentry)
    3137              :         {
    3138              :           /* Queue patchable-area insertion to x86_function_profiler.
    3139              :              NB: If there is a queued ENDBR, x86_function_profiler
    3140              :              will also handle patchable-area.  */
    3141            2 :           if (!cfun->machine->insn_queued_at_entrance)
    3142            1 :             cfun->machine->insn_queued_at_entrance = TYPE_PATCHABLE_AREA;
    3143              :         }
    3144              :       else
    3145              :         {
    3146           49 :           rtx patchable_area
    3147           49 :             = gen_patchable_area (GEN_INT (patchable_area_size),
    3148           49 :                                   GEN_INT (crtl->patch_area_entry == 0));
    3149           49 :           if (endbr_insn)
    3150            3 :             emit_insn_after (patchable_area, endbr_insn);
    3151              :           else
    3152              :             {
    3153           46 :               bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
    3154           46 :               insn = BB_HEAD (bb);
    3155           46 :               emit_insn_before (patchable_area, insn);
    3156              :             }
    3157              :         }
    3158              :     }
    3159              : 
    3160       197428 :   if (!need_endbr)
    3161              :     return;
    3162              : 
    3163       197383 :   bb = 0;
    3164      4017437 :   FOR_EACH_BB_FN (bb, cfun)
    3165              :     {
    3166     74049696 :       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
    3167     70229642 :            insn = NEXT_INSN (insn))
    3168              :         {
    3169     70229642 :           if (CALL_P (insn))
    3170              :             {
    3171      1388266 :               need_endbr = find_reg_note (insn, REG_SETJMP, NULL) != NULL;
    3172      1388266 :               if (!need_endbr && !SIBLING_CALL_P (insn))
    3173              :                 {
    3174      1338198 :                   rtx call = get_call_rtx_from (insn);
    3175      1338198 :                   rtx fnaddr = XEXP (call, 0);
    3176      1338198 :                   tree fndecl = NULL_TREE;
    3177              : 
    3178              :                   /* Also generate ENDBRANCH for non-tail call which
    3179              :                      may return via indirect branch.  */
    3180      1338198 :                   if (SYMBOL_REF_P (XEXP (fnaddr, 0)))
    3181      1280518 :                     fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
    3182      1280518 :                   if (fndecl == NULL_TREE)
    3183        58048 :                     fndecl = MEM_EXPR (fnaddr);
    3184        58048 :                   if (fndecl
    3185      1335940 :                       && TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE
    3186       566483 :                       && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE)
    3187              :                     fndecl = NULL_TREE;
    3188      1338198 :                   if (fndecl && TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
    3189              :                     {
    3190      1297565 :                       tree fntype = TREE_TYPE (fndecl);
    3191      1297565 :                       if (lookup_attribute ("indirect_return",
    3192      1297565 :                                             TYPE_ATTRIBUTES (fntype)))
    3193              :                         need_endbr = true;
    3194              :                     }
    3195              :                 }
    3196      1388254 :               if (!need_endbr)
    3197      1388246 :                 continue;
    3198              :               /* Generate ENDBRANCH after a CALL that can return more than
    3199              :                  once, e.g. a call to a setjmp-like function.  */
    3200              : 
    3201           20 :               endbr = gen_nop_endbr ();
    3202           20 :               emit_insn_after_setloc (endbr, insn, INSN_LOCATION (insn));
    3203           20 :               continue;
    3204           20 :             }
    3205              : 
    3206     68841376 :           if (JUMP_P (insn) && flag_cet_switch)
    3207              :             {
    3208            9 :               rtx target = JUMP_LABEL (insn);
    3209            9 :               if (target == NULL_RTX || ANY_RETURN_P (target))
    3210            5 :                 continue;
    3211              : 
    3212              :               /* Check the jump is a switch table.  */
    3213            4 :               rtx_insn *label = as_a<rtx_insn *> (target);
    3214            4 :               rtx_insn *table = next_insn (label);
    3215            4 :               if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
    3216            2 :                 continue;
    3217              : 
    3218              :               /* For the indirect jump find out all places it jumps and insert
    3219              :                  ENDBRANCH there.  It should be done under a special flag to
    3220              :                  control ENDBRANCH generation for switch stmts.  */
    3221            2 :               edge_iterator ei;
    3222            2 :               edge e;
    3223            2 :               basic_block dest_blk;
    3224              : 
    3225           24 :               FOR_EACH_EDGE (e, ei, bb->succs)
    3226              :                 {
    3227           22 :                   rtx_insn *insn;
    3228              : 
    3229           22 :                   dest_blk = e->dest;
    3230           22 :                   insn = BB_HEAD (dest_blk);
    3231           22 :                   gcc_assert (LABEL_P (insn));
    3232           22 :                   endbr = gen_nop_endbr ();
    3233           22 :                   emit_insn_after (endbr, insn);
    3234              :                 }
    3235            2 :               continue;
    3236            2 :             }
    3237              : 
    3238     68841367 :           if (LABEL_P (insn) && LABEL_PRESERVE_P (insn))
    3239              :             {
    3240       142093 :               endbr = gen_nop_endbr ();
    3241       142093 :               emit_insn_after (endbr, insn);
    3242       142093 :               continue;
    3243              :             }
    3244              :         }
    3245              :     }
    3246              : 
    3247              :   return;
    3248              : }
    3249              : 
    3250              : namespace {
    3251              : 
    3252              : const pass_data pass_data_insert_endbr_and_patchable_area =
    3253              : {
    3254              :   RTL_PASS, /* type.  */
    3255              :   "endbr_and_patchable_area", /* name.  */
    3256              :   OPTGROUP_NONE, /* optinfo_flags.  */
    3257              :   TV_MACH_DEP, /* tv_id.  */
    3258              :   0, /* properties_required.  */
    3259              :   0, /* properties_provided.  */
    3260              :   0, /* properties_destroyed.  */
    3261              :   0, /* todo_flags_start.  */
    3262              :   0, /* todo_flags_finish.  */
    3263              : };
    3264              : 
    3265              : class pass_insert_endbr_and_patchable_area : public rtl_opt_pass
    3266              : {
    3267              : public:
    3268       288767 :   pass_insert_endbr_and_patchable_area (gcc::context *ctxt)
    3269       577534 :     : rtl_opt_pass (pass_data_insert_endbr_and_patchable_area, ctxt)
    3270              :   {}
    3271              : 
    3272              :   /* opt_pass methods: */
    3273      1481491 :   bool gate (function *) final override
    3274              :     {
    3275      1481491 :       need_endbr = (flag_cf_protection & CF_BRANCH) != 0;
    3276      1481491 :       patchable_area_size = crtl->patch_area_size - crtl->patch_area_entry;
    3277      1481491 :       return need_endbr || patchable_area_size;
    3278              :     }
    3279              : 
    3280       197428 :   unsigned int execute (function *) final override
    3281              :     {
    3282       197428 :       timevar_push (TV_MACH_DEP);
    3283       197428 :       rest_of_insert_endbr_and_patchable_area (need_endbr,
    3284              :                                                patchable_area_size);
    3285       197428 :       timevar_pop (TV_MACH_DEP);
    3286       197428 :       return 0;
    3287              :     }
    3288              : 
    3289              : private:
    3290              :   bool need_endbr;
    3291              :   unsigned int patchable_area_size;
    3292              : }; // class pass_insert_endbr_and_patchable_area
    3293              : 
    3294              : } // anon namespace
    3295              : 
    3296              : rtl_opt_pass *
    3297       288767 : make_pass_insert_endbr_and_patchable_area (gcc::context *ctxt)
    3298              : {
    3299       288767 :   return new pass_insert_endbr_and_patchable_area (ctxt);
    3300              : }
    3301              : 
    3302              : bool
    3303      6061569 : ix86_rpad_gate ()
    3304              : {
    3305      6061569 :   return (TARGET_AVX
    3306       385907 :           && TARGET_SSE_PARTIAL_REG_DEPENDENCY
    3307       291012 :           && TARGET_SSE_MATH
    3308       290698 :           && optimize
    3309      6347041 :           && optimize_function_for_speed_p (cfun));
    3310              : }
    3311              : 
    3312              : enum x86_cse_kind
    3313              : {
    3314              :   X86_CSE_CONST0_VECTOR,
    3315              :   X86_CSE_CONSTM1_VECTOR,
    3316              :   X86_CSE_CONST_VECTOR,
    3317              :   X86_CSE_VEC_DUP,
    3318              :   X86_CSE_TLS_GD,
    3319              :   X86_CSE_TLS_LD_BASE,
    3320              :   X86_CSE_TLSDESC
    3321              : };
    3322              : 
    3323       154859 : struct redundant_pattern
    3324              : {
    3325              :   /* Bitmap of basic blocks with broadcast instructions.  */
    3326              :   auto_bitmap bbs;
    3327              :   /* Bitmap of broadcast instructions.  */
    3328              :   auto_bitmap insns;
    3329              :   /* The broadcast inner scalar.  */
    3330              :   rtx val;
    3331              :   /* The actual redundant source value for UNSPEC_TLSDESC.  */
    3332              :   rtx tlsdesc_val;
    3333              :   /* The inner scalar mode.  */
    3334              :   machine_mode mode;
    3335              :   /* The destination mode which can be changed to the integer mode of
    3336              :      the same size.  */
    3337              :   machine_mode dest_mode;
    3338              :   /* The instruction which sets the inner scalar.  Nullptr if the inner
    3339              :      scalar is applied to the whole function, instead of within the same
    3340              :      block.  */
    3341              :   rtx_insn *def_insn;
    3342              :   /* The widest broadcast source.  */
    3343              :   rtx broadcast_source;
    3344              :   /* The widest broadcast register.  */
    3345              :   rtx broadcast_reg;
    3346              :   /* The basic block of the broadcast instruction.  */
    3347              :   basic_block bb;
    3348              :   /* The number of broadcast instructions with the same inner scalar.  */
    3349              :   unsigned HOST_WIDE_INT count;
    3350              :   /* The threshold of broadcast instructions with the same inner
    3351              :      scalar.  */
    3352              :   unsigned int threshold;
    3353              :   /* The widest broadcast size in bytes.  */
    3354              :   unsigned int size;
    3355              :   /* Load kind.  */
    3356              :   x86_cse_kind kind;
    3357              : };
    3358              : 
    3359              : /* Generate a vector set, DEST = SRC, at entry of the nearest dominator
    3360              :    for basic block map BBS, which is in the fake loop that contains the
    3361              :    whole function, so that there is only a single vector set in the
    3362              :    whole function.  If not nullptr, LOAD is a pointer to the load.  */
    3363              : 
    3364              : static void
    3365        43324 : ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs,
    3366              :                               redundant_pattern *load = nullptr)
    3367              : {
    3368        43324 :   basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
    3369              :   /* For X86_CSE_VEC_DUP and X86_CSE_CONST_VECTOR, don't place the vector
    3370              :      set outside of the loop to avoid extra spills.  */
    3371        43324 :   if (!load
    3372        42302 :       || (load->kind != X86_CSE_VEC_DUP
    3373        42302 :           && load->kind != X86_CSE_CONST_VECTOR))
    3374              :     {
    3375        23860 :       while (bb->loop_father->latch
    3376        23860 :              != EXIT_BLOCK_PTR_FOR_FN (cfun))
    3377         1361 :         bb = get_immediate_dominator (CDI_DOMINATORS,
    3378              :                                       bb->loop_father->header);
    3379              :     }
    3380              : 
    3381        43324 :   if (CONST_INT_P (src))
    3382        10493 :     dest = gen_rtx_SUBREG (load->dest_mode, dest, 0);
    3383        32831 :   else if (CONST_VECTOR_P (src))
    3384              :     {
    3385              :       /* The only possible CONST_VECTORs of SRC are CONST0_RTX and
    3386              :          CONSTM1_RTX.  Otherwise,
    3387              : 
    3388              :          rtx set = gen_rtx_SET (dest, src);
    3389              : 
    3390              :          won't be a valid instruction.  CONST0_RTX always works.  It
    3391              :          can come from:
    3392              : 
    3393              :          1. remove_partial_avx_dependency with LOAD == NULL.
    3394              :          2. X86_CSE_VEC_DUP with
    3395              : 
    3396              :          (insn 48 58 16 3 (set (reg:V4HI 123)
    3397              :                 (const_vector:V4HI [
    3398              :                         (const_int 0 [0]) repeated x4
    3399              :                   ])) 2065 {*movv4hi_internal} (nil))
    3400              : 
    3401              :          3. X86_CSE_CONST0_VECTOR.
    3402              :        */
    3403        22499 :       machine_mode mode = GET_MODE (dest);
    3404        22499 :       if (!(src == CONST0_RTX (mode)
    3405         1578 :             || (src == CONSTM1_RTX (mode)
    3406         1578 :                 && load->kind == X86_CSE_CONSTM1_VECTOR)))
    3407            0 :         gcc_unreachable ();
    3408              :     }
    3409        43324 :   rtx set = gen_rtx_SET (dest, src);
    3410              : 
    3411        43324 :   rtx_insn *insn = BB_HEAD (bb);
    3412       170033 :   while (insn && !NONDEBUG_INSN_P (insn))
    3413              :     {
    3414       126713 :       if (insn == BB_END (bb))
    3415              :         {
    3416              :           insn = NULL;
    3417              :           break;
    3418              :         }
    3419       126709 :       insn = NEXT_INSN (insn);
    3420              :     }
    3421              : 
    3422        43324 :   rtx_insn *set_insn;
    3423        43324 :   if (insn == BB_HEAD (bb))
    3424              :     {
    3425            0 :       set_insn = emit_insn_before (set, insn);
    3426            0 :       if (dump_file)
    3427              :         {
    3428            0 :           fprintf (dump_file, "\nPlace:\n\n");
    3429            0 :           print_rtl_single (dump_file, set_insn);
    3430            0 :           fprintf (dump_file, "\nbefore:\n\n");
    3431            0 :           print_rtl_single (dump_file, insn);
    3432            0 :           fprintf (dump_file, "\n");
    3433              :         }
    3434              :     }
    3435              :   else
    3436              :     {
    3437        43324 :       rtx_insn *after = insn ? PREV_INSN (insn) : BB_END (bb);
    3438        43324 :       set_insn = emit_insn_after (set, after);
    3439        43324 :       if (dump_file)
    3440              :         {
    3441            2 :           fprintf (dump_file, "\nPlace:\n\n");
    3442            2 :           print_rtl_single (dump_file, set_insn);
    3443            2 :           fprintf (dump_file, "\nafter:\n\n");
    3444            2 :           print_rtl_single (dump_file, after);
    3445            2 :           fprintf (dump_file, "\n");
    3446              :         }
    3447              :     }
    3448              : 
    3449        43324 :   if (load && load->kind == X86_CSE_VEC_DUP)
    3450              :     {
    3451              :       /* Get the source from LOAD as (reg:SI 99) in
    3452              : 
    3453              :          (vec_duplicate:V4SI (reg:SI 99))
    3454              : 
    3455              :        */
    3456        10332 :       rtx inner_scalar = load->val;
    3457              :       /* Set the source in (vec_duplicate:V4SI (reg:SI 99)).  */
    3458        10332 :       rtx reg = XEXP (src, 0);
    3459        10332 :       machine_mode reg_mode = GET_MODE (reg);
    3460        10332 :       if (reg_mode != GET_MODE (inner_scalar))
    3461              :         {
    3462        10051 :           if (REG_P (inner_scalar) || MEM_P (inner_scalar))
    3463            0 :             inner_scalar = gen_rtx_SUBREG (reg_mode, inner_scalar, 0);
    3464        10051 :           else if (!SCALAR_INT_MODE_P (reg_mode))
    3465              :             {
    3466              :               /* For non-int load with integer constant, generate
    3467              : 
    3468              :                  (set (subreg:SI (reg/v:SF 105 [ f ]) 0)
    3469              :                       (const_int 1313486336 [0x4e4a3600]))
    3470              : 
    3471              :                */
    3472            1 :               gcc_assert (CONST_INT_P (inner_scalar));
    3473            1 :               unsigned int bits = GET_MODE_BITSIZE (reg_mode);
    3474            1 :               machine_mode mode = int_mode_for_size (bits, 0).require ();
    3475            1 :               reg = gen_rtx_SUBREG (mode, reg, 0);
    3476              :             }
    3477              :         }
    3478        10332 :       rtx set = gen_rtx_SET (reg, inner_scalar);
    3479        10332 :       insn = emit_insn_before (set, set_insn);
    3480        10332 :       if (dump_file)
    3481              :         {
    3482            0 :           fprintf (dump_file, "\nAdd:\n\n");
    3483            0 :           print_rtl_single (dump_file, insn);
    3484            0 :           fprintf (dump_file, "\nbefore:\n\n");
    3485            0 :           print_rtl_single (dump_file, set_insn);
    3486            0 :           fprintf (dump_file, "\n");
    3487              :         }
    3488              :     }
    3489        43324 : }
    3490              : 
    3491              : /* At entry of the nearest common dominator for basic blocks with
    3492              :    conversions/rcp/sqrt/rsqrt/round, generate a single
    3493              :         vxorps %xmmN, %xmmN, %xmmN
    3494              :    for all
    3495              :         vcvtss2sd  op, %xmmN, %xmmX
    3496              :         vcvtsd2ss  op, %xmmN, %xmmX
    3497              :         vcvtsi2ss  op, %xmmN, %xmmX
    3498              :         vcvtsi2sd  op, %xmmN, %xmmX
    3499              : 
    3500              :    NB: We want to generate only a single vxorps to cover the whole
    3501              :    function.  The LCM algorithm isn't appropriate here since it may
    3502              :    place a vxorps inside the loop.
                      :
                      :    Every nondebug single-set insn whose avx_partial_xmm_update
                      :    attribute is true is rewritten as a vec_merge of a vec_duplicate
                      :    of its source with a shared zero pseudo, stored into a fresh
                      :    vector pseudo; the original insn then becomes a scalar SUBREG
                      :    copy from that vector pseudo.  Conversions are skipped when the
                      :    tuning flags tested below say so.  The indexes of the blocks
                      :    containing rewritten insns are collected in a bitmap and passed,
                      :    together with the zero pseudo and CONST0_RTX (V4SFmode), to
                      :    ix86_place_single_vector_set.  Always returns 0.  */
    3503              : 
    3504              : static unsigned int
    3505        33407 : remove_partial_avx_dependency (void)
    3506              : {
    3507        33407 :   timevar_push (TV_MACH_DEP);
    3508              : 
    3509        33407 :   bitmap_obstack_initialize (NULL);
    3510        33407 :   bitmap convert_bbs = BITMAP_ALLOC (NULL);
    3511              : 
    3512        33407 :   basic_block bb;
    3513        33407 :   rtx_insn *insn, *set_insn;
    3514        33407 :   rtx set;
    3515        33407 :   rtx v4sf_const0 = NULL_RTX;
    3516              : 
    3517        33407 :   auto_vec<rtx_insn *> control_flow_insns;
    3518              : 
    3519              :   /* We create invalid RTL initially so defer rescans.  */
    3520        33407 :   df_set_flags (DF_DEFER_INSN_RESCAN);
    3521              : 
    3522       311853 :   FOR_EACH_BB_FN (bb, cfun)
    3523              :     {
    3524      3493404 :       FOR_BB_INSNS (bb, insn)
    3525              :         {
    3526      3214958 :           if (!NONDEBUG_INSN_P (insn))
    3527      1437683 :             continue;
    3528              : 
    3529      1777275 :           set = single_set (insn);
    3530      1777275 :           if (!set)
    3531        70966 :             continue;
    3532              : 
    3533      1706309 :           if (get_attr_avx_partial_xmm_update (insn)
    3534              :               != AVX_PARTIAL_XMM_UPDATE_TRUE)
    3535      1703130 :             continue;
    3536              : 
    3537              :           /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
    3538              :              SI -> SF, SI -> DF, DI -> SF, DI -> DF, sqrt, rsqrt, rcp,
    3539              :              round, to vec_dup and vec_merge with subreg.  */
    3540         3179 :           rtx src = SET_SRC (set);
    3541         3179 :           rtx dest = SET_DEST (set);
    3542         3179 :           machine_mode dest_mode = GET_MODE (dest);
    3543         3179 :           bool convert_p = false;
    3544         3179 :           switch (GET_CODE (src))
    3545              :             {
    3546         3114 :             case FLOAT:
    3547         3114 :             case FLOAT_EXTEND:
    3548         3114 :             case FLOAT_TRUNCATE:
    3549         3114 :             case UNSIGNED_FLOAT:
    3550         3114 :               convert_p = true;
    3551         3114 :               break;
    3552              :             default:
    3553              :               break;
    3554              :             }
    3555              : 
    3556              :           /* Only handle conversion here.  Non-conversions get VOIDmode
                      :              and therefore bypass the tuning checks below.  */
    3557         3114 :           machine_mode src_mode
    3558         3114 :             = convert_p ? GET_MODE (XEXP (src, 0)) : VOIDmode;
    3559         3114 :           switch (src_mode)
    3560              :             {
    3561          153 :             case E_SFmode:
    3562          153 :             case E_DFmode:
    3563          153 :               if (TARGET_USE_VECTOR_FP_CONVERTS
    3564          147 :                   || !TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY)
    3565            8 :                 continue;
    3566              :               break;
    3567         2961 :             case E_SImode:
    3568         2961 :             case E_DImode:
    3569         2961 :               if (TARGET_USE_VECTOR_CONVERTS
    3570         2949 :                   || !TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY)
    3571           14 :                 continue;
    3572              :               break;
    3573           65 :             case E_VOIDmode:
    3574           65 :               gcc_assert (!convert_p);
    3575              :               break;
    3576            0 :             default:
    3577            0 :               gcc_unreachable ();
    3578              :             }
    3579              :           /* Create the shared zero pseudo on the first rewritten insn.  */
    3580         3157 :           if (!v4sf_const0)
    3581         1022 :             v4sf_const0 = gen_reg_rtx (V4SFmode);
    3582              :           /* View the zero pseudo in the vector mode for DEST_MODE.  */
    3583         3157 :           rtx zero;
    3584         3157 :           machine_mode dest_vecmode;
    3585         3157 :           switch (dest_mode)
    3586              :             {
    3587           50 :             case E_HFmode:
    3588           50 :               dest_vecmode = V8HFmode;
    3589           50 :               zero = gen_rtx_SUBREG (V8HFmode, v4sf_const0, 0);
    3590           50 :               break;
    3591              :             case E_SFmode:
    3592              :               dest_vecmode = V4SFmode;
    3593              :               zero = v4sf_const0;
    3594              :               break;
    3595         1167 :             case E_DFmode:
    3596         1167 :               dest_vecmode = V2DFmode;
    3597         1167 :               zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0);
    3598         1167 :               break;
    3599            0 :             default:
    3600            0 :               gcc_unreachable ();
    3601              :             }
    3602              : 
    3603              :           /* Change source to vector mode.  */
    3604         3157 :           src = gen_rtx_VEC_DUPLICATE (dest_vecmode, src);
    3605         3157 :           src = gen_rtx_VEC_MERGE (dest_vecmode, src, zero,
    3606              :                                    GEN_INT (HOST_WIDE_INT_1U));
    3607              :           /* Change destination to vector mode.  */
    3608         3157 :           rtx vec = gen_reg_rtx (dest_vecmode);
    3609              :           /* Generate an XMM vector SET.  */
    3610         3157 :           set = gen_rtx_SET (vec, src);
    3611         3157 :           set_insn = emit_insn_before (set, insn);
    3612              : 
    3613         3157 :           if (cfun->can_throw_non_call_exceptions)
    3614              :             {
    3615              :               /* Handle REG_EH_REGION note: copy it onto SET_INSN and
                      :                  queue SET_INSN so its block can be split after
                      :                  the scan.  */
    3616            0 :               rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
    3617            0 :               if (note)
    3618              :                 {
    3619            0 :                   control_flow_insns.safe_push (set_insn);
    3620            0 :                   add_reg_note (set_insn, REG_EH_REGION, XEXP (note, 0));
    3621              :                 }
    3622              :             }
    3623              : 
    3624         3157 :           src = gen_rtx_SUBREG (dest_mode, vec, 0);
    3625         3157 :           set = gen_rtx_SET (dest, src);
    3626              : 
    3627              :           /* Drop possible dead definitions.  */
    3628         3157 :           PATTERN (insn) = set;
    3629              :           /* Reset the insn code so the new pattern is re-recognized.  */
    3630         3157 :           INSN_CODE (insn) = -1;
    3631         3157 :           recog_memoized (insn);
    3632         3157 :           df_insn_rescan (insn);
    3633         3157 :           bitmap_set_bit (convert_bbs, bb->index);
    3634              :         }
    3635              :     }
    3636              :   /* A zero pseudo exists only if at least one insn was rewritten.  */
    3637        33407 :   if (v4sf_const0)
    3638              :     {
    3639              :       /* (Re-)discover loops so that bb->loop_father can be used in the
    3640              :          analysis below.  */
    3641         1022 :       calculate_dominance_info (CDI_DOMINATORS);
    3642         1022 :       loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
    3643              : 
    3644         1022 :       ix86_place_single_vector_set (v4sf_const0,
    3645              :                                     CONST0_RTX (V4SFmode),
    3646              :                                     convert_bbs);
    3647              : 
    3648         1022 :       loop_optimizer_finalize ();
    3649              : 
    3650         1022 :       if (!control_flow_insns.is_empty ())
    3651              :         {
    3652            0 :           free_dominance_info (CDI_DOMINATORS);
    3653              : 
    3654            0 :           unsigned int i;
    3655            0 :           FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
    3656            0 :             if (control_flow_insn_p (insn))
    3657              :               {
    3658              :                 /* Split the block after insn.  There will be a fallthru
    3659              :                    edge, which is OK so we keep it.  We have to create
    3660              :                    the exception edges ourselves.  */
    3661            0 :                 bb = BLOCK_FOR_INSN (insn);
    3662            0 :                 split_block (bb, insn);
    3663            0 :                 rtl_make_eh_edge (NULL, bb, BB_END (bb));
    3664              :               }
    3665              :         }
    3666              :     }
    3667              : 
    3668        33407 :   df_process_deferred_rescans ();
    3669        33407 :   df_clear_flags (DF_DEFER_INSN_RESCAN);
    3670        33407 :   bitmap_obstack_release (NULL);
    3671        33407 :   BITMAP_FREE (convert_bbs);
    3672              :   /* NOTE(review): CONVERT_BBS is freed above only after the obstack it was allocated from has been released; confirm this ordering is intended.  */
    3673        33407 :   timevar_pop (TV_MACH_DEP);
    3674        33407 :   return 0;
    3675        33407 : }
    3676              : 
    3677              : namespace {
    3678              : /* Descriptor for the "rpad" RTL pass: timed under TV_MACH_DEP, with no property or TODO flags set.  */
    3679              : const pass_data pass_data_remove_partial_avx_dependency =
    3680              : {
    3681              :   RTL_PASS, /* type */
    3682              :   "rpad", /* name */
    3683              :   OPTGROUP_NONE, /* optinfo_flags */
    3684              :   TV_MACH_DEP, /* tv_id */
    3685              :   0, /* properties_required */
    3686              :   0, /* properties_provided */
    3687              :   0, /* properties_destroyed */
    3688              :   0, /* todo_flags_start */
    3689              :   0, /* todo_flags_finish */
    3690              : };
    3691              : /* Pass wrapper: gated by ix86_rpad_gate, executes remove_partial_avx_dependency.  */
    3692              : class pass_remove_partial_avx_dependency : public rtl_opt_pass
    3693              : {
    3694              : public:
    3695       288767 :   pass_remove_partial_avx_dependency (gcc::context *ctxt)
    3696       577534 :     : rtl_opt_pass (pass_data_remove_partial_avx_dependency, ctxt)
    3697              :   {}
    3698              : 
    3699              :   /* opt_pass methods: */
    3700      1481491 :   bool gate (function *) final override
    3701              :     {
    3702      1481491 :       return ix86_rpad_gate ();
    3703              :     }
    3704              : 
    3705        33407 :   unsigned int execute (function *) final override
    3706              :     {
    3707        33407 :       return remove_partial_avx_dependency ();
    3708              :     }
    3709              : }; // class pass_remove_partial_avx_dependency
    3710              : 
    3711              : } // anon namespace
    3712              : /* Create a new pass_remove_partial_avx_dependency instance for CTXT.  */
    3713              : rtl_opt_pass *
    3714       288767 : make_pass_remove_partial_avx_dependency (gcc::context *ctxt)
    3715              : {
    3716       288767 :   return new pass_remove_partial_avx_dependency (ctxt);
    3717              : }
    3718              : 
    3719              : /* Return a machine mode suitable for vector SIZE with SMODE inner
    3720              :    mode.  SIZE is the total vector size in the same units as
                      :    GET_MODE_SIZE; the element count is SIZE divided by the size of
                      :    the element mode.  If SMODE is itself a vector mode, its inner
                      :    mode is used as the element mode.  */
    3721              : 
    3722              : static machine_mode
    3723        64127 : ix86_get_vector_cse_mode (unsigned int size, machine_mode smode)
    3724              : {
    3725              :   /* Use the inner scalar mode of vector broadcast source in:
    3726              : 
    3727              :      (set (reg:V8DF 394)
    3728              :           (vec_duplicate:V8DF (reg:V2DF 190 [ alpha ])))
    3729              : 
    3730              :      to compute the vector mode for broadcast from vector source.
    3731              :    */
    3732        64127 :   if (VECTOR_MODE_P (smode))
    3733        31026 :     smode = GET_MODE_INNER (smode);
    3734        64127 :   scalar_mode s_mode = as_a <scalar_mode> (smode);
    3735       128254 :   poly_uint64 nunits = size / GET_MODE_SIZE (smode);
    3736        64127 :   machine_mode mode = mode_for_vector (s_mode, nunits).require ();
    3737        64127 :   return mode;
    3738              : }
    3739              : 
    3740              : /* Replace the source operand of instructions in VECTOR_INSNS with
    3741              :    VECTOR_CONST in VECTOR_MODE.  VECTOR_INSNS is a bitmap of insn UIDs.
                      :    VECTOR_CONST is used directly when the source is a SUBREG, when
                      :    the destination already has VECTOR_MODE, or when VECTOR_CONST is
                      :    a CONST_INT; otherwise a SUBREG of VECTOR_CONST in the
                      :    destination mode is used.  For a CONST_INT replacement the
                      :    destination is wrapped in a SUBREG of SCALAR_MODE.  Each insn is
                      :    re-recognized and rescanned by DF afterwards.  */
    3742              : 
    3743              : static void
    3744        63656 : replace_vector_const (machine_mode vector_mode, rtx vector_const,
    3745              :                       auto_bitmap &vector_insns,
    3746              :                       machine_mode scalar_mode)
    3747              : {
    3748        63656 :   bitmap_iterator bi;
    3749        63656 :   unsigned int id;
    3750              : 
    3751       222461 :   EXECUTE_IF_SET_IN_BITMAP (vector_insns, 0, id, bi)
    3752              :     {
    3753       158805 :       rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
    3754              : 
    3755              :       /* Get the single SET instruction.  NOTE(review): the result is
                      :          used unchecked, so every insn in VECTOR_INSNS is assumed
                      :          to have a single set.  */
    3756       158805 :       rtx set = single_set (insn);
    3757       158805 :       rtx src = SET_SRC (set);
    3758       158805 :       rtx dest = SET_DEST (set);
    3759       158805 :       machine_mode mode = GET_MODE (dest);
    3760              : 
    3761       158805 :       rtx replace;
    3762              :       /* Replace the source operand with VECTOR_CONST.  */
    3763       158805 :       if (SUBREG_P (src)
    3764       158805 :           || mode == vector_mode
    3765        60165 :           || CONST_INT_P (vector_const))
    3766              :         replace = vector_const;
    3767              :       else
    3768              :         {
    3769        60165 :           unsigned int size = GET_MODE_SIZE (mode);
    3770        60165 :           if (size < ix86_regmode_natural_size (mode))
    3771              :             {
    3772              :               /* If the mode size is smaller than its natural size,
    3773              :                  first insert an extra move with a QI vector SUBREG
    3774              :                  of the same size to avoid validate_subreg failure.
                      :                  NOTE(review): the element mode below comes from
                      :                  SCALAR_MODE, which need not be QImode; confirm the
                      :                  "QI vector" wording.  */
    3775          471 :               machine_mode vmode
    3776          471 :                 = ix86_get_vector_cse_mode (size, scalar_mode);
    3777          471 :               rtx vreg;
    3778          471 :               if (mode == vmode)
    3779              :                 vreg = vector_const;
    3780              :               else
    3781              :                 {
    3782           59 :                   vreg = gen_reg_rtx (vmode);
    3783           59 :                   rtx vsubreg = gen_rtx_SUBREG (vmode, vector_const, 0);
    3784           59 :                   rtx pat = gen_rtx_SET (vreg, vsubreg);
    3785           59 :                   rtx_insn *vinsn = emit_insn_before (pat, insn);
    3786           59 :                   if (dump_file)
    3787              :                     {
    3788            0 :                       fprintf (dump_file, "\nInsert an extra move:\n\n");
    3789            0 :                       print_rtl_single (dump_file, vinsn);
    3790            0 :                       fprintf (dump_file, "\nbefore:\n\n");
    3791            0 :                       print_rtl_single (dump_file, insn);
    3792            0 :                       fprintf (dump_file, "\n");
    3793              :                     }
    3794              :                 }
    3795          471 :               replace = gen_rtx_SUBREG (mode, vreg, 0);
    3796              :             }
    3797              :           else
    3798        59694 :             replace = gen_rtx_SUBREG (mode, vector_const, 0);
    3799              :         }
    3800              : 
    3801       158805 :       if (dump_file)
    3802              :         {
    3803            3 :           fprintf (dump_file, "\nReplace:\n\n");
    3804            3 :           print_rtl_single (dump_file, insn);
    3805              :         }
    3806       158805 :       SET_SRC (set) = replace;
    3807       158805 :       if (CONST_INT_P (replace))
    3808              :         {
    3809        23527 :           dest = gen_rtx_SUBREG (scalar_mode, dest, 0);
    3810        23527 :           SET_DEST (set) = dest;
    3811              :         }
    3812              :       /* Drop possible dead definitions.  */
    3813       158805 :       PATTERN (insn) = set;
    3814       158805 :       INSN_CODE (insn) = -1;
    3815       158805 :       recog_memoized (insn);
    3816       158805 :       if (dump_file)
    3817              :         {
    3818            3 :           fprintf (dump_file, "\nwith:\n\n");
    3819            3 :           print_rtl_single (dump_file, insn);
    3820            3 :           fprintf (dump_file, "\n");
    3821              :         }
    3822       158805 :       df_insn_rescan (insn);
    3823              :     }
    3824        63656 : }
    3825              : 
    3826              : /* Return the inner scalar if OP is a broadcast, else return nullptr.  */
    3827              : 
    3828              : static rtx
    3829      2202109 : ix86_broadcast_inner (rtx op, machine_mode mode,
    3830              :                       machine_mode *scalar_mode_p,
    3831              :                       x86_cse_kind *kind_p, rtx_insn **insn_p)
    3832              : {
    3833      2202109 :   switch (standard_sse_constant_p (op, mode))
    3834              :     {
    3835       114524 :     case 1:
    3836       114524 :       *scalar_mode_p = QImode;
    3837       114524 :       *kind_p = X86_CSE_CONST0_VECTOR;
    3838       114524 :       *insn_p = nullptr;
    3839       114524 :       return const0_rtx;
    3840        12130 :     case 2:
    3841        12130 :       *scalar_mode_p = QImode;
    3842        12130 :       *kind_p = X86_CSE_CONSTM1_VECTOR;
    3843        12130 :       *insn_p = nullptr;
    3844        12130 :       return constm1_rtx;
    3845      2075455 :     default:
    3846      2075455 :       break;
    3847              :     }
    3848              : 
    3849      2075455 :   mode = GET_MODE (op);
    3850      2075455 :   int nunits = GET_MODE_NUNITS (mode);
    3851      2075455 :   if (nunits < 2)
    3852              :     return nullptr;
    3853              : 
    3854      1600386 :   bool const_vector_p = CONST_VECTOR_P (op);
    3855      1600386 :   bool duplicated = GET_CODE (op) == VEC_DUPLICATE;
    3856      1600386 :   rtx orig_op = op;
    3857      1600386 :   if (!const_vector_p)
    3858              :     {
    3859              :       /* Check CONST_VECTOR in REG_EQUAL note.  */
    3860      1600366 :       rtx equal = find_reg_equal_equiv_note (*insn_p);
    3861      1600366 :       if (equal)
    3862              :         {
    3863       371383 :           equal = XEXP (equal, 0);
    3864       371383 :           const_vector_p = CONST_VECTOR_P (equal);
    3865              :           /* Use CONST_VECTOR in REG_EQUAL note.  */
    3866       371383 :           if (const_vector_p)
    3867              :             {
    3868              :               /* Handle REG_EQUAL note in:
    3869              : 
    3870              :                  (insn 7 5 12 2 (set (subreg:V8SI (reg:V4DI 100) 0)
    3871              :                         (vec_duplicate:V8SI (reg:SI 102)))
    3872              :                     (expr_list:REG_DEAD (reg:SI 102)
    3873              :                        (expr_list:REG_EQUAL (const_vector:V4DI [
    3874              :                           (const_int -1 [0xffffffffffffffff]) repeated x4]) (nil))))
    3875              : 
    3876              :                  NB: Don't treat it as CONST_VECTOR since EQUAL isn't
    3877              :                  supported by ISAs as in gcc.target/i386/pr40957.c.  */
    3878       260409 :               if (GET_MODE (equal) != mode)
    3879              :                 const_vector_p = false;
    3880              :               else
    3881      1600386 :                 op = equal;
    3882              :             }
    3883              :         }
    3884              :     }
    3885              : 
    3886      1600386 :   machine_mode inner_mode = GET_MODE_INNER (mode);
    3887              : 
    3888      1600386 :   if (const_vector_p)
    3889              :     {
    3890       520790 :       bool int_load_p = GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
    3891       260395 :       *kind_p = X86_CSE_CONST_VECTOR;
    3892       260395 :       if (int_load_p)
    3893              :         {
    3894              :           /* This CONST_VECTOR load can be converted to constant
    3895              :              integer load.  */
    3896        34643 :           *scalar_mode_p = mode;
    3897        34643 :           *insn_p = nullptr;
    3898        34643 :           return op;
    3899              :         }
    3900              : 
    3901              :       /* This CONST_VECTOR is wider than the integer register.  */
    3902       225752 :       rtx first = XVECEXP (op, 0, 0);
    3903              : 
    3904       225752 :       if (duplicated)
    3905              :         {
    3906              :           /* Check if CONST_VECTOR in REG_EQUAL note is duplicated in
    3907              : 
    3908              :              (insn 10 7 12 2 (set (reg:V8SI 128)
    3909              :                 (vec_duplicate:V8SI (vec_select:V2SI (reg:V4SI 180)
    3910              :                         (parallel [(const_int 0 [0])
    3911              :                                    (const_int 1 [0x1])]))))
    3912              :                   (expr_list:REG_EQUAL (const_vector:V8SI [
    3913              :                     (const_int 0 [0])
    3914              :                     (const_int 34 [0x22])
    3915              :                     (const_int 0 [0])
    3916              :                     (const_int 34 [0x22])
    3917              :                     (const_int 0 [0])
    3918              :                     (const_int 34 [0x22])
    3919              :                     (const_int 0 [0])
    3920              :                     (const_int 34 [0x22])])(nil)))
    3921              : 
    3922              :            */
    3923              : 
    3924       211066 :           bool duplicated_const_vector = true;
    3925       211066 :           for (int i = 1; i < nunits; ++i)
    3926              :             {
    3927       138153 :               rtx tmp = XVECEXP (op, 0, i);
    3928       138153 :               if (!rtx_equal_p (tmp, first))
    3929              :                 {
    3930              :                   duplicated_const_vector = false;
    3931              :                   break;
    3932              :                 }
    3933              :             }
    3934              : 
    3935        72929 :           if (duplicated_const_vector)
    3936              :             {
    3937        72913 :               bool const_double_p = CONST_DOUBLE_P (first);
    3938              :               /* Force the floating point constant to memory.  */
    3939        72913 :               if (const_double_p)
    3940         5534 :                 first = validize_mem (force_const_mem (inner_mode, first));
    3941              : 
    3942        72913 :               if (const_double_p || CONST_INT_P (first))
    3943              :                 {
    3944              :                   /* Handle
    3945              : 
    3946              :                      (insn 7 6 8 2 (set (reg:V4SF 99)
    3947              :                           (vec_duplicate:V4SF (mem/u/c:SF (symbol_ref/u:DI ("*.LC2") [flags 0x2]) [0  S4 A32])))
    3948              :                         (expr_list:REG_EQUAL (const_vector:V4SF [
    3949              :                            (const_double:SF 3.4e+1 [0x0.88p+6]) repeated x4]) (nil)))
    3950              : 
    3951              :                      and
    3952              : 
    3953              :                      (insn 14 15 16 3 (set (reg:V4SI 116)
    3954              :                           (vec_duplicate:V4SI (reg:SI 117)))
    3955              :                        (expr_list:REG_EQUAL (const_vector:V4SI [
    3956              :                           (const_int 34 [0x22]) repeated x4]) (nil)))
    3957              : 
    3958              :                    */
    3959        72913 :                   *kind_p = X86_CSE_VEC_DUP;
    3960        72913 :                   *insn_p = nullptr;
    3961        72913 :                   *scalar_mode_p = inner_mode;
    3962        72913 :                   return first;
    3963              :                 }
    3964              :             }
    3965              : 
    3966              :           op = orig_op;
    3967              :         }
    3968              :       else
    3969              :         {
    3970              :           /* Only native CONST_VECTOR is allowed.  */
    3971       152823 :           if (orig_op != op)
    3972              :             return nullptr;
    3973              : 
    3974              :           /* Check if VEC_DUPLICATE can be used.  */
    3975           48 :           for (int i = 1; i < nunits; ++i)
    3976              :             {
    3977           48 :               rtx tmp = XVECEXP (op, 0, i);
    3978              :               /* Vector duplicate value.  */
    3979           48 :               if (!rtx_equal_p (tmp, first))
    3980              :                 return nullptr;
    3981              :             }
    3982              : 
    3983              :           /* Use the inner mode to handle
    3984              :              (const_vector:V2QI [(const_int 0 [0]) repeated x2])
    3985              :            */
    3986            0 :           *scalar_mode_p = inner_mode;
    3987            0 :           *insn_p = nullptr;
    3988            0 :           return first;
    3989              :         }
    3990              :     }
    3991              : 
    3992      1340007 :   if (!duplicated)
    3993              :     return nullptr;
    3994              : 
    3995        22642 :   *kind_p = X86_CSE_VEC_DUP;
    3996              : 
    3997              :   /* Only
    3998              : 
    3999              :      (vec_duplicate:V4SI (reg:SI 99))
    4000              :      (vec_duplicate:V2DF (mem/u/c:DF (symbol_ref/u:DI ("*.LC1") [flags 0x2]) [0 S8 A64]))
    4001              : 
    4002              :      are supported.  Set OP to the broadcast source by default.  */
    4003        22642 :   op = XEXP (op, 0);
    4004        22642 :   rtx reg = op;
    4005        22642 :   if (SUBREG_P (op)
    4006          401 :       && SUBREG_BYTE (op) == 0
    4007        23043 :       && !paradoxical_subreg_p (op))
    4008          401 :     reg = SUBREG_REG (op);
    4009        22642 :   if (!REG_P (reg))
    4010              :     {
    4011         2301 :       if (MEM_P (op)
    4012         2045 :           && SYMBOL_REF_P (XEXP (op, 0))
    4013         2538 :           && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
    4014              :         {
    4015              :           /* Handle constant broadcast from memory.  */
    4016           11 :           *scalar_mode_p = inner_mode;
    4017           11 :           *insn_p = nullptr;
    4018           11 :           return op;
    4019              :         }
    4020              :       return nullptr;
    4021              :     }
    4022              : 
    4023        20341 :   machine_mode orig_mode = mode;
    4024        20341 :   mode = GET_MODE (op);
    4025              : 
    4026              :   /* Only single def chain is supported.  */
    4027        20341 :   df_ref ref = DF_REG_DEF_CHAIN (REGNO (reg));
    4028        20341 :   if (!ref
    4029        20340 :       || DF_REF_IS_ARTIFICIAL (ref)
    4030        20340 :       || DF_REF_NEXT_REG (ref) != nullptr)
    4031              :     return nullptr;
    4032              : 
    4033        14863 :   rtx_insn *insn = DF_REF_INSN (ref);
    4034        14863 :   rtx set = single_set (insn);
    4035        14863 :   if (!set)
    4036              :     return nullptr;
    4037              : 
    4038        14824 :   rtx src = SET_SRC (set);
    4039              : 
    4040        14824 :   if (CONST_INT_P (src))
    4041              :     {
    4042              :       /* Handle sequences like
    4043              : 
    4044              :          (set (subreg:SI (reg/v:SF 105 [ f ]) 0)
    4045              :               (const_int 0 [0]))
    4046              :          (set (reg:V4SF 110)
    4047              :               (vec_duplicate:V4SF (reg/v:SF 105 [ f ])))
    4048              : 
    4049              :          and
    4050              : 
    4051              :          (set (reg:SI 99)
    4052              :                (const_int 34 [0x22]))
    4053              :          (set (reg:V4SI 98)
    4054              :                (vec_duplicate:V4SI (reg:SI 99)))
    4055              : 
    4056              :          Set *INSN_P to nullptr and return SET_SRC if SET_SRC is an
    4057              :          integer constant.  */
    4058          234 :       op = src;
    4059          234 :       if (SCALAR_INT_MODE_P (mode) && mode != GET_MODE (reg))
    4060            0 :         op = gen_int_mode (INTVAL (src), mode);
    4061          234 :       if (op == const0_rtx)
    4062              :         {
    4063            6 :            if (standard_sse_constant_p (CONST0_RTX (orig_mode),
    4064              :                                         orig_mode) == 1)
    4065              :              {
    4066            6 :                *scalar_mode_p = QImode;
    4067            6 :                *kind_p = X86_CSE_CONST0_VECTOR;
    4068            6 :                *insn_p = nullptr;
    4069            6 :                return const0_rtx;
    4070              :              }
    4071            0 :            op = CONST0_RTX (mode);
    4072              :         }
    4073          228 :       else if (op == constm1_rtx
    4074          228 :                && standard_sse_constant_p (CONSTM1_RTX (orig_mode),
    4075              :                                            orig_mode) == 2)
    4076              :         {
    4077            0 :           *scalar_mode_p = QImode;
    4078            0 :           *kind_p = X86_CSE_CONSTM1_VECTOR;
    4079            0 :           *insn_p = nullptr;
    4080            0 :           return constm1_rtx;
    4081              :         }
    4082              : 
    4083              :       /* Check if we can convert:
    4084              : 
    4085              :          (insn 14 465 412 3 (set (reg:SI 507 [ j_lsm.26 ])
    4086              :                 (const_int 2 [0x2])) "foo.c":10:12 discrim 2 100 {*movsi_internal} (nil))
    4087              :          ...
    4088              :          (insn 518 507 434 16 (set (reg:V2SI 493)
    4089              :                 (vec_duplicate:V2SI (reg:SI 507 [ j_lsm.26 ]))) 2395 {*vec_dupv2si} (nil))
    4090              : 
    4091              :          to constant integer load:
    4092              : 
    4093              :          (insn 566 55 56 6 (set (subreg:DI (reg:V2SI 517) 0)
    4094              :                 (const_int 8589934594 [0x200000002])) -1 (nil))
    4095              :          ...
    4096              :          (insn 518 507 434 16 (set (reg:V2SI 493)
    4097              :                 (reg:V2SI 517)) 2066 {*movv2si_internal} (nil))
    4098              : 
    4099              :        */
    4100          456 :       if (GET_MODE_SIZE (orig_mode) <= UNITS_PER_WORD)
    4101            6 :         *kind_p = X86_CSE_CONST_VECTOR;
    4102              : 
    4103          228 :       *insn_p = nullptr;
    4104              :     }
    4105              :   else
    4106              :     {
    4107              :       /* Handle sequences like
    4108              : 
    4109              :          (set (reg:QI 105 [ c ])
    4110              :               (reg:QI 5 di [ c ]))
    4111              :          (set (reg:V64QI 102 [ _1 ])
    4112              :               (vec_duplicate:V64QI (reg:QI 105 [ c ])))
    4113              : 
    4114              :          (set (reg/v:SI 116 [ argc ])
    4115              :               (mem/c:SI (reg:SI 135) [2 argc+0 S4 A32]))
    4116              :          (set (reg:V4SI 119 [ _45 ])
    4117              :               (vec_duplicate:V4SI (reg/v:SI 116 [ argc ])))
    4118              : 
    4119              :          (set (reg:SI 98 [ _1 ])
    4120              :               (sign_extend:SI (reg:QI 106 [ c ])))
    4121              :          (set (reg:V16SI 103 [ _2 ])
    4122              :                (vec_duplicate:V16SI (reg:SI 98 [ _1 ])))
    4123              : 
    4124              :          (set (reg:SI 102 [ cost ])
    4125              :               (mem/c:SI (symbol_ref:DI ("cost") [flags 0x40])))
    4126              :          (set (reg:V4HI 103 [ _16 ])
    4127              :               (vec_duplicate:V4HI (subreg:HI (reg:SI 102 [ cost ]) 0)))
    4128              : 
    4129              :          (set (subreg:SI (reg/v:HI 107 [ cr_val ]) 0)
    4130              :               (ashift:SI (reg:SI 158)
    4131              :                          (subreg:QI (reg:SI 156 [ _2 ]) 0)))
    4132              :          (set (reg:V16HI 183 [ _61 ])
    4133              :               (vec_duplicate:V16HI (reg/v:HI 107 [ cr_val ])))
    4134              : 
    4135              :          Set *INSN_P to INSN and return the broadcast source otherwise.  */
    4136        14590 :       *insn_p = insn;
    4137              :     }
    4138              : 
    4139        14818 :   *scalar_mode_p = mode;
    4140        14818 :   return op;
    4141              : }
    4142              : 
    4143              : /* Replace every CALL instruction in TLS_CALL_INSNS with a SET of its
    4144              :    destination from SRC and record the new insn in UPDATED_TLS_INSNS.  */
    4145              : 
    4146              : static void
    4147          313 : replace_tls_call (rtx src, auto_bitmap &tls_call_insns,
    4148              :                   auto_bitmap &updated_tls_insns)
    4149              : {
    4150          313 :   bitmap_iterator bi;
    4151          313 :   unsigned int id;
    4152              :   /* Each bit set in TLS_CALL_INSNS is the UID of an insn to replace.  */
    4153         1739 :   EXECUTE_IF_SET_IN_BITMAP (tls_call_insns, 0, id, bi)
    4154              :     {
    4155         1426 :       rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
    4156              : 
    4157              :       /* If INSN isn't a CALL, it must be a GNU2 TLS implicit CALL pattern
    4158              :          whose tls64 attribute is TLS64_CALL or TLS64_COMBINE.  */
    4159         1426 :       if (!CALL_P (insn))
    4160              :         {
    4161           47 :           attr_tls64 tls64 = get_attr_tls64 (insn);
    4162           47 :           if (tls64 != TLS64_CALL && tls64 != TLS64_COMBINE)
    4163            0 :             gcc_unreachable ();
    4164              :         }
    4165              :       /* INSN must be a PARALLEL whose first element is the SET to replace.  */
    4166         1426 :       rtx pat = PATTERN (insn);
    4167         1426 :       gcc_assert (GET_CODE (pat) == PARALLEL);
    4168         1426 :       rtx set = XVECEXP (pat, 0, 0);
    4169         1426 :       gcc_assert (GET_CODE (set) == SET);
    4170         1426 :       rtx dest = SET_DEST (set);
    4171              :       /* Emit the replacement SET after INSN; it must be recognized.  */
    4172         1426 :       set = gen_rtx_SET (dest, src);
    4173         1426 :       rtx_insn *set_insn = emit_insn_after (set, insn);
    4174         1426 :       if (recog_memoized (set_insn) < 0)
    4175            0 :         gcc_unreachable ();
    4176              : 
    4177              :       /* Record the UID of SET_INSN in UPDATED_TLS_INSNS.  */
    4178         1426 :       bitmap_set_bit (updated_tls_insns, INSN_UID (set_insn));
    4179              : 
    4180         1426 :       if (dump_file)
    4181              :         {
    4182            0 :           fprintf (dump_file, "\nReplace:\n\n");
    4183            0 :           print_rtl_single (dump_file, insn);
    4184            0 :           fprintf (dump_file, "\nwith:\n\n");
    4185            0 :           print_rtl_single (dump_file, set_insn);
    4186            0 :           fprintf (dump_file, "\n");
    4187              :         }
    4188              : 
    4189              :       /* Delete the original TLS insn, which SET_INSN now replaces.  */
    4190         1426 :       delete_insn (insn);
    4191              :       /* Let DF rescan the newly emitted insn.  */
    4192         1426 :       df_insn_rescan (set_insn);
    4193              :     }
    4194          313 : }
    4195              : 
    4196              : /* Return the nearest common dominator of the basic blocks which dominate BB
    4197              :    and set hard register REGNO; artificial defs and clobbers are ignored.  */
    4198              : 
    4199              : static basic_block
    4200            2 : ix86_get_dominator_for_reg (unsigned int regno, basic_block bb)
    4201              : {
    4202            2 :   basic_block set_bb;
    4203            2 :   auto_bitmap set_bbs;
    4204              : 
    4205              :   /* Walk all DEFs of REGNO and collect in SET_BBS the indexes of the BBs
    4206              :      which set REGNO and dominate the current BB.  */
    4207            2 :   for (df_ref def = DF_REG_DEF_CHAIN (regno);
    4208           18 :        def;
    4209           16 :        def = DF_REF_NEXT_REG (def))
    4210           16 :     if (!DF_REF_IS_ARTIFICIAL (def)
    4211           16 :         && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER)
    4212            6 :         && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER))
    4213              :       {
    4214            4 :         set_bb = DF_REF_BB (def);
    4215            4 :         if (dominated_by_p (CDI_DOMINATORS, bb, set_bb))
    4216            2 :           bitmap_set_bit (set_bbs, set_bb->index);
    4217              :       }
    4218              :   /* NOTE(review): SET_BBS is empty if no def qualifies -- confirm callers.  */
    4219            2 :   bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
    4220            2 :   return bb;
    4221            2 : }
    4222              : 
    4223              : /* note_stores callback: set FLAGS_REG in DATA, an auto_bitmap of live
    4224              :    caller-saved registers, if DEST is FLAGS register and X isn't a CLOBBER.  */
    4225              : 
    4226              : static void
    4227          381 : ix86_check_flags_reg (rtx dest, const_rtx x, void *data)
    4228              : {
    4229          381 :   if (GET_CODE (x) == CLOBBER)
    4230              :     return;
    4231              :   /* DATA points to the caller's auto_bitmap.  */
    4232          374 :   auto_bitmap *live_caller_saved_regs = (auto_bitmap *) data;
    4233          374 :   if (REG_P (dest) && REGNO (dest) == FLAGS_REG)
    4234            0 :     bitmap_set_bit (*live_caller_saved_regs, FLAGS_REG);
    4235              : }
    4236              : 
    4237              : /* Emit TLS_SET of KIND in basic block BB, or in a dominating basic block
    4238              :    if BB has no usable insertion point, and return the emitted insn.  Store
    4239              :    the insertion point in *BEFORE_P for emit_insn_before or in *AFTER_P for
    4240              :    emit_insn_after.  UPDATED_GNU_TLS_INSNS and UPDATED_GNU2_TLS_INSNS hold
    4241              :    the UIDs of the insns which replace the GNU and GNU2 TLS instructions.  */
    4242              : 
    4243              : static rtx_insn *
    4244          313 : ix86_emit_tls_call (rtx tls_set, x86_cse_kind kind, basic_block bb,
    4245              :                     rtx_insn **before_p, rtx_insn **after_p,
    4246              :                     auto_bitmap &updated_gnu_tls_insns,
    4247              :                     auto_bitmap &updated_gnu2_tls_insns)
    4248              : {
    4249          315 :   rtx_insn *tls_insn;
    4250              :   /* Retry in a dominating basic block until TLS_SET has been emitted.  */
    4251          315 :   do
    4252              :     {
    4253          315 :       rtx_insn *insn = BB_HEAD (bb);
    4254         1297 :       while (insn && !NONDEBUG_INSN_P (insn))
    4255              :         {
    4256          986 :           if (insn == BB_END (bb))
    4257              :             {
    4258              :               /* This must be the beginning basic block:
    4259              : 
    4260              :                  (note 4 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
    4261              :                  (note 2 4 26 2 NOTE_INSN_FUNCTION_BEG)
    4262              : 
    4263              :                  or a basic block with only a label:
    4264              : 
    4265              :                  (code_label 78 11 77 3 14 (nil) [1 uses])
    4266              :                  (note 77 78 54 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
    4267              : 
    4268              :                  or a basic block with only a debug marker:
    4269              : 
    4270              :                  (note 3 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
    4271              :                  (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
    4272              :                  (debug_insn 5 2 16 2 (debug_marker) "x.c":6:3 -1 (nil))
    4273              : 
    4274              :                  or a basic block with only deleted instructions:
    4275              : 
    4276              :                  (code_label 348 23 349 45 3 (nil) [0 uses])
    4277              :                  (note 349 348 436 45 [bb 45] NOTE_INSN_BASIC_BLOCK)
    4278              :                  (note 436 349 362 45 NOTE_INSN_DELETED)
    4279              : 
    4280              :                */
    4281            4 :               gcc_assert (DEBUG_INSN_P (insn)
    4282              :                           || (NOTE_P (insn)
    4283              :                               && ((NOTE_KIND (insn)
    4284              :                                    == NOTE_INSN_FUNCTION_BEG)
    4285              :                                   || (NOTE_KIND (insn)
    4286              :                                       == NOTE_INSN_DELETED)
    4287              :                                   || (NOTE_KIND (insn)
    4288              :                                       == NOTE_INSN_BASIC_BLOCK))));
    4289              :               insn = NULL;
    4290              :               break;
    4291              :             }
    4292          982 :           insn = NEXT_INSN (insn);
    4293              :         }
    4294              : 
    4295              :       /* TLS_GD and TLS_LD_BASE instructions are normal functions which
    4296              :          clobber caller-saved registers.  TLSDESC instructions only
    4297              :          clobber FLAGS.  If any registers clobbered by TLS instructions
    4298              :          are live in this basic block, we must insert TLS instructions
    4299              :          after all live registers clobbered are dead.  */
    4300              : 
    4301          315 :       auto_bitmap live_caller_saved_regs;
    4302          630 :       bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb);
    4303              :       /* FLAGS is checked for every KIND.  */
    4304          315 :       if (bitmap_bit_p (in, FLAGS_REG))
    4305            4 :         bitmap_set_bit (live_caller_saved_regs, FLAGS_REG);
    4306              : 
    4307          315 :       unsigned int i;
    4308              : 
    4309              :       /* Get all live caller-saved registers for TLS_GD and TLS_LD_BASE
    4310              :          instructions.  */
    4311          315 :       if (kind != X86_CSE_TLSDESC)
    4312        27249 :         for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    4313        26956 :           if (call_used_regs[i]
    4314        25198 :               && !fixed_regs[i]
    4315        38993 :               && bitmap_bit_p (in, i))
    4316          344 :             bitmap_set_bit (live_caller_saved_regs, i);
    4317              :       /* No clobbered register is live on entry: emit at the block start.  */
    4318          315 :       if (bitmap_empty_p (live_caller_saved_regs))
    4319              :         {
    4320           82 :           if (insn == BB_HEAD (bb))
    4321              :             {
    4322            0 :               *before_p = insn;
    4323            0 :               tls_insn = emit_insn_before (tls_set, insn);
    4324              :             }
    4325              :           else
    4326              :             {
    4327              :               /* Emit the TLS call after NOTE_INSN_FUNCTION_BEG in the
    4328              :                  beginning basic block:
    4329              : 
    4330              :                  (note 4 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
    4331              :                  (note 2 4 26 2 NOTE_INSN_FUNCTION_BEG)
    4332              : 
    4333              :                  or after NOTE_INSN_BASIC_BLOCK in a basic block with
    4334              :                  only a label:
    4335              : 
    4336              :                  (code_label 78 11 77 3 14 (nil) [1 uses])
    4337              :                  (note 77 78 54 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
    4338              : 
    4339              :                  or after debug marker in a basic block with only a
    4340              :                  debug marker:
    4341              : 
    4342              :                  (note 3 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
    4343              :                  (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
    4344              :                  (debug_insn 5 2 16 2 (debug_marker) "x.c":6:3 -1 (nil))
    4345              : 
    4346              :                */
    4347           82 :               insn = insn ? PREV_INSN (insn) : BB_END (bb);
    4348           82 :               *after_p = insn;
    4349           82 :               tls_insn = emit_insn_after (tls_set, insn);
    4350              :             }
    4351           82 :           return tls_insn;
    4352              :         }
    4353              : 
    4354          233 :       bool repeat = false;
    4355              : 
    4356              :       /* Scan this basic block for the point where all such registers are dead.  */
    4357          661 :       FOR_BB_INSNS (bb, insn)
    4358              :         {
    4359          661 :           if (!NONDEBUG_INSN_P (insn))
    4360          283 :             continue;
    4361              : 
    4362              :           /* NB: Conditional jump is the only instruction which reads
    4363              :              flags register and changes control flow.  We can never
    4364              :              place the TLS call after unconditional jump.  */
    4365          378 :           if (JUMP_P (insn))
    4366              :             {
    4367              :               /* This must be a conditional jump.  */
    4368            2 :               rtx label = JUMP_LABEL (insn);
    4369            2 :               if (label == nullptr
    4370            2 :                   || ANY_RETURN_P (label)
    4371            2 :                   || !(LABEL_P (label) || SYMBOL_REF_P (label)))
    4372            0 :                 gcc_unreachable ();
    4373              : 
    4374              :               /* Place the call before all FLAGS_REG setting BBs since
    4375              :                  we can place a call neither before nor after a
    4376              :                  conditional jump.  */
    4377            2 :               bb = ix86_get_dominator_for_reg (FLAGS_REG, bb);
    4378              : 
    4379              :               /* Start over again.  */
    4380            2 :               repeat = true;
    4381            2 :               break;
    4382              :             }
    4383              : 
    4384          376 :           if (bitmap_bit_p (updated_gnu_tls_insns, INSN_UID (insn)))
    4385              :             {
    4386              :               /* Insert the __tls_get_addr call before INSN which
    4387              :                  replaces a __tls_get_addr call.  */
    4388            1 :               *before_p = insn;
    4389            1 :               tls_insn = emit_insn_before (tls_set, insn);
    4390            1 :               return tls_insn;
    4391              :             }
    4392              : 
    4393          375 :           if (bitmap_bit_p (updated_gnu2_tls_insns, INSN_UID (insn)))
    4394              :             {
    4395              :               /* Mark FLAGS register as dead since FLAGS register
    4396              :                  would be clobbered by the GNU2 TLS instruction.  */
    4397            1 :               bitmap_clear_bit (live_caller_saved_regs, FLAGS_REG);
    4398            1 :               continue;
    4399              :             }
    4400              : 
    4401              :           /* Mark FLAGS register as live if INSN sets it.  */
    4402          374 :           note_stores (insn, ix86_check_flags_reg,
    4403              :                        &live_caller_saved_regs);
    4404              : 
    4405          374 :           rtx link;
    4406          515 :           for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
    4407          371 :             if ((REG_NOTE_KIND (link) == REG_DEAD
    4408            9 :                  || (REG_NOTE_KIND (link) == REG_UNUSED
    4409            7 :                      && REGNO (XEXP (link, 0)) == FLAGS_REG))
    4410          378 :                 && REG_P (XEXP (link, 0)))
    4411              :               {
    4412              :                 /* Mark the live caller-saved register as dead.  */
    4413          743 :                 for (i = REGNO (XEXP (link, 0));
    4414          743 :                      i < END_REGNO (XEXP (link, 0));
    4415              :                      i++)
    4416          374 :                   if (i < FIRST_PSEUDO_REGISTER)
    4417          351 :                     bitmap_clear_bit (live_caller_saved_regs, i);
    4418              : 
    4419          369 :                 if (bitmap_empty_p (live_caller_saved_regs))
    4420              :                   {
    4421          230 :                     *after_p = insn;
    4422          230 :                     tls_insn = emit_insn_after (tls_set, insn);
    4423          230 :                     return tls_insn;
    4424              :                   }
    4425              :               }
    4426              :         }
    4427              : 
    4428              :       /* NB: Start over again for conditional jump.  */
    4429            2 :       if (repeat)
    4430            2 :         continue;
    4431              : 
    4432            0 :       gcc_assert (!bitmap_empty_p (live_caller_saved_regs));
    4433              : 
    4434              :       /* If any live caller-saved registers aren't dead at the end of
    4435              :          this basic block, get the basic block which dominates all
    4436              :          basic blocks which set the remaining live registers.  */
    4437            0 :       auto_bitmap set_bbs;
    4438            0 :       bitmap_iterator bi;
    4439            0 :       unsigned int id;
    4440            0 :       EXECUTE_IF_SET_IN_BITMAP (live_caller_saved_regs, 0, id, bi)
    4441              :         {
    4442            0 :           basic_block set_bb = ix86_get_dominator_for_reg (id, bb);
    4443            0 :           bitmap_set_bit (set_bbs, set_bb->index);
    4444              :         }
    4445            0 :       bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
    4446            2 :     }
    4447              :   while (true);
    4448              : }
    4449              : 
    4450              : /* Generate a TLS call of KIND with VAL and copy the call result to DEST,
    4451              :    at entry of the nearest dominator for basic block map BBS, which is in
    4452              :    the fake loop that contains the whole function, so that there is only
    4453              :    a single TLS CALL of KIND with VAL in the whole function.
    4454              :    UPDATED_GNU_TLS_INSNS contains instructions which replace the GNU TLS
    4455              :    instructions.  UPDATED_GNU2_TLS_INSNS contains instructions which
    4456              :    replace the GNU2 TLS instructions.  If TLSDESC_SET isn't nullptr,
    4457              :    insert it before the TLS call.  */
    4458              : 
    4459              : static void
    4460          313 : ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind,
    4461              :                             auto_bitmap &bbs,
    4462              :                             auto_bitmap &updated_gnu_tls_insns,
    4463              :                             auto_bitmap &updated_gnu2_tls_insns,
    4464              :                             rtx tlsdesc_set = nullptr)
    4465              : {
    4466          313 :   basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
    4467          313 :   while (bb->loop_father->latch
    4468          322 :          != EXIT_BLOCK_PTR_FOR_FN (cfun))
    4469            9 :     bb = get_immediate_dominator (CDI_DOMINATORS,
    4470              :                                   bb->loop_father->header);
    4471              : 
    4472          313 :   rtx rax = nullptr, rdi;
    4473          313 :   rtx eqv = nullptr;
    4474          313 :   rtx caddr;
    4475          313 :   rtx set;
    4476          313 :   rtx clob;
    4477          313 :   rtx symbol;
    4478          313 :   rtx tls;
    4479              :   /* Build the TLS pattern for KIND; the call kinds return in RAX.  */
    4480          313 :   switch (kind)
    4481              :     {
    4482          262 :     case X86_CSE_TLS_GD:
    4483          262 :       rax = gen_rtx_REG (Pmode, AX_REG);
    4484          262 :       rdi = gen_rtx_REG (Pmode, DI_REG);
    4485          262 :       caddr = ix86_tls_get_addr ();
    4486              : 
    4487          262 :       symbol = XVECEXP (val, 0, 0);
    4488          262 :       tls = gen_tls_global_dynamic_64 (Pmode, rax, symbol, caddr, rdi);
    4489              :       /* Zero-extend SYMBOL to Pmode for the REG_EQUAL note.  */
    4490          262 :       if (GET_MODE (symbol) != Pmode)
    4491            0 :         symbol = gen_rtx_ZERO_EXTEND (Pmode, symbol);
    4492              :       eqv = symbol;
    4493              :       break;
    4494              : 
    4495           30 :     case X86_CSE_TLS_LD_BASE:
    4496           30 :       rax = gen_rtx_REG (Pmode, AX_REG);
    4497           30 :       rdi = gen_rtx_REG (Pmode, DI_REG);
    4498           30 :       caddr = ix86_tls_get_addr ();
    4499              : 
    4500           30 :       tls = gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi);
    4501              : 
    4502              :       /* Attach a unique REG_EQUAL to DEST, to allow the RTL optimizers
    4503              :          to share the LD_BASE result with other LD model accesses.  */
    4504           30 :       eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
    4505              :                             UNSPEC_TLS_LD_BASE);
    4506              : 
    4507           30 :       break;
    4508              : 
    4509           21 :     case X86_CSE_TLSDESC:
    4510           21 :       set = gen_rtx_SET (dest, val);
    4511           21 :       clob = gen_rtx_CLOBBER (VOIDmode,
    4512              :                               gen_rtx_REG (CCmode, FLAGS_REG));
    4513           21 :       tls = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clob));
    4514           21 :       break;
    4515              : 
    4516            0 :     default:
    4517            0 :       gcc_unreachable ();
    4518              :     }
    4519              : 
    4520              :   /* Emit the TLS CALL insn.  */
    4521          313 :   rtx_insn *before = nullptr;
    4522          313 :   rtx_insn *after = nullptr;
    4523          313 :   rtx_insn *tls_insn = ix86_emit_tls_call (tls, kind, bb, &before,
    4524              :                                            &after,
    4525              :                                            updated_gnu_tls_insns,
    4526              :                                            updated_gnu2_tls_insns);
    4527              :   /* Emit a fresh copy of TLSDESC_SET, if any, before the TLS insn.  */
    4528          313 :   rtx_insn *tlsdesc_insn = nullptr;
    4529          313 :   if (tlsdesc_set)
    4530              :     {
    4531           16 :       rtx dest = copy_rtx (SET_DEST (tlsdesc_set));
    4532           16 :       rtx src = copy_rtx (SET_SRC (tlsdesc_set));
    4533           16 :       tlsdesc_set = gen_rtx_SET (dest, src);
    4534           16 :       tlsdesc_insn = emit_insn_before (tlsdesc_set, tls_insn);
    4535              :     }
    4536              :   /* TLS_GD and TLS_LD_BASE calls are marked as const calls.  */
    4537          313 :   if (kind != X86_CSE_TLSDESC)
    4538              :     {
    4539          292 :       RTL_CONST_CALL_P (tls_insn) = 1;
    4540              : 
    4541              :       /* Indicate that this function can't jump to non-local gotos.  */
    4542          292 :       make_reg_eh_region_note_nothrow_nononlocal (tls_insn);
    4543              :     }
    4544              :   /* The emitted TLS insn must be recognizable.  */
    4545          313 :   if (recog_memoized (tls_insn) < 0)
    4546            0 :     gcc_unreachable ();
    4547              : 
    4548          313 :   if (dump_file)
    4549              :     {
    4550            0 :       if (after)
    4551              :         {
    4552            0 :           fprintf (dump_file, "\nPlace:\n\n");
    4553            0 :           if (tlsdesc_insn)
    4554            0 :             print_rtl_single (dump_file, tlsdesc_insn);
    4555            0 :           print_rtl_single (dump_file, tls_insn);
    4556            0 :           fprintf (dump_file, "\nafter:\n\n");
    4557            0 :           print_rtl_single (dump_file, after);
    4558            0 :           fprintf (dump_file, "\n");
    4559              :         }
    4560              :       else
    4561              :         {
    4562            0 :           fprintf (dump_file, "\nPlace:\n\n");
    4563            0 :           if (tlsdesc_insn)
    4564            0 :             print_rtl_single (dump_file, tlsdesc_insn);
    4565            0 :           print_rtl_single (dump_file, tls_insn);
    4566            0 :           fprintf (dump_file, "\nbefore:\n\n");
    4567            0 :           print_rtl_single (dump_file, before);
    4568            0 :           fprintf (dump_file, "\n");
    4569              :         }
    4570              :     }
    4571              : 
    4572          313 :   if (kind != X86_CSE_TLSDESC)
    4573              :     {
    4574              :       /* Copy RAX to DEST and attach EQV as a REG_EQUAL note.  */
    4575          292 :       set = gen_rtx_SET (dest, rax);
    4576          292 :       rtx_insn *set_insn = emit_insn_after (set, tls_insn);
    4577          292 :       set_dst_reg_note (set_insn, REG_EQUAL, copy_rtx (eqv), dest);
    4578          292 :       if (dump_file)
    4579              :         {
    4580            0 :           fprintf (dump_file, "\nPlace:\n\n");
    4581            0 :           print_rtl_single (dump_file, set_insn);
    4582            0 :           fprintf (dump_file, "\nafter:\n\n");
    4583            0 :           print_rtl_single (dump_file, tls_insn);
    4584            0 :           fprintf (dump_file, "\n");
    4585              :         }
    4586              :     }
    4587          313 : }
    4588              : 
    4589              : namespace {
    4590              : 
    4591              : const pass_data pass_data_x86_cse = /* Passed to rtl_opt_pass by pass_x86_cse.  */
    4592              : {
    4593              :   RTL_PASS, /* type */
    4594              :   "x86_cse", /* name */
    4595              :   OPTGROUP_NONE, /* optinfo_flags */
    4596              :   TV_MACH_DEP, /* tv_id */
    4597              :   0, /* properties_required */
    4598              :   0, /* properties_provided */
    4599              :   0, /* properties_destroyed */
    4600              :   0, /* todo_flags_start */
    4601              :   0, /* todo_flags_finish */
    4602              : };
    4603              : 
    4604              : class pass_x86_cse : public rtl_opt_pass
    4605              : {
    4606              : public:
    4607       288767 :   pass_x86_cse (gcc::context *ctxt)
    4608       577534 :     : rtl_opt_pass (pass_data_x86_cse, ctxt)
    4609              :   {}
    4610              : 
    4611              :   /* opt_pass methods: the pass is gated on optimizing FUN for speed.  */
    4612      1481491 :   bool gate (function *fun) final override
    4613              :     {
    4614      1481491 :       return optimize && optimize_function_for_speed_p (fun);
    4615              :     }
    4616              :   /* Run x86_cse and return its result.  */
    4617       981264 :   unsigned int execute (function *) final override
    4618              :     {
    4619       981264 :       return x86_cse ();
    4620              :     }
    4621              : 
    4622              : private:
    4623              :   /* The redundant source value.  */
    4624              :   rtx val;
    4625              :   /* The actual redundant source value for UNSPEC_TLSDESC.  */
    4626              :   rtx tlsdesc_val;
    4627              :   /* The instruction which defines the redundant value.  */
    4628              :   rtx_insn *def_insn;
    4629              :   /* Mode of the destination of the candidate redundant instruction.  */
    4630              :   machine_mode mode;
    4631              :   /* Mode of the source of the candidate redundant instruction.  */
    4632              :   machine_mode scalar_mode;
    4633              :   /* The classification of the candidate redundant instruction.  */
    4634              :   x86_cse_kind kind;
    4635              :   /* The pass body, called from execute, and its helpers.  */
    4636              :   unsigned int x86_cse (void);
    4637              :   bool candidate_gnu_tls_p (rtx_insn *, attr_tls64);
    4638              :   bool candidate_gnu2_tls_p (rtx, attr_tls64);
    4639              :   bool candidate_vector_p (rtx, rtx_insn *);
    4640              :   rtx_insn *tls_set_insn_from_symbol (const_rtx, const_rtx);
    4641              : }; // class pass_x86_cse
    4642              : 
    4643              : /* Return an insn which sets REG from TLS_SYMBOL if all defs of REG do so, else nullptr.  */
    4644              : 
    4645              : rtx_insn *
    4646           42 : pass_x86_cse::tls_set_insn_from_symbol (const_rtx reg,
    4647              :                                         const_rtx tls_symbol)
    4648              : {
    4649           42 :   rtx_insn *set_insn = nullptr;
    4650           42 :   for (df_ref ref = DF_REG_DEF_CHAIN (REGNO (reg));
    4651          111 :        ref;
    4652           69 :        ref = DF_REF_NEXT_REG (ref))
    4653              :     {
    4654           69 :       if (DF_REF_IS_ARTIFICIAL (ref))
    4655              :         return nullptr;
    4656              :       /* Every def must be a TLS64_LEA insn.  */
    4657           69 :       set_insn = DF_REF_INSN (ref);
    4658           69 :       if (get_attr_tls64 (set_insn) != TLS64_LEA)
    4659              :         return nullptr;
    4660              :       /* The first operand of its source must be TLS_SYMBOL.  */
    4661           69 :       rtx tls_set = PATTERN (set_insn);
    4662           69 :       rtx tls_src = XVECEXP (SET_SRC (tls_set), 0, 0);
    4663           69 :       if (!rtx_equal_p (tls_symbol, tls_src))
    4664              :         return nullptr;
    4665              :     }
    4666              :   /* The last def visited, or nullptr if REG has no defs.  */
    4667              :   return set_insn;
    4668              : }
    4669              : 
    4670              : /* Return true and output def_insn, val, mode, scalar_mode and kind if INSN, whose tls64 attribute is TLS64, is UNSPEC_TLS_GD or UNSPEC_TLS_LD_BASE.
    4671              :    Return false unless TARGET_64BIT and cfun->machine->tls_descriptor_call_multiple_p are both set.  On success val is the UNSPEC in the PARALLEL, mode and scalar_mode are the mode of the call destination, kind is X86_CSE_TLS_GD for TLS64_GD and X86_CSE_TLS_LD_BASE otherwise, and def_insn is nullptr.  */
    4672              : 
    4673              : bool
    4674         2185 : pass_x86_cse::candidate_gnu_tls_p (rtx_insn *insn, attr_tls64 tls64)
    4675              : {
    4676         2185 :   if (!TARGET_64BIT || !cfun->machine->tls_descriptor_call_multiple_p)
    4677              :     return false;
    4678              : 
    4679              :   /* Record the redundant TLS CALLs for 64-bit:
    4680              : 
    4681              :      (parallel [
    4682              :         (set (reg:DI 0 ax)
    4683              :              (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
    4684              :                       (const_int 0 [0])))
    4685              :         (unspec:DI [(symbol_ref:DI ("foo") [flags 0x50])
    4686              :                     (reg/f:DI 7 sp)] UNSPEC_TLS_GD)
    4687              :         (clobber (reg:DI 5 di))])
    4688              : 
    4689              : 
    4690              :      and
    4691              : 
    4692              :      (parallel [
    4693              :         (set (reg:DI 0 ax)
    4694              :              (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
    4695              :                       (const_int 0 [0])))
    4696              :         (unspec:DI [(reg/f:DI 7 sp)] UNSPEC_TLS_LD_BASE)])
    4697              : 
    4698              :    */
    4699              :   /* Element 0 of the PARALLEL is the call SET; element 1 is the UNSPEC recorded as VAL.  */
    4700         2022 :   rtx pat = PATTERN (insn);
    4701         2022 :   rtx set = XVECEXP (pat, 0, 0);
    4702         2022 :   gcc_assert (GET_CODE (set) == SET);
    4703         2022 :   rtx dest = SET_DEST (set);
    4704         2022 :   scalar_mode = mode = GET_MODE (dest);
    4705         2022 :   val = XVECEXP (pat, 0, 1);
    4706         2022 :   gcc_assert (GET_CODE (val) == UNSPEC);
    4707              : 
    4708         2022 :   if (tls64 == TLS64_GD)
    4709         1921 :     kind = X86_CSE_TLS_GD;
    4710              :   else
    4711          101 :     kind = X86_CSE_TLS_LD_BASE;
    4712              : 
    4713         2022 :   def_insn = nullptr;
    4714         2022 :   return true;
    4715              : }
    4716              : 
    4717              : /* Return true and output def_insn, val, tlsdesc_val, mode, scalar_mode and kind if
    4718              :    SET, from an instruction whose tls64 attribute is TLS64, is UNSPEC_TLSDESC.  Return false unless TARGET_64BIT and cfun->machine->tls_descriptor_call_multiple_p are both set, or if TLS64 is not TLS64_COMBINE and tls_set_insn_from_symbol finds no defining instruction for the register operand.  */
    4719              : 
    4720              : bool
    4721           56 : pass_x86_cse::candidate_gnu2_tls_p (rtx set, attr_tls64 tls64)
    4722              : {
    4723           56 :   if (!TARGET_64BIT || !cfun->machine->tls_descriptor_call_multiple_p)
    4724              :     return false;
    4725              : 
    4726           54 :   rtx tls_symbol;
    4727           54 :   rtx_insn *set_insn;
    4728           54 :   rtx src = SET_SRC (set);
    4729           54 :   val = src;
    4730           54 :   tlsdesc_val = src;
    4731           54 :   kind = X86_CSE_TLSDESC;
    4732              :   /* VAL is provisional here: both successful paths below replace it with the rtx used for comparison, while tlsdesc_val keeps the whole source.  */
    4733           54 :   if (tls64 == TLS64_COMBINE)
    4734              :     {
    4735              :       /* Record 64-bit TLS64_COMBINE:
    4736              : 
    4737              :          (set (reg/f:DI 104)
    4738              :               (plus:DI (unspec:DI [
    4739              :                           (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
    4740              :                           (reg:DI 114)
    4741              :                           (reg/f:DI 7 sp)] UNSPEC_TLSDESC)
    4742              :                        (const:DI (unspec:DI [
    4743              :                                     (symbol_ref:DI ("e") [flags 0x1a])
    4744              :                                   ] UNSPEC_DTPOFF))))
    4745              : 
    4746              :          (set (reg/f:DI 104)
    4747              :               (plus:DI (unspec:DI [
    4748              :                           (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
    4749              :                           (unspec:DI [
    4750              :                              (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
    4751              :                           ] UNSPEC_TLSDESC)
    4752              :                           (reg/f:DI 7 sp)] UNSPEC_TLSDESC)
    4753              :                        (const:DI (unspec:DI [
    4754              :                                     (symbol_ref:DI ("e") [flags 0x1a])
    4755              :                                  ] UNSPEC_DTPOFF))))
    4756              :      */
    4757              : 
    4758           12 :       scalar_mode = mode = GET_MODE (src);
    4759              : 
    4760              :       /* Since the first operand of PLUS in the source TLS64_COMBINE
    4761              :          pattern is unused, use the second operand of PLUS:
    4762              : 
    4763              :          (const:DI (unspec:DI [
    4764              :                       (symbol_ref:DI ("e") [flags 0x1a])
    4765              :                    ] UNSPEC_DTPOFF))
    4766              : 
    4767              :          as VAL to check if 2 TLS64_COMBINE patterns have the same
    4768              :          source.  */
    4769           12 :       val = XEXP (src, 1);
    4770           12 :       gcc_assert (GET_CODE (val) == CONST
    4771              :                   && GET_CODE (XEXP (val, 0)) == UNSPEC
    4772              :                       && XINT (XEXP (val, 0), 1) == UNSPEC_DTPOFF
    4773              :                       && SYMBOL_REF_P (XVECEXP (XEXP (val, 0), 0, 0)));
    4774           12 :       def_insn = nullptr;
    4775           12 :       return true;
    4776              :     }
    4777              : 
    4778              :   /* Record 64-bit TLS64_CALL:
    4779              : 
    4780              :      (set (reg:DI 101)
    4781              :           (unspec:DI [(symbol_ref:DI ("foo") [flags 0x50])
    4782              :                       (reg:DI 112)
    4783              :                       (reg/f:DI 7 sp)] UNSPEC_TLSDESC))
    4784              : 
    4785              :    */
    4786              :   /* From here on SRC is the register operand (element 1) of the UNSPEC, not the whole source.  */
    4787           42 :   gcc_assert (GET_CODE (src) == UNSPEC);
    4788           42 :   tls_symbol = XVECEXP (src, 0, 0);
    4789           42 :   src = XVECEXP (src, 0, 1);
    4790           42 :   scalar_mode = mode = GET_MODE (src);
    4791           42 :   gcc_assert (REG_P (src));
    4792              : 
    4793              :   /* All definitions of reg:DI 129 in
    4794              : 
    4795              :      (set (reg:DI 110)
    4796              :           (unspec:DI [(symbol_ref:DI ("foo"))
    4797              :                       (reg:DI 129)
    4798              :                       (reg/f:DI 7 sp)] UNSPEC_TLSDESC))
    4799              : 
    4800              :      should have the same source as in
    4801              : 
    4802              :      (set (reg:DI 129)
    4803              :           (unspec:DI [(symbol_ref:DI ("foo"))] UNSPEC_TLSDESC))
    4804              : 
    4805              :    */
    4806              : 
    4807           42 :   set_insn = tls_set_insn_from_symbol (src, tls_symbol);
    4808           42 :   if (!set_insn)
    4809              :     return false;
    4810              : 
    4811              :   /* Use TLS_SYMBOL as VAL to check if 2 patterns have the same source.  */
    4812           42 :   val = tls_symbol;
    4813           42 :   def_insn = set_insn;
    4814           42 :   return true;
    4815              : }
    4816              : 
    4817              : /* Return true and output def_insn, val, mode, scalar_mode and kind if
    4818              :    SET has a vector-mode REG or SUBREG destination and ix86_broadcast_inner returns a non-null value for its source.  INSN is the initial value of def_insn.  */
    4819              : 
    4820              : bool
    4821     50034109 : pass_x86_cse::candidate_vector_p (rtx set, rtx_insn *insn)
    4822              : {
    4823     50034109 :   rtx src = SET_SRC (set);
    4824     50034109 :   rtx dest = SET_DEST (set);
    4825     50034109 :   mode = GET_MODE (dest);
    4826              :   /* Skip non-vector instruction.  */
    4827     50034109 :   if (!VECTOR_MODE_P (mode))
    4828              :     return false;
    4829              : 
    4830              :   /* Skip instructions whose destination is neither a REG nor a SUBREG.  */
    4831      3715770 :   if (!REG_P (dest) && !SUBREG_P (dest))
    4832              :     return false;
    4833              :   /* ix86_broadcast_inner receives the addresses of scalar_mode, kind and def_insn; its return value becomes VAL.  */
    4834      2202109 :   def_insn = insn;
    4835      2202109 :   val = ix86_broadcast_inner (src, mode, &scalar_mode, &kind,
    4836              :                               &def_insn);
    4837      2202109 :   return val ? true : false;
    4838              : }
    4839              : 
    4840              : /* At entry of the nearest common dominator for basic blocks with
    4841              : 
    4842              :    1. Vector CONST0_RTX patterns.
    4843              :    2. Vector CONSTM1_RTX patterns.
    4844              :    3. Vector broadcast patterns.
    4845              :    4. UNSPEC_TLS_GD patterns.
    4846              :    5. UNSPEC_TLS_LD_BASE patterns.
    4847              :    6. UNSPEC_TLSDESC patterns.
    4848              : 
    4849              :    generate a single pattern whose destination is used to replace the
    4850              :    source in all identical patterns.
    4851              : 
    4852              :    NB: We want to generate a pattern, which is executed only once, to
    4853              :    cover the whole function.  The LCM algorithm isn't appropriate here
    4854              :    since it may place a pattern inside the loop.  */
    4855              : 
    4856              : unsigned int
    4857       981264 : pass_x86_cse::x86_cse (void)
    4858              : {
    4859       981264 :   timevar_push (TV_MACH_DEP);
    4860              : 
    4861       981264 :   auto_vec<redundant_pattern *> loads;
    4862       981264 :   redundant_pattern *load;
    4863       981264 :   basic_block bb;
    4864       981264 :   rtx_insn *insn;
    4865       981264 :   unsigned int i;
    4866       981264 :   auto_bitmap updated_gnu_tls_insns;
    4867       981264 :   auto_bitmap updated_gnu2_tls_insns;
    4868       981264 :   auto_bitmap call_bbs;
    4869              : 
    4870       981264 :   df_set_flags (DF_DEFER_INSN_RESCAN);
    4871              : 
    4872       981264 :   bool recursive_call_p = cfun->machine->recursive_function;
    4873              : 
    4874     10941498 :   FOR_EACH_BB_FN (bb, cfun)
    4875              :     {
    4876    132278937 :       FOR_BB_INSNS (bb, insn)
    4877              :         {
    4878    122318703 :           if (!NONDEBUG_INSN_P (insn))
    4879     68601410 :             continue;
    4880              : 
    4881     53717293 :           bool matched = false;
    4882              :           /* By default, remove redundant patterns only if there are at
    4883              :              least 2 of them.  */
    4884     53717293 :           unsigned int threshold = 2;
    4885              : 
    4886     53717293 :           bool call_p = CALL_P (insn);
    4887     53717293 :           rtx set = single_set (insn);
    4888     53717293 :           if (!set && !call_p)
    4889      1105914 :             continue;
    4890              : 
    4891     52611379 :           tlsdesc_val = nullptr;
    4892              : 
    4893     52611379 :           attr_tls64 tls64 = get_attr_tls64 (insn);
    4894              : 
    4895              :           /* NB: TLS calls preserve all registers.  */
    4896     52611379 :           if (call_p && tls64 == TLS64_NONE)
    4897      4446590 :             bitmap_set_bit (call_bbs, BLOCK_FOR_INSN (insn)->index);
    4898              : 
    4899     52611379 :           switch (tls64)
    4900              :             {
    4901         2185 :             case TLS64_GD:
    4902         2185 :             case TLS64_LD_BASE:
    4903              :               /* Verify UNSPEC_TLS_GD and UNSPEC_TLS_LD_BASE.  */
    4904         2185 :               if (candidate_gnu_tls_p (insn, tls64))
    4905              :                 break;
    4906          163 :               continue;
    4907              : 
    4908           56 :             case TLS64_CALL:
    4909           56 :             case TLS64_COMBINE:
    4910              :               /* Verify UNSPEC_TLSDESC.  */
    4911           56 :               if (candidate_gnu2_tls_p (set, tls64))
    4912              :                 break;
    4913            2 :               continue;
    4914              : 
    4915           38 :             case TLS64_LEA:
    4916              :               /* Skip TLS64_LEA.  */
    4917           38 :               continue;
    4918              : 
    4919     52609100 :             case TLS64_NONE:
    4920     52609100 :               if (!set)
    4921      2574991 :                 continue;
    4922              : 
    4923              :               /* Check for vector broadcast.  */
    4924     50034109 :               if (candidate_vector_p (set, insn))
    4925              :                 break;
    4926     49785064 :               continue;
    4927              :             }
    4928              : 
    4929              :           /* Check if there is a matching redundant load.   */
    4930       592668 :           FOR_EACH_VEC_ELT (loads, i, load)
    4931       437809 :             if (load->val
    4932       437809 :                 && load->kind == kind
    4933       291799 :                 && load->mode == scalar_mode
    4934       256144 :                 && (load->bb == bb
    4935       196938 :                     || (kind != X86_CSE_VEC_DUP
    4936       196938 :                         && kind != X86_CSE_CONST_VECTOR)
    4937              :                     /* Non all 0s/1s vector load must be in the same
    4938              :                        basic block if it is in a recursive call.  */
    4939       137251 :                     || !recursive_call_p)
    4940       691826 :                 && rtx_equal_p (load->val, val))
    4941              :               {
    4942              :                 /* Record instruction.  */
    4943        96262 :                 bitmap_set_bit (load->insns, INSN_UID (insn));
    4944              : 
    4945              :                 /* Record the maximum vector size.  */
    4946        96262 :                 if (kind <= X86_CSE_VEC_DUP
    4947       191411 :                     && load->size < GET_MODE_SIZE (mode))
    4948         1012 :                   load->size = GET_MODE_SIZE (mode);
    4949              : 
    4950              :                 /* Record the basic block.  */
    4951        96262 :                 bitmap_set_bit (load->bbs, bb->index);
    4952              : 
    4953              :                 /* Increment the count.  */
    4954        96262 :                 load->count++;
    4955              : 
    4956        96262 :                 matched = true;
    4957        96262 :                 break;
    4958              :               }
    4959              : 
    4960       251121 :           if (matched)
    4961        96262 :             continue;
    4962              : 
    4963              :           /* We see this instruction the first time.  Record the
    4964              :              redundant source value, its mode, the destination size,
    4965              :              instruction which defines the redundant source value,
    4966              :              instruction basic block and the instruction kind.  */
    4967       154859 :           load = new redundant_pattern;
    4968              : 
    4969              :           /* Convert CONST_VECTOR load no larger than integer register
    4970              :              to constant integer load even if there is no redundant
    4971              :              CONST_VECTOR load.  */
    4972       154859 :           if (CONST_VECTOR_P (val))
    4973        31025 :             threshold = 1;
    4974              : 
    4975       154859 :           load->val = copy_rtx (val);
    4976       154859 :           if (tlsdesc_val)
    4977           28 :             load->tlsdesc_val = copy_rtx (tlsdesc_val);
    4978              :           else
    4979       154831 :             load->tlsdesc_val = nullptr;
    4980       154859 :           load->mode = scalar_mode;
    4981       154859 :           load->dest_mode = mode;
    4982       154859 :           load->size = GET_MODE_SIZE (mode);
    4983       154859 :           load->def_insn = def_insn;
    4984       154859 :           load->count = 1;
    4985       154859 :           load->threshold = threshold;
    4986       154859 :           load->bb = BLOCK_FOR_INSN (insn);
    4987       154859 :           load->kind = kind;
    4988              : 
    4989       154859 :           bitmap_set_bit (load->insns, INSN_UID (insn));
    4990       154859 :           bitmap_set_bit (load->bbs, bb->index);
    4991              : 
    4992       154859 :           loads.safe_push (load);
    4993              :         }
    4994              :     }
    4995              : 
    4996              :   bool replaced = false;
    4997      1136123 :   FOR_EACH_VEC_ELT (loads, i, load)
    4998       154859 :     if (load->count >= load->threshold)
    4999              :       {
    5000        63969 :         machine_mode mode;
    5001        63969 :         rtx reg, broadcast_reg;
    5002        63969 :         rtx broadcast_source = nullptr;
    5003        63969 :         replaced = true;
    5004        63969 :         switch (load->kind)
    5005              :           {
    5006          313 :           case X86_CSE_TLS_GD:
    5007          313 :           case X86_CSE_TLS_LD_BASE:
    5008          313 :           case X86_CSE_TLSDESC:
    5009          313 :             broadcast_reg = gen_reg_rtx (load->mode);
    5010          313 :             replace_tls_call (broadcast_reg, load->insns,
    5011          313 :                               (load->kind == X86_CSE_TLSDESC
    5012              :                                ? updated_gnu2_tls_insns
    5013              :                                : updated_gnu_tls_insns));
    5014          313 :             load->broadcast_reg = broadcast_reg;
    5015          313 :             break;
    5016              : 
    5017        11153 :           case X86_CSE_VEC_DUP:
    5018        11153 :             if (CONST_INT_P (load->val)
    5019        10051 :                 && (load->val == CONST0_RTX (load->mode)
    5020        10075 :                     || load->size <= UNITS_PER_WORD))
    5021              :               {
    5022              :                 /* Generate CONST_VECTOR load.  */
    5023        31026 :               case X86_CSE_CONST_VECTOR:
    5024        31026 :                 mode = ix86_get_vector_cse_mode (load->size,
    5025              :                                                  load->mode);
    5026              : 
    5027        31026 :                 if (CONST_VECTOR_P (load->val))
    5028              :                   broadcast_source = load->val;
    5029            1 :                 else if (load->val == CONST0_RTX (load->mode))
    5030            0 :                   broadcast_source = CONST0_RTX (mode);
    5031            1 :                 else if (load->val == CONSTM1_RTX (load->mode))
    5032            0 :                   broadcast_source = CONSTM1_RTX (mode);
    5033              :                 else
    5034              :                   {
    5035            1 :                     int nunits = GET_MODE_NUNITS (mode);
    5036            1 :                     rtvec v = rtvec_alloc (nunits);
    5037            3 :                     for (int j = 0; j < nunits ; j++)
    5038            2 :                       RTVEC_ELT (v, j) = load->val;
    5039            1 :                     broadcast_source = gen_rtx_CONST_VECTOR (mode, v);
    5040              :                   }
    5041              : 
    5042              :                 /* NB: Zero CONST_VECTOR load works for MMX and XMM
    5043              :                    registers.  */
    5044        32437 :                 if (load->size <= UNITS_PER_WORD)
    5045              :                   {
    5046              :                     /* Convert CONST_VECTOR load no larger than integer
    5047              :                        register:
    5048              : 
    5049              :                        (set (reg:V2SI 106)
    5050              :                             (const_vector:V2SI [(const_int 1 [1]) repeated x2]))
    5051              : 
    5052              :                        to constant integer load:
    5053              : 
    5054              :                        (set (subreg:DI (reg:V2SI 106 [ _20 ]) 0)
    5055              :                             (const_int 4294967297 [0x100000001]))
    5056              :                        */
    5057        31026 :                     machine_mode int_mode
    5058        31026 :                       = int_mode_for_mode (mode).require ();
    5059        31026 :                     load->dest_mode = int_mode;
    5060        31026 :                     broadcast_source = simplify_subreg (int_mode,
    5061              :                                                         broadcast_source,
    5062              :                                                         mode, 0);
    5063        31026 :                     gcc_assert (broadcast_source != nullptr);
    5064              : 
    5065        31026 :                     bool keep_const_int_load = false;
    5066        31026 :                     if (!bitmap_empty_p (call_bbs))
    5067              :                       {
    5068        27798 :                         bitmap_iterator bi;
    5069        27798 :                         unsigned int id;
    5070        36194 :                         EXECUTE_IF_SET_IN_BITMAP (load->bbs, 0, id, bi)
    5071        28929 :                           if (bitmap_bit_p (call_bbs, id))
    5072              :                             {
    5073              :                               /* NB: Constant integer load is faster
    5074              :                                  than save and restore an integer
    5075              :                                  register when crossing a function call.
    5076              :                                */
    5077              :                               keep_const_int_load = true;
    5078              :                               break;
    5079              :                             }
    5080              :                       }
    5081              : 
    5082        27798 :                     if (keep_const_int_load)
    5083              :                       {
    5084              :                         /* Keep constant integer load.  */
    5085        20533 :                         replace_vector_const (mode, broadcast_source,
    5086        20533 :                                               load->insns, int_mode);
    5087        20533 :                         load->broadcast_source = nullptr;
    5088        20533 :                         load->broadcast_reg = nullptr;
    5089              :                       }
    5090              :                     else
    5091              :                       {
    5092        10493 :                         broadcast_reg = gen_reg_rtx (mode);
    5093        10493 :                         reg = gen_reg_rtx (load->mode);
    5094        10493 :                         replace_vector_const (mode, broadcast_reg,
    5095        10493 :                                               load->insns, load->mode);
    5096        10493 :                         load->broadcast_source = broadcast_source;
    5097        10493 :                         load->broadcast_reg = broadcast_reg;
    5098              :                       }
    5099              :                     break;
    5100              :                   }
    5101              :               }
    5102              :             /* FALLTHRU */
    5103              : 
    5104        32630 :           case X86_CSE_CONST0_VECTOR:
    5105        32630 :           case X86_CSE_CONSTM1_VECTOR:
    5106        32630 :             mode = ix86_get_vector_cse_mode (load->size, load->mode);
    5107        32630 :             broadcast_reg = gen_reg_rtx (mode);
    5108        32630 :             if (load->def_insn)
    5109              :               {
    5110              :                 /* Replace redundant vector loads with a single vector
    5111              :                    load in the same basic block.  */
    5112          821 :                 reg = load->val;
    5113          821 :                 if (load->mode != GET_MODE (reg))
    5114            0 :                   reg = gen_rtx_SUBREG (load->mode, reg, 0);
    5115          821 :                 broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg);
    5116              :               }
    5117              :             else
    5118              :               /* This is a constant integer/double vector.  If the
    5119              :                  inner scalar is 0 or -1, set vector to CONST0_RTX
    5120              :                  or CONSTM1_RTX directly.  */
    5121        31809 :               switch (load->kind)
    5122              :                 {
    5123        19899 :                 case X86_CSE_CONST0_VECTOR:
    5124        19899 :                   broadcast_source = CONST0_RTX (mode);
    5125        19899 :                   break;
    5126         1578 :                 case X86_CSE_CONSTM1_VECTOR:
    5127         1578 :                   broadcast_source = CONSTM1_RTX (mode);
    5128         1578 :                   break;
    5129        10332 :                 case X86_CSE_CONST_VECTOR:
    5130        10332 :                 case X86_CSE_VEC_DUP:
    5131        10332 :                   if (!broadcast_source)
    5132              :                     {
    5133        10332 :                       reg = gen_reg_rtx (load->mode);
    5134        10332 :                       broadcast_source = gen_rtx_VEC_DUPLICATE (mode,
    5135              :                                                                 reg);
    5136              :                     }
    5137              :                   break;
    5138            0 :                 default:
    5139            0 :                   gcc_unreachable ();
    5140              :                 }
    5141        32630 :             replace_vector_const (mode, broadcast_reg, load->insns,
    5142              :                                   load->mode);
    5143        32630 :             load->broadcast_source = broadcast_source;
    5144        32630 :             load->broadcast_reg = broadcast_reg;
    5145        32630 :             break;
    5146              :           }
    5147              :       }
    5148              : 
    5149       981264 :   if (replaced)
    5150              :     {
    5151        41558 :       auto_vec<rtx_insn *> control_flow_insns;
    5152              : 
    5153              :       /* (Re-)discover loops so that bb->loop_father can be used in the
    5154              :          analysis below.  */
    5155        41558 :       calculate_dominance_info (CDI_DOMINATORS);
    5156        41558 :       loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
    5157              : 
    5158       126131 :       FOR_EACH_VEC_ELT (loads, i, load)
    5159        84573 :         if (load->count >= load->threshold)
    5160              :           {
    5161        63969 :             rtx set;
    5162        63969 :             if (load->def_insn)
    5163          837 :               switch (load->kind)
    5164              :                 {
    5165           16 :                 case X86_CSE_TLSDESC:
    5166           16 :                   ix86_place_single_tls_call (load->broadcast_reg,
    5167              :                                               load->tlsdesc_val,
    5168              :                                               load->kind,
    5169           16 :                                               load->bbs,
    5170              :                                               updated_gnu_tls_insns,
    5171              :                                               updated_gnu2_tls_insns,
    5172           16 :                                               PATTERN (load->def_insn));
    5173           16 :                   break;
    5174          821 :                 case X86_CSE_VEC_DUP:
    5175              :                   /* Insert a broadcast after the original scalar
    5176              :                      definition.  */
    5177          821 :                   set = gen_rtx_SET (load->broadcast_reg,
    5178              :                                      load->broadcast_source);
    5179          821 :                   insn = emit_insn_after (set, load->def_insn);
    5180              : 
    5181          821 :                   if (cfun->can_throw_non_call_exceptions)
    5182              :                     {
    5183              :                       /* Handle REG_EH_REGION note in DEF_INSN.  */
    5184            4 :                       rtx note = find_reg_note (load->def_insn,
    5185              :                                                 REG_EH_REGION, nullptr);
    5186            4 :                       if (note)
    5187              :                         {
    5188            1 :                           control_flow_insns.safe_push (load->def_insn);
    5189            1 :                           add_reg_note (insn, REG_EH_REGION,
    5190              :                                         XEXP (note, 0));
    5191              :                         }
    5192              :                     }
    5193              : 
    5194          821 :                   if (dump_file)
    5195              :                     {
    5196            0 :                       fprintf (dump_file, "\nAdd:\n\n");
    5197            0 :                       print_rtl_single (dump_file, insn);
    5198            0 :                       fprintf (dump_file, "\nafter:\n\n");
    5199            0 :                       print_rtl_single (dump_file, load->def_insn);
    5200            0 :                       fprintf (dump_file, "\n");
    5201              :                     }
    5202              :                   break;
    5203            0 :                 default:
    5204            0 :                   gcc_unreachable ();
    5205              :                 }
    5206              :             else
    5207        63132 :               switch (load->kind)
    5208              :                 {
    5209          297 :                 case X86_CSE_TLS_GD:
    5210          297 :                 case X86_CSE_TLS_LD_BASE:
    5211          297 :                 case X86_CSE_TLSDESC:
    5212          297 :                   ix86_place_single_tls_call (load->broadcast_reg,
    5213              :                                               (load->kind == X86_CSE_TLSDESC
    5214              :                                                ? load->tlsdesc_val
    5215              :                                                : load->val),
    5216              :                                               load->kind,
    5217          297 :                                               load->bbs,
    5218              :                                               updated_gnu_tls_insns,
    5219              :                                               updated_gnu2_tls_insns);
    5220          297 :                   break;
    5221        41358 :                 case X86_CSE_CONST_VECTOR:
    5222        41358 :                 case X86_CSE_VEC_DUP:
    5223              :                   /* Keep redundant constant integer load.  */
    5224        41358 :                   if (!load->broadcast_reg)
    5225              :                     break;
    5226              :                   /* FALLTHRU */
    5227        42302 :                 case X86_CSE_CONST0_VECTOR:
    5228        42302 :                 case X86_CSE_CONSTM1_VECTOR:
    5229        42302 :                   ix86_place_single_vector_set (load->broadcast_reg,
    5230              :                                                 load->broadcast_source,
    5231              :                                                 load->bbs,
    5232              :                                                 load);
    5233        42302 :                   break;
    5234              :                 }
    5235              :           }
    5236              : 
    5237        41558 :       loop_optimizer_finalize ();
    5238              : 
    5239        41558 :       if (!control_flow_insns.is_empty ())
    5240              :         {
    5241            1 :           free_dominance_info (CDI_DOMINATORS);
    5242              : 
    5243            3 :           FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
    5244            1 :             if (control_flow_insn_p (insn))
    5245              :               {
    5246              :                 /* Split the block after insn.  There will be a fallthru
    5247              :                    edge, which is OK so we keep it.  We have to create
    5248              :                    the exception edges ourselves.  */
    5249            1 :                 bb = BLOCK_FOR_INSN (insn);
    5250            1 :                 split_block (bb, insn);
    5251            1 :                 rtl_make_eh_edge (NULL, bb, BB_END (bb));
    5252              :               }
    5253              :         }
    5254              : 
    5255        41558 :       df_process_deferred_rescans ();
    5256        41558 :     }
    5257              : 
    5258      1136123 :   FOR_EACH_VEC_ELT (loads, i, load)
    5259       309718 :     delete load;
    5260              : 
    5261       981264 :   df_clear_flags (DF_DEFER_INSN_RESCAN);
    5262              : 
    5263       981264 :   timevar_pop (TV_MACH_DEP);
    5264       981264 :   return 0;
    5265       981264 : }
    5266              : 
    5267              : } // anon namespace
    5268              : /* Return a newly allocated pass_x86_cse constructed with CTXT.  */
    5269              : rtl_opt_pass *
    5270       288767 : make_pass_x86_cse (gcc::context *ctxt)
    5271              : {
    5272       288767 :   return new pass_x86_cse (ctxt);
    5273              : }
    5274              : 
    5275              : /* Convert legacy instructions that clobber EFLAGS to APX_NF
    5276              :    instructions when there is no flag set between a flag
    5277              :    producer and its user.  Always returns 0.  */
    5278              : 
    5279              : static unsigned int
    5280          370 : ix86_apx_nf_convert (void)
    5281              : {
    5282          370 :   timevar_push (TV_MACH_DEP);
    5283              : /* CONVERTING_MAP maps each candidate insn to its original pattern;
   CURRENT_CONVERT_LIST holds the candidates of the sequence being scanned.  */
    5284          370 :   basic_block bb;
    5285          370 :   rtx_insn *insn;
    5286          370 :   hash_map <rtx_insn *, rtx> converting_map;
    5287          370 :   auto_vec <rtx_insn *> current_convert_list;
    5288              : 
    5289          370 :   bool converting_seq = false;
    5290          370 :   rtx cc = gen_rtx_REG (CCmode, FLAGS_REG);
    5291              : 
    5292          792 :   FOR_EACH_BB_FN (bb, cfun)
    5293              :     {
    5294              :       /* Reset conversion for each bb.  */
    5295          422 :       converting_seq = false;
    5296         5071 :       FOR_BB_INSNS (bb, insn)
    5297              :         {
    5298         4649 :           if (!NONDEBUG_INSN_P (insn))
    5299         4988 :             continue;
    5300              : 
    5301         3707 :           if (recog_memoized (insn) < 0)
    5302          336 :             continue;
    5303              : 
    5304              :           /* Convert candidate insns after cstore, which should
    5305              :              satisfy these two conditions:
    5306              :              1. It is neither a flag user nor a flag producer; it
    5307              :              only clobbers FLAGS_REG.
    5308              :              2. It has a corresponding nf pattern.  */
    5309              : 
    5310         3371 :           rtx pat = PATTERN (insn);
    5311              : 
    5312              :           /* Start conversion at the first cstorecc.  */
    5313         3371 :           rtx set = NULL_RTX;
    5314         3371 :           if (!converting_seq
    5315         2790 :               && (set = single_set (insn))
    5316         2714 :               && ix86_comparison_operator (SET_SRC (set), VOIDmode)
    5317          126 :               && reg_overlap_mentioned_p (cc, SET_SRC (set))
    5318         3494 :               && !reg_overlap_mentioned_p (cc, SET_DEST (set)))
    5319              :             {
    5320          123 :               converting_seq = true;
    5321          123 :               current_convert_list.truncate (0);
    5322              :             }
    5323              :           /* Terminate at the next explicit (non-CLOBBER) flag set.  */
    5324         3248 :           else if (reg_set_p (cc, pat)
    5325         3248 :                    && GET_CODE (set_of (cc, pat)) != CLOBBER)
    5326              :             converting_seq = false;
    5327              : 
    5328         3151 :           if (!converting_seq)
    5329         2768 :             continue;
    5330              : 
    5331          603 :           if (get_attr_has_nf (insn)
    5332          603 :               && GET_CODE (pat) == PARALLEL)
    5333              :             {
    5334              :               /* Record the insn in the candidate map.  */
    5335           72 :               current_convert_list.safe_push (insn);
    5336           72 :               converting_map.put (insn, pat);
    5337              :             }
    5338              :           /* If the insn clobbers flags but has no has_nf attribute,
    5339              :              revoke the current sequence's candidates and stop converting.  */
    5340          531 :           else if (!get_attr_has_nf (insn)
    5341          530 :                    && reg_set_p (cc, pat)
    5342          534 :                    && GET_CODE (set_of (cc, pat)) == CLOBBER)
    5343              :             {
    5344            3 :               for (auto item : current_convert_list)
    5345            0 :                 converting_map.remove (item);
    5346            3 :               converting_seq = false;
    5347              :             }
    5348              :         }
    5349              :     }
    5350              : /* Rewrite every surviving candidate: drop the FLAGS_REG clobber from its
   PARALLEL, then re-recognize and rescan the insn.  */
    5351          370 :   if (!converting_map.is_empty ())
    5352              :     {
    5353           85 :       for (auto iter = converting_map.begin ();
    5354          170 :            iter != converting_map.end (); ++iter)
    5355              :         {
    5356           72 :           rtx_insn *replace = (*iter).first;
    5357           72 :           rtx pat = (*iter).second;
    5358           72 :           int i, n = 0, len = XVECLEN (pat, 0);
    5359           72 :           rtx *new_elems = XALLOCAVEC (rtx, len);
    5360           72 :           rtx new_pat;
    5361          216 :           for (i = 0; i < len; i++)
    5362              :             {
    5363          144 :               rtx temp = XVECEXP (pat, 0, i);
    5364          216 :               if (! (GET_CODE (temp) == CLOBBER
    5365           72 :                      && reg_overlap_mentioned_p (cc,
    5366           72 :                                                  XEXP (temp, 0))))
    5367              :                 {
    5368           72 :                   new_elems[n] = temp;
    5369           72 :                   n++;
    5370              :                 }
    5371              :             }
    5372              :           /* A single remaining element replaces the PARALLEL outright.  */
    5373           72 :           if (n == 1)
    5374           72 :             new_pat = new_elems[0];
    5375              :           else
    5376            0 :             new_pat =
    5377            0 :               gen_rtx_PARALLEL (VOIDmode,
    5378              :                                 gen_rtvec_v (n,
    5379              :                                              new_elems));
    5380              :           /* Install the new pattern; INSN_CODE is reset before re-recognition.  */
    5381           72 :           PATTERN (replace) = new_pat;
    5382           72 :           INSN_CODE (replace) = -1;
    5383           72 :           recog_memoized (replace);
    5384           72 :           df_insn_rescan (replace);
    5385              :         }
    5386              :     }
    5387              : 
    5388          370 :   timevar_pop (TV_MACH_DEP);
    5389          370 :   return 0;
    5390          370 : }
    5391              : 
    5392              : 
    5393              : namespace {
    5394              : /* Pass descriptor for the "apx_nfcvt" RTL pass, timed under TV_MACH_DEP.  */
    5395              : const pass_data pass_data_apx_nf_convert =
    5396              : {
    5397              :   RTL_PASS, /* type */
    5398              :   "apx_nfcvt", /* name */
    5399              :   OPTGROUP_NONE, /* optinfo_flags */
    5400              :   TV_MACH_DEP, /* tv_id */
    5401              :   0, /* properties_required */
    5402              :   0, /* properties_provided */
    5403              :   0, /* properties_destroyed */
    5404              :   0, /* todo_flags_start */
    5405              :   0, /* todo_flags_finish */
    5406              : };
    5407              : /* RTL pass whose execute method runs ix86_apx_nf_convert.  */
    5408              : class pass_apx_nf_convert : public rtl_opt_pass
    5409              : {
    5410              : public:
    5411       288767 :   pass_apx_nf_convert (gcc::context *ctxt)
    5412       577534 :     : rtl_opt_pass (pass_data_apx_nf_convert, ctxt)
    5413              :   {}
    5414              : 
    5415              :   /* opt_pass methods.  The gate requires TARGET_APX_NF, a nonzero optimize, and CFUN optimized for speed.  */
    5416      1481491 :   bool gate (function *) final override
    5417              :     {
    5418      1481491 :       return (TARGET_APX_NF
    5419          462 :               && optimize
    5420      1481945 :               && optimize_function_for_speed_p (cfun));
    5421              :     }
    5422              :   /* Run the conversion and return its result.  */
    5423          370 :   unsigned int execute (function *) final override
    5424              :     {
    5425          370 :       return ix86_apx_nf_convert ();
    5426              :     }
    5427              : }; // class pass_apx_nf_convert
    5428              : 
    5429              : } // anon namespace
    5430              : /* Return a newly allocated pass_apx_nf_convert constructed with CTXT.  */
    5431              : rtl_opt_pass *
    5432       288767 : make_pass_apx_nf_convert (gcc::context *ctxt)
    5433              : {
    5434       288767 :   return new pass_apx_nf_convert (ctxt);
    5435              : }
    5436              : 
    5437              : /* When a hot loop fits into one cache line,
    5438              :    force-align the loop without considering the max skip.  */
    5439              : static void
    5440       980785 : ix86_align_loops ()
    5441              : {
    5442       980785 :   basic_block bb;
    5443              : 
    5444              :   /* Don't do this when we don't know the cache line size.  */
    5445       980785 :   if (ix86_cost->prefetch_block == 0)
    5446            9 :     return;
    5447              : 
    5448       980776 :   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
    5449       980776 :   profile_count count_threshold = cfun->cfg->count_max / param_align_threshold;
    5450     11408539 :   FOR_EACH_BB_FN (bb, cfun)
    5451              :     {
    5452     10427763 :       rtx_insn *label = BB_HEAD (bb);
    5453     10427763 :       bool has_fallthru = 0;
    5454     10427763 :       edge e;
    5455     10427763 :       edge_iterator ei;
    5456              :       /* Only blocks that begin with a label are candidates.  */
    5457     10427763 :       if (!LABEL_P (label))
    5458      5311390 :         continue;
    5459              : 
    5460      5121185 :       profile_count fallthru_count = profile_count::zero ();
    5461      5121185 :       profile_count branch_count = profile_count::zero ();
    5462              :       /* Accumulate predecessor edge counts, fallthru and branch separately.  */
    5463     14880616 :       FOR_EACH_EDGE (e, ei, bb->preds)
    5464              :         {
    5465      9759431 :           if (e->flags & EDGE_FALLTHRU)
    5466      2490903 :             has_fallthru = 1, fallthru_count += e->count ();
    5467              :           else
    5468      7268528 :             branch_count += e->count ();
    5469              :         }
    5470              :       /* Skip blocks whose accumulated counts are not initialized.  */
    5471      5121185 :       if (!fallthru_count.initialized_p () || !branch_count.initialized_p ())
    5472         4812 :         continue;
    5473              : 
    5474      5116373 :       if (bb->loop_father
    5475      5116373 :           && bb->loop_father->latch != EXIT_BLOCK_PTR_FOR_FN (cfun)
    5476      6461192 :           && (has_fallthru
    5477      1344819 :               ? (!(single_succ_p (bb)
    5478       146997 :                    && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun))
    5479       937108 :                  && optimize_bb_for_speed_p (bb)
    5480       857035 :                  && branch_count + fallthru_count > count_threshold
    5481       732431 :                  && (branch_count > fallthru_count * param_align_loop_iterations))
    5482              :               /* In case there's no fallthru for the loop.
    5483              :                  Nops inserted won't be executed.  */
    5484       407711 :               : (branch_count > count_threshold
    5485       136494 :                  || (bb->count > bb->prev_bb->count * 10
    5486        12745 :                      && (bb->prev_bb->count
    5487      4582721 :                          <= ENTRY_BLOCK_PTR_FOR_FN (cfun)->count / 2)))))
    5488              :         {
    5489       546397 :           rtx_insn* insn, *end_insn;
    5490       546397 :           HOST_WIDE_INT size = 0;
    5491       546397 :           bool padding_p = true;
    5492       546397 :           basic_block tbb = bb;
    5493       546397 :           unsigned cond_branch_num = 0;
    5494       546397 :           bool detect_tight_loop_p = false;
    5495              :           /* Walk up to num_nodes blocks from BB via next_bb, summing insn sizes.  */
    5496       862184 :           for (unsigned int i = 0; i != bb->loop_father->num_nodes;
    5497       315787 :                i++, tbb = tbb->next_bb)
    5498              :             {
    5499              :               /* Only handle contiguous CFG layout.  */
    5500       862184 :               if (bb->loop_father != tbb->loop_father)
    5501              :                 {
    5502              :                   padding_p = false;
    5503              :                   break;
    5504              :                 }
    5505              : 
    5506     10298585 :               FOR_BB_INSNS (tbb, insn)
    5507              :                 {
    5508      9635786 :                   if (!NONDEBUG_INSN_P (insn))
    5509      5594458 :                     continue;
    5510      4041328 :                   size += ix86_min_insn_size (insn);
    5511              : 
    5512              :                   /* We don't know the size of inline asm.
    5513              :                      Don't align a loop that contains a call.  */
    5514      4041328 :                   if (asm_noperands (PATTERN (insn)) >= 0
    5515      4041328 :                       || CALL_P (insn))
    5516              :                     {
    5517              :                       size = -1;
    5518              :                       break;
    5519              :                     }
    5520              :                 }
    5521              :               /* Give up on unknown size or when SIZE exceeds prefetch_block.  */
    5522       821146 :               if (size == -1 || size > ix86_cost->prefetch_block)
    5523              :                 {
    5524              :                   padding_p = false;
    5525              :                   break;
    5526              :                 }
    5527              : 
    5528      1459687 :               FOR_EACH_EDGE (e, ei, tbb->succs)
    5529              :                 {
    5530              :                   /* An edge back to BB closes the loop.  */
    5531      1007069 :                   if (e->dest == bb)
    5532              :                     {
    5533              :                       detect_tight_loop_p = true;
    5534              :                       break;
    5535              :                     }
    5536              :                 }
    5537              : 
    5538       637234 :               if (detect_tight_loop_p)
    5539              :                 break;
    5540              : 
    5541       452618 :               end_insn = BB_END (tbb);
    5542       452618 :               if (JUMP_P (end_insn))
    5543              :                 {
    5544              :                   /* For decoded icache:
    5545              :                      1. Up to two branches are allowed per Way.
    5546              :                      2. A non-conditional branch is the last micro-op in a Way.
    5547              :                   */
    5548       364524 :                   if (onlyjump_p (end_insn)
    5549       364524 :                       && (any_uncondjump_p (end_insn)
    5550       308814 :                           || single_succ_p (tbb)))
    5551              :                     {
    5552              :                       padding_p = false;
    5553              :                       break;
    5554              :                     }
    5555       308814 :                   else if (++cond_branch_num >= 2)
    5556              :                     {
    5557              :                       padding_p = false;
    5558              :                       break;
    5559              :                     }
    5560              :                 }
    5561              : 
    5562              :             }
    5563              :           /* Emit max_skip_align (ceil_log2 (SIZE), 0) before LABEL.  */
    5564       546397 :           if (padding_p && detect_tight_loop_p)
    5565              :             {
    5566       369232 :               emit_insn_before (gen_max_skip_align (GEN_INT (ceil_log2 (size)),
    5567              :                                                     GEN_INT (0)), label);
    5568              :               /* End of function.  */
    5569       184616 :               if (!tbb || tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
    5570              :                 break;
    5571              :               /* Skip the blocks that already fit into one cache line.  */
    5572              :               bb = tbb;
    5573              :             }
    5574              :         }
    5575              :     }
    5576              : 
    5577       980776 :   loop_optimizer_finalize ();
    5578       980776 :   free_dominance_info (CDI_DOMINATORS);
    5579              : }
    5580              : 
    5581              : namespace {
    5582              : /* Pass descriptor for the "align_tight_loops" RTL pass, timed under TV_MACH_DEP.  */
    5583              : const pass_data pass_data_align_tight_loops =
    5584              : {
    5585              :   RTL_PASS, /* type */
    5586              :   "align_tight_loops", /* name */
    5587              :   OPTGROUP_NONE, /* optinfo_flags */
    5588              :   TV_MACH_DEP, /* tv_id */
    5589              :   0, /* properties_required */
    5590              :   0, /* properties_provided */
    5591              :   0, /* properties_destroyed */
    5592              :   0, /* todo_flags_start */
    5593              :   0, /* todo_flags_finish */
    5594              : };
    5595              : /* RTL pass whose execute method runs ix86_align_loops.  */
    5596              : class pass_align_tight_loops : public rtl_opt_pass
    5597              : {
    5598              : public:
    5599       288767 :   pass_align_tight_loops (gcc::context *ctxt)
    5600       577534 :     : rtl_opt_pass (pass_data_align_tight_loops, ctxt)
    5601              :   {}
    5602              : 
    5603              :   /* opt_pass methods.  The gate requires TARGET_ALIGN_TIGHT_LOOPS, a nonzero optimize, and CFUN optimized for speed.  */
    5604      1481491 :   bool gate (function *) final override
    5605              :     {
    5606      1481491 :       return TARGET_ALIGN_TIGHT_LOOPS
    5607      1481005 :              && optimize
    5608      2527287 :              && optimize_function_for_speed_p (cfun);
    5609              :     }
    5610              :   /* Always returns 0; ix86_align_loops is called only if ASM_OUTPUT_MAX_SKIP_ALIGN is defined.  */
    5611       980785 :   unsigned int execute (function *) final override
    5612              :     {
    5613       980785 :       timevar_push (TV_MACH_DEP);
    5614              : #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
    5615       980785 :       ix86_align_loops ();
    5616              : #endif
    5617       980785 :       timevar_pop (TV_MACH_DEP);
    5618       980785 :       return 0;
    5619              :     }
    5620              : }; // class pass_align_tight_loops
    5621              : 
    5622              : } // anon namespace
    5623              : /* Return a newly allocated pass_align_tight_loops constructed with CTXT.  */
    5624              : rtl_opt_pass *
    5625       288767 : make_pass_align_tight_loops (gcc::context *ctxt)
    5626              : {
    5627       288767 :   return new pass_align_tight_loops (ctxt);
    5628              : }
    5629              : 
    5630              : /* Compare the priority of the target features in functions DECL1
    5631              :    and DECL2, as computed by get_builtin_code_for_version.  Return a
    5632              :    positive value if DECL1 has higher priority, a negative value if
    5633              :    DECL2 has higher priority, and 0 if they are the same.  */
    5634              : 
    5635              : int
    5636         5772 : ix86_compare_version_priority (tree decl1, tree decl2)
    5637              : {
    5638         5772 :   unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
    5639         5772 :   unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
    5640              : 
    5641         5772 :   return (int)priority1 - (int)priority2;
    5642              : }
    5643              : 
    5644              : /* V1 and V2 point to function versions with different priorities
    5645              :    based on the target ISA.  Compare their priorities: the result is
    5646              :    V2's dispatch_priority minus V1's (unsigned, converted to int).  */
    5647              : static int
    5648         6860 : feature_compare (const void *v1, const void *v2)
    5649              : {
    5650         6860 :   typedef struct _function_version_info
    5651              :     {
    5652              :       tree version_decl;
    5653              :       tree predicate_chain;
    5654              :       unsigned int dispatch_priority;
    5655              :     } function_version_info;
    5656              :   /* Same fields as struct _function_version_info in dispatch_function_versions.  */
    5657         6860 :   const function_version_info c1 = *(const function_version_info *)v1;
    5658         6860 :   const function_version_info c2 = *(const function_version_info *)v2;
    5659         6860 :   return (c2.dispatch_priority - c1.dispatch_priority);
    5660              : }
    5661              : 
    5662              : /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
    5663              :    to return a pointer to VERSION_DECL if the outcome of the expression
    5664              :    formed by PREDICATE_CHAIN is true.  This function will be called during
    5665              :    version dispatch to decide which function version to execute.  It returns
    5666              :    the basic block at the end, to which more conditions can be added.  */
    5667              : 
    5668              : static basic_block
    5669          834 : add_condition_to_bb (tree function_decl, tree version_decl,
    5670              :                      tree predicate_chain, basic_block new_bb)
    5671              : {
    5672          834 :   gimple *return_stmt;
    5673          834 :   tree convert_expr, result_var;
    5674          834 :   gimple *convert_stmt;
    5675          834 :   gimple *call_cond_stmt;
    5676          834 :   gimple *if_else_stmt;
    5677              : 
    5678          834 :   basic_block bb1, bb2, bb3;
    5679          834 :   edge e12, e23;
    5680              : 
    5681          834 :   tree cond_var, and_expr_var = NULL_TREE;
    5682          834 :   gimple_seq gseq;
    5683              : 
    5684          834 :   tree predicate_decl, predicate_arg;
    5685              : 
    5686          834 :   push_cfun (DECL_STRUCT_FUNCTION (function_decl));
    5687              : 
    5688          834 :   gcc_assert (new_bb != NULL);
    5689          834 :   gseq = bb_seq (new_bb);
    5690              : 
    5691              :   /* Build "result_var = (ptr) &version_decl" and "return result_var".  */
    5692          834 :   convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
    5693              :                          build_fold_addr_expr (version_decl));
    5694          834 :   result_var = create_tmp_var (ptr_type_node);
    5695          834 :   convert_stmt = gimple_build_assign (result_var, convert_expr);
    5696          834 :   return_stmt = gimple_build_return (result_var);
    5697              :   /* With no predicates, return VERSION_DECL unconditionally from NEW_BB.  */
    5698          834 :   if (predicate_chain == NULL_TREE)
    5699              :     {
    5700          200 :       gimple_seq_add_stmt (&gseq, convert_stmt);
    5701          200 :       gimple_seq_add_stmt (&gseq, return_stmt);
    5702          200 :       set_bb_seq (new_bb, gseq);
    5703          200 :       gimple_set_bb (convert_stmt, new_bb);
    5704          200 :       gimple_set_bb (return_stmt, new_bb);
    5705          200 :       pop_cfun ();
    5706          200 :       return new_bb;
    5707              :     }
    5708              :   /* Call each predicate in the chain and combine the results in and_expr_var.  */
    5709         1307 :   while (predicate_chain != NULL)
    5710              :     {
    5711          673 :       cond_var = create_tmp_var (integer_type_node);
    5712          673 :       predicate_decl = TREE_PURPOSE (predicate_chain);
    5713          673 :       predicate_arg = TREE_VALUE (predicate_chain);
    5714          673 :       call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
    5715          673 :       gimple_call_set_lhs (call_cond_stmt, cond_var);
    5716              : 
    5717          673 :       gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
    5718          673 :       gimple_set_bb (call_cond_stmt, new_bb);
    5719          673 :       gimple_seq_add_stmt (&gseq, call_cond_stmt);
    5720              : 
    5721          673 :       predicate_chain = TREE_CHAIN (predicate_chain);
    5722              : 
    5723          673 :       if (and_expr_var == NULL)
    5724              :         and_expr_var = cond_var;
    5725              :       else
    5726              :         {
    5727           39 :           gimple *assign_stmt;
    5728              :           /* Use MIN_EXPR to check whether any predicate result is zero:
    5729              :              and_expr_var = min_expr <cond_var, and_expr_var>  */
    5730           39 :           assign_stmt = gimple_build_assign (and_expr_var,
    5731              :                           build2 (MIN_EXPR, integer_type_node,
    5732              :                                   cond_var, and_expr_var));
    5733              : 
    5734           39 :           gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
    5735           39 :           gimple_set_bb (assign_stmt, new_bb);
    5736           39 :           gimple_seq_add_stmt (&gseq, assign_stmt);
    5737              :         }
    5738              :     }
    5739              :   /* Branch on and_expr_var > 0.  */
    5740          634 :   if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
    5741              :                                     integer_zero_node,
    5742              :                                     NULL_TREE, NULL_TREE);
    5743          634 :   gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
    5744          634 :   gimple_set_bb (if_else_stmt, new_bb);
    5745          634 :   gimple_seq_add_stmt (&gseq, if_else_stmt);
    5746              : 
    5747          634 :   gimple_seq_add_stmt (&gseq, convert_stmt);
    5748          634 :   gimple_seq_add_stmt (&gseq, return_stmt);
    5749          634 :   set_bb_seq (new_bb, gseq);
    5750              : /* Split after the condition: bb1 ends with the condition, and bb2
   holds the conversion and the return.  The bb1->bb2 edge is the true edge.  */
    5751          634 :   bb1 = new_bb;
    5752          634 :   e12 = split_block (bb1, if_else_stmt);
    5753          634 :   bb2 = e12->dest;
    5754          634 :   e12->flags &= ~EDGE_FALLTHRU;
    5755          634 :   e12->flags |= EDGE_TRUE_VALUE;
    5756              :   /* Split again after the return; the new block becomes bb3.  */
    5757          634 :   e23 = split_block (bb2, return_stmt);
    5758              : 
    5759          634 :   gimple_set_bb (convert_stmt, bb2);
    5760          634 :   gimple_set_bb (return_stmt, bb2);
    5761              :   /* The false edge of the condition goes from bb1 to bb3.  */
    5762          634 :   bb3 = e23->dest;
    5763          634 :   make_edge (bb1, bb3, EDGE_FALSE_VALUE);
    5764              :   /* bb2 ends in the return, so redirect it to the exit block instead of bb3.  */
    5765          634 :   remove_edge (e23);
    5766          634 :   make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
    5767              : 
    5768          634 :   pop_cfun ();
    5769              : 
    5770          634 :   return bb3;
    5771              : }
    5772              : 
    5773              : /* Generate the dispatch code for multi-versioned functions.  DISPATCH_DECL
    5774              :    is the function which will contain the dispatch logic.  FNDECLS_P points
    5775              :    to a vec<tree> of the function choices, with the default version first.
    5776              :    EMPTY_BB points to the basic block in DISPATCH_DECL in which the dispatch
    5777              :    code is generated and is updated as conditions are added.  Returns 0.  */
    5778              : 
    5779              : static int
    5780          200 : dispatch_function_versions (tree dispatch_decl,
    5781              :                             void *fndecls_p,
    5782              :                             basic_block *empty_bb)
    5783              : {
    5784          200 :   tree default_decl;
    5785          200 :   gimple *ifunc_cpu_init_stmt;
    5786          200 :   gimple_seq gseq;
    5787          200 :   int ix;
    5788          200 :   tree ele;
    5789          200 :   vec<tree> *fndecls;
    5790          200 :   unsigned int num_versions = 0;
    5791          200 :   unsigned int actual_versions = 0;
    5792          200 :   unsigned int i;
    5793              : 
    5794          200 :   struct _function_version_info
    5795              :     {
    5796              :       tree version_decl;
    5797              :       tree predicate_chain;
    5798              :       unsigned int dispatch_priority;
    5799              :     }*function_version_info;
    5800              : 
    5801          200 :   gcc_assert (dispatch_decl != NULL
    5802              :               && fndecls_p != NULL
    5803              :               && empty_bb != NULL);
    5804              : 
    5805              :   /* fndecls_p is actually a pointer to a vec<tree>.  */
    5806          200 :   fndecls = static_cast<vec<tree> *> (fndecls_p);
    5807              : 
    5808              :   /* At least one more version other than the default.  */
    5809          200 :   num_versions = fndecls->length ();
    5810          200 :   gcc_assert (num_versions >= 2);
    5811              : 
    5812          200 :   function_version_info = (struct _function_version_info *)
    5813          200 :     XNEWVEC (struct _function_version_info, (num_versions - 1));
    5814              : 
    5815              :   /* The first version in the vector is the default decl.  */
    5816          200 :   default_decl = (*fndecls)[0];
    5817              : 
    5818          200 :   push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
    5819              : 
    5820          200 :   gseq = bb_seq (*empty_bb);
    5821              :   /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
    5822              :      constructors, so explicitly call __builtin_cpu_init here.  */
    5823          200 :   ifunc_cpu_init_stmt
    5824          200 :     = gimple_build_call_vec (get_ix86_builtin (IX86_BUILTIN_CPU_INIT), vNULL);
    5825          200 :   gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
    5826          200 :   gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
    5827          200 :   set_bb_seq (*empty_bb, gseq);
    5828              : 
    5829          200 :   pop_cfun ();
    5830              : 
    5831              : 
    5832          991 :   for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    5833              :     {
    5834          791 :       tree version_decl = ele;
    5835          791 :       tree predicate_chain = NULL_TREE;
    5836          791 :       unsigned int priority;
    5837              :       /* Parse the attribute string and find the right predicate decl.  The
    5838              :          predicate may combine many features, like arch-type and various
    5839              :          isa-variants.  Versions without a predicate chain are skipped.  */
    5840          791 :       priority = get_builtin_code_for_version (version_decl,
    5841              :                                                &predicate_chain);
    5842              : 
    5843          791 :       if (predicate_chain == NULL_TREE)
    5844          157 :         continue;
    5845              : 
    5846          634 :       function_version_info [actual_versions].version_decl = version_decl;
    5847          634 :       function_version_info [actual_versions].predicate_chain
    5848          634 :          = predicate_chain;
    5849          634 :       function_version_info [actual_versions].dispatch_priority = priority;
    5850          634 :       actual_versions++;
    5851              :     }
    5852              : 
    5853              :   /* Sort the versions according to descending order of dispatch priority.  The
    5854              :      priority is based on the ISA.  This is not a perfect solution.  There
    5855              :      could still be ambiguity.  If more than one function version is suitable
    5856              :      to execute, which one should be dispatched?  In future, allow the user
    5857              :      to specify a dispatch priority next to the version.  */
    5858          200 :   qsort (function_version_info, actual_versions,
    5859              :          sizeof (struct _function_version_info), feature_compare);
    5860              : 
    5861         1034 :   for  (i = 0; i < actual_versions; ++i)
    5862          634 :     *empty_bb = add_condition_to_bb (dispatch_decl,
    5863              :                                      function_version_info[i].version_decl,
    5864          634 :                                      function_version_info[i].predicate_chain,
    5865              :                                      *empty_bb);
    5866              : 
    5867              :   /* Dispatch the default version at the end, with no predicate chain.  */
    5868          200 :   *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
    5869              :                                    NULL, *empty_bb);
    5870              : 
    5871          200 :   free (function_version_info);
    5872          200 :   return 0;
    5873              : }
    5874              : 
    5875              : /* Return the assembler name for function version DECL, built from ID with
    5876              :    the sorted "target" attribute string of DECL appended.  DECL must have a
    5877              :    "target" attribute.  gnu_inline and virtual versions are diagnosed.  */
    5878              : 
    5879              : static tree
    5880         1113 : ix86_mangle_function_version_assembler_name (tree decl, tree id)
    5881              : {
    5882         1113 :   tree version_attr;
    5883         1113 :   char *attr_str;
    5884              : 
    5885         1113 :   if (DECL_DECLARED_INLINE_P (decl)
    5886         1162 :       && lookup_attribute ("gnu_inline",
    5887           49 :                            DECL_ATTRIBUTES (decl)))
    5888            0 :     error_at (DECL_SOURCE_LOCATION (decl),
    5889              :               "function versions cannot be marked as %<gnu_inline%>,"
    5890              :               " bodies have to be generated");
    5891              : 
    5892         1113 :   if (DECL_VIRTUAL_P (decl)
    5893         2226 :       || DECL_VINDEX (decl))
    5894            0 :     sorry ("virtual function multiversioning not supported");
    5895              : 
    5896         1113 :   version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
    5897              : 
    5898              :   /* The "target" attribute cannot be missing on a function version.  */
    5899         1113 :   gcc_assert (version_attr != NULL_TREE);
    5900              : 
    5901         1113 :   attr_str = sorted_attr_string (TREE_VALUE (version_attr));
    5902              : 
    5903              :   /* Allow the assembler name to be modified if already set, by clearing the RTL.  */
    5904         1113 :   if (DECL_ASSEMBLER_NAME_SET_P (decl))
    5905         1098 :     SET_DECL_RTL (decl, NULL);
    5906              : 
    5907         1113 :   tree ret = clone_identifier (id, attr_str, true);
    5908              : 
    5909         1113 :   XDELETEVEC (attr_str);
    5910              : 
    5911         1113 :   return ret;
    5912              : }
    5913              : /* Return the assembler name to use for DECL, starting from ID; function versions, dispatchers and resolvers may get a suffix.  */
    5914              : tree
    5915    485380750 : ix86_mangle_decl_assembler_name (tree decl, tree id)
    5916              : {
    5917              :   /* For function version, add the target suffix to the assembler name.  */
    5918    485380750 :   if (TREE_CODE (decl) == FUNCTION_DECL)
    5919              :     {
    5920    451127040 :       cgraph_node *node = cgraph_node::get (decl);
    5921              :       /* Mangle all target_clones versions, but only non-default target
    5922              :          attribute versions.  NOTE(review): unlike the branches below, NODE is not NULL-checked here.  */
    5923    451127040 :       if (DECL_FUNCTION_VERSIONED (decl)
    5924    451127040 :           && (node->is_target_clone
    5925         1089 :               || !is_function_default_version (node->decl)))
    5926         1113 :         id = ix86_mangle_function_version_assembler_name (decl, id);
    5927              :       /* Mangle the dispatcher symbol, but only when it is not a target clone.  */
    5928    451125927 :       else if (node && node->dispatcher_function && !node->is_target_clone)
    5929          117 :         id = clone_identifier (id, "ifunc");
    5930     64075049 :       else if (node && node->dispatcher_resolver_function)
    5931          200 :         id = clone_identifier (id, "resolver");
    5932              :     }
    5933              : #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
    5934              :   id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
    5935              : #endif
    5936              : 
    5937    485380750 :   return id;
    5938              : }
    5939              : 
    5940              : /* Make a dispatcher declaration for the multi-versioned function DECL.
    5941              :    Calls to DECL will be replaced with calls to the dispatcher by the
    5942              :    front-end.  Returns the dispatcher decl, or NULL if none was made.  */
    5943              : 
    5944              : tree
    5945          326 : ix86_get_function_versions_dispatcher (void *decl)
    5946              : {
    5947          326 :   tree fn = (tree) decl;
    5948          326 :   struct cgraph_node *node = NULL;
    5949          326 :   struct cgraph_node *default_node = NULL;
    5950          326 :   struct cgraph_function_version_info *node_v = NULL;
    5951              : 
    5952          326 :   tree dispatch_decl = NULL;
    5953              : 
    5954          326 :   struct cgraph_function_version_info *default_version_info = NULL;
    5955              : 
    5956          652 :   gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
    5957              : 
    5958          326 :   node = cgraph_node::get (fn);
    5959          326 :   gcc_assert (node != NULL);
    5960              : 
    5961          326 :   node_v = node->function_version ();
    5962          326 :   gcc_assert (node_v != NULL);
    5963              : 
    5964          326 :   if (node_v->dispatcher_resolver != NULL)
    5965              :     return node_v->dispatcher_resolver;
    5966              : 
    5967              :   /* The default node is always the beginning of the chain, so walk back to it.  */
    5968              :   default_version_info = node_v;
    5969          674 :   while (default_version_info->prev != NULL)
    5970              :     default_version_info = default_version_info->prev;
    5971          212 :   default_node = default_version_info->this_node;
    5972              : 
    5973              :   /* If there is no default node, just return NULL.  */
    5974          212 :   if (!is_function_default_version (default_node->decl))
    5975              :     return NULL;
    5976              : 
    5977              : #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
    5978          203 :   if (targetm.has_ifunc_p ())
    5979              :     {
    5980          203 :       struct cgraph_function_version_info *it_v = NULL;
    5981              : 
    5982              :       /* Right now, the dispatching is done via ifunc.  */
    5983          203 :       dispatch_decl = make_dispatcher_decl (default_node->decl);
    5984              : 
    5985              :       /* Record the dispatcher on every version in the chain.  */
    5986          203 :       it_v = default_version_info;
    5987         1403 :       while (it_v != NULL)
    5988              :         {
    5989          997 :           it_v->dispatcher_resolver = dispatch_decl;
    5990          997 :           it_v = it_v->next;
    5991              :         }
    5992              :     }
    5993              :   else
    5994              : #endif
    5995              :     {
    5996            0 :       error_at (DECL_SOURCE_LOCATION (default_node->decl),
    5997              :                 "multiversioning needs %<ifunc%> which is not supported "
    5998              :                 "on this target");
    5999              :     }
    6000              : 
    6001              :   return dispatch_decl;
    6002              : }
    6003              : 
    6004              : /* Make the resolver function decl to dispatch the versions of
    6005              :    a multi-versioned function, DEFAULT_DECL.  IFUNC_ALIAS_DECL is
    6006              :    the ifunc alias that will point to the created resolver.  Create
    6007              :    an empty basic block in the resolver and store the pointer in
    6008              :    EMPTY_BB.  Return the decl of the resolver function.  */
    6009              : 
    6010              : static tree
    6011          200 : make_resolver_func (const tree default_decl,
    6012              :                     const tree ifunc_alias_decl,
    6013              :                     basic_block *empty_bb)
    6014              : {
    6015          200 :   tree decl, type, t;
    6016              : 
    6017              :   /* The resolver function should return a (void *).  */
    6018          200 :   type = build_function_type_list (ptr_type_node, NULL_TREE);
    6019              : 
    6020          200 :   cgraph_node *node = cgraph_node::get (default_decl);
    6021          200 :   gcc_assert (node && node->function_version ());
    6022              : 
    6023          200 :   decl = build_fn_decl (IDENTIFIER_POINTER (DECL_NAME (default_decl)), type);
    6024              : 
    6025              :   /* Set the assembler name to prevent cgraph_node attempting to mangle.  */
    6026          200 :   SET_DECL_ASSEMBLER_NAME (decl, DECL_ASSEMBLER_NAME (default_decl));
    6027              : 
    6028          200 :   cgraph_node *resolver_node = cgraph_node::get_create (decl);
    6029          200 :   resolver_node->dispatcher_resolver_function = true;
    6030              : 
    6031          200 :   if (node->is_target_clone)
    6032           86 :     resolver_node->is_target_clone = true;
    6033              : 
    6034          200 :   tree id = ix86_mangle_decl_assembler_name
    6035          200 :     (decl, node->function_version ()->assembler_name);
    6036          200 :   symtab->change_decl_assembler_name (decl, id);
    6037              : 
    6038          200 :   DECL_NAME (decl) = DECL_NAME (default_decl);
    6039          200 :   TREE_USED (decl) = 1;
    6040          200 :   DECL_ARTIFICIAL (decl) = 1;
    6041          200 :   DECL_IGNORED_P (decl) = 1;
    6042          200 :   TREE_PUBLIC (decl) = 0;
    6043          200 :   DECL_UNINLINABLE (decl) = 1;
    6044              : 
    6045              :   /* Resolver is not external, body is generated.  */
    6046          200 :   DECL_EXTERNAL (decl) = 0;
    6047          200 :   DECL_EXTERNAL (ifunc_alias_decl) = 0;
    6048              : 
    6049          200 :   DECL_CONTEXT (decl) = NULL_TREE;
    6050          200 :   DECL_INITIAL (decl) = make_node (BLOCK);
    6051          200 :   DECL_STATIC_CONSTRUCTOR (decl) = 0;
    6052              : 
    6053          200 :   if (DECL_COMDAT_GROUP (default_decl)
    6054          200 :       || TREE_PUBLIC (default_decl))
    6055              :     {
    6056              :       /* In this case, each translation unit with a call to this
    6057              :          versioned function will put out a resolver.  Ensure it
    6058              :          is comdat to keep just one copy.  */
    6059          176 :       DECL_COMDAT (decl) = 1;
    6060          176 :       make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    6061              :     }
    6062              :   else
    6063           24 :     TREE_PUBLIC (ifunc_alias_decl) = 0;
    6064              : 
    6065              :   /* Build the result decl and add it to the function decl.  */
    6066          200 :   t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
    6067          200 :   DECL_CONTEXT (t) = decl;
    6068          200 :   DECL_ARTIFICIAL (t) = 1;
    6069          200 :   DECL_IGNORED_P (t) = 1;
    6070          200 :   DECL_RESULT (decl) = t;
    6071              : 
    6072          200 :   gimplify_function_tree (decl);
    6073          200 :   push_cfun (DECL_STRUCT_FUNCTION (decl));
    6074          200 :   *empty_bb = init_lowered_empty_function (decl, false,
    6075              :                                            profile_count::uninitialized ());
    6076              : 
    6077          200 :   cgraph_node::add_new_function (decl, true);
    6078          200 :   symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
    6079              : 
    6080          200 :   pop_cfun ();
    6081              : 
    6082          200 :   gcc_assert (ifunc_alias_decl != NULL);
    6083              :   /* Mark ifunc_alias_decl as "ifunc" with the resolver's assembler name.  NOTE(review): the assert above runs after ifunc_alias_decl was already dereferenced.  */
    6084          200 :   DECL_ATTRIBUTES (ifunc_alias_decl)
    6085          200 :     = make_attribute ("ifunc", IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)),
    6086          200 :                       DECL_ATTRIBUTES (ifunc_alias_decl));
    6087              : 
    6088              :   /* Create the alias for dispatch to resolver here.  */
    6089          200 :   cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
    6090          200 :   return decl;
    6091              : }
    6092              : 
    6093              : /* Generate the dispatching code body for a multi-versioned function.  The
    6094              :    target hook is called to process the "target" attributes and provide the
    6095              :    code to dispatch the right function at run-time.  NODE_P is the cgraph_node
    6096              :    of the dispatcher whose body will be created.  Returns the resolver decl.  */
    6097              : 
    6098              : tree
    6099          200 : ix86_generate_version_dispatcher_body (void *node_p)
    6100              : {
    6101          200 :   tree resolver_decl;
    6102          200 :   basic_block empty_bb;
    6103          200 :   tree default_ver_decl;
    6104          200 :   struct cgraph_node *versn;
    6105          200 :   struct cgraph_node *node;
    6106              : 
    6107          200 :   struct cgraph_function_version_info *node_version_info = NULL;
    6108          200 :   struct cgraph_function_version_info *versn_info = NULL;
    6109              : 
    6110          200 :   node = (cgraph_node *)node_p;
    6111              : 
    6112          200 :   node_version_info = node->function_version ();
    6113          200 :   gcc_assert (node->dispatcher_function
    6114              :               && node_version_info != NULL);
    6115              : 
    6116          200 :   if (node_version_info->dispatcher_resolver)
    6117              :     return node_version_info->dispatcher_resolver;
    6118              : 
    6119              :   /* The first version in the chain corresponds to the default version.  */
    6120          200 :   default_ver_decl = node_version_info->next->this_node->decl;
    6121              : 
    6122              :   /* node is going to be an alias, so clear its definition flag.  */
    6123          200 :   node->definition = false;
    6124              : 
    6125          200 :   resolver_decl = make_resolver_func (default_ver_decl,
    6126              :                                       node->decl, &empty_bb);
    6127              : 
    6128          200 :   node_version_info->dispatcher_resolver = resolver_decl;
    6129              : 
    6130          200 :   push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
    6131              : 
    6132          200 :   auto_vec<tree, 2> fn_ver_vec;
    6133              : 
    6134         1191 :   for (versn_info = node_version_info->next; versn_info;
    6135          991 :        versn_info = versn_info->next)
    6136              :     {
    6137          991 :       versn = versn_info->this_node;
    6138              :       /* Check for virtual functions here again, as by this time it should
    6139              :          have been determined if this function needs a vtable index or
    6140              :          not.  This happens for methods in derived classes that override
    6141              :          virtual methods in base classes but are not explicitly marked as
    6142              :          virtual.  */
    6143          991 :       if (DECL_VIRTUAL_P (versn->decl))
    6144            0 :         sorry ("virtual function multiversioning not supported");
    6145              : 
    6146          991 :       fn_ver_vec.safe_push (versn->decl);
    6147              :     }
    6148              : 
    6149          200 :   dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
    6150          200 :   cgraph_edge::rebuild_edges ();
    6151          200 :   pop_cfun ();
    6152          200 :   return resolver_decl;
    6153          200 : }
    6154              : 
    6155              : 
        

Generated by: LCOV version 2.4-beta

The LCOV profile is generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite is run with the built compiler.