LCOV - code coverage report
Current view: top level - gcc/config/i386 - i386-features.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 89.0 % 2805 2497
Test Date: 2026-06-20 15:32:29 Functions: 98.9 % 95 94
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Copyright (C) 1988-2026 Free Software Foundation, Inc.
       2              : 
       3              : This file is part of GCC.
       4              : 
       5              : GCC is free software; you can redistribute it and/or modify
       6              : it under the terms of the GNU General Public License as published by
       7              : the Free Software Foundation; either version 3, or (at your option)
       8              : any later version.
       9              : 
      10              : GCC is distributed in the hope that it will be useful,
      11              : but WITHOUT ANY WARRANTY; without even the implied warranty of
      12              : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13              : GNU General Public License for more details.
      14              : 
      15              : You should have received a copy of the GNU General Public License
      16              : along with GCC; see the file COPYING3.  If not see
      17              : <http://www.gnu.org/licenses/>.  */
      18              : 
      19              : #define IN_TARGET_CODE 1
      20              : 
      21              : #include "config.h"
      22              : #include "system.h"
      23              : #include "coretypes.h"
      24              : #include "backend.h"
      25              : #include "rtl.h"
      26              : #include "tree.h"
      27              : #include "memmodel.h"
      28              : #include "gimple.h"
      29              : #include "cfghooks.h"
      30              : #include "cfgloop.h"
      31              : #include "df.h"
      32              : #include "tm_p.h"
      33              : #include "stringpool.h"
      34              : #include "expmed.h"
      35              : #include "optabs.h"
      36              : #include "regs.h"
      37              : #include "emit-rtl.h"
      38              : #include "recog.h"
      39              : #include "cgraph.h"
      40              : #include "diagnostic.h"
      41              : #include "cfgbuild.h"
      42              : #include "alias.h"
      43              : #include "fold-const.h"
      44              : #include "attribs.h"
      45              : #include "calls.h"
      46              : #include "stor-layout.h"
      47              : #include "varasm.h"
      48              : #include "output.h"
      49              : #include "insn-attr.h"
      50              : #include "flags.h"
      51              : #include "except.h"
      52              : #include "explow.h"
      53              : #include "expr.h"
      54              : #include "cfgrtl.h"
      55              : #include "common/common-target.h"
      56              : #include "langhooks.h"
      57              : #include "reload.h"
      58              : #include "gimplify.h"
      59              : #include "dwarf2.h"
      60              : #include "tm-constrs.h"
      61              : #include "cselib.h"
      62              : #include "sched-int.h"
      63              : #include "opts.h"
      64              : #include "tree-pass.h"
      65              : #include "context.h"
      66              : #include "pass_manager.h"
      67              : #include "target-globals.h"
      68              : #include "gimple-iterator.h"
      69              : #include "shrink-wrap.h"
      70              : #include "builtins.h"
      71              : #include "rtl-iter.h"
      72              : #include "tree-iterator.h"
      73              : #include "dbgcnt.h"
      74              : #include "case-cfn-macros.h"
      75              : #include "dojump.h"
      76              : #include "fold-const-call.h"
      77              : #include "tree-vrp.h"
      78              : #include "tree-ssanames.h"
      79              : #include "selftest.h"
      80              : #include "selftest-rtl.h"
      81              : #include "print-rtl.h"
      82              : #include "intl.h"
      83              : #include "ifcvt.h"
      84              : #include "symbol-summary.h"
      85              : #include "sreal.h"
      86              : #include "ipa-cp.h"
      87              : #include "ipa-prop.h"
      88              : #include "ipa-fnsummary.h"
      89              : #include "wide-int-bitmask.h"
      90              : #include "tree-vector-builder.h"
      91              : #include "debug.h"
      92              : #include "dwarf2out.h"
      93              : #include "i386-builtins.h"
      94              : #include "i386-features.h"
      95              : #include "i386-expand.h"
      96              : 
      97              : const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {
      98              :   "savms64",
      99              :   "resms64",
     100              :   "resms64x",
     101              :   "savms64f",
     102              :   "resms64f",
     103              :   "resms64fx"
     104              : };
     105              : 
     106              : const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
      107              : /* The offset values below are where each register is stored for the layout
      108              :    relative to the incoming stack pointer.  The value of each m_regs[].offset
      109              :    will be relative to the incoming base pointer (rax or rsi) used by the stub.
     110              : 
     111              :     s_instances:   0            1               2               3
     112              :     Offset:                                     realigned or    aligned + 8
     113              :     Register       aligned      aligned + 8     aligned w/HFP   w/HFP   */
     114              :     XMM15_REG,  /* 0x10         0x18            0x10            0x18    */
     115              :     XMM14_REG,  /* 0x20         0x28            0x20            0x28    */
     116              :     XMM13_REG,  /* 0x30         0x38            0x30            0x38    */
     117              :     XMM12_REG,  /* 0x40         0x48            0x40            0x48    */
     118              :     XMM11_REG,  /* 0x50         0x58            0x50            0x58    */
     119              :     XMM10_REG,  /* 0x60         0x68            0x60            0x68    */
     120              :     XMM9_REG,   /* 0x70         0x78            0x70            0x78    */
     121              :     XMM8_REG,   /* 0x80         0x88            0x80            0x88    */
     122              :     XMM7_REG,   /* 0x90         0x98            0x90            0x98    */
     123              :     XMM6_REG,   /* 0xa0         0xa8            0xa0            0xa8    */
     124              :     SI_REG,     /* 0xa8         0xb0            0xa8            0xb0    */
     125              :     DI_REG,     /* 0xb0         0xb8            0xb0            0xb8    */
     126              :     BX_REG,     /* 0xb8         0xc0            0xb8            0xc0    */
     127              :     BP_REG,     /* 0xc0         0xc8            N/A             N/A     */
     128              :     R12_REG,    /* 0xc8         0xd0            0xc0            0xc8    */
     129              :     R13_REG,    /* 0xd0         0xd8            0xc8            0xd0    */
     130              :     R14_REG,    /* 0xd8         0xe0            0xd0            0xd8    */
     131              :     R15_REG,    /* 0xe0         0xe8            0xd8            0xe0    */
     132              : };
     133              : 
     134              : /* Instantiate static const values.  */
     135              : const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
     136              : const unsigned xlogue_layout::MIN_REGS;
     137              : const unsigned xlogue_layout::MAX_REGS;
     138              : const unsigned xlogue_layout::MAX_EXTRA_REGS;
     139              : const unsigned xlogue_layout::VARIANT_COUNT;
     140              : const unsigned xlogue_layout::STUB_NAME_MAX_LEN;
     141              : 
     142              : /* Initialize xlogue_layout::s_stub_names to zero.  */
     143              : char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
     144              :                                 [STUB_NAME_MAX_LEN];
     145              : 
     146              : /* Instantiates all xlogue_layout instances.  */
     147              : const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
     148              :   xlogue_layout (0, false),
     149              :   xlogue_layout (8, false),
     150              :   xlogue_layout (0, true),
     151              :   xlogue_layout (8, true)
     152              : };
     153              : 
     154              : /* Return an appropriate const instance of xlogue_layout based upon values
     155              :    in cfun->machine and crtl.  */
     156              : const class xlogue_layout &
     157        49891 : xlogue_layout::get_instance ()
     158              : {
     159        49891 :   enum xlogue_stub_sets stub_set;
     160        49891 :   bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;
     161              : 
     162        49891 :   if (stack_realign_fp)
     163              :     stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
     164        40910 :   else if (frame_pointer_needed)
     165        25246 :     stub_set = aligned_plus_8
     166        31552 :               ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
     167              :               : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
     168              :   else
     169         9358 :     stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;
     170              : 
     171        49891 :   return s_instances[stub_set];
     172              : }
     173              : 
     174              : /* Determine how many clobbered registers can be saved by the stub.
     175              :    Returns the count of registers the stub will save and restore.  */
     176              : unsigned
     177        35225 : xlogue_layout::count_stub_managed_regs ()
     178              : {
     179        35225 :   bool hfp = frame_pointer_needed || stack_realign_fp;
     180        35225 :   unsigned i, count;
     181        35225 :   unsigned regno;
     182              : 
     183        94890 :   for (count = i = MIN_REGS; i < MAX_REGS; ++i)
     184              :     {
     185        93670 :       regno = REG_ORDER[i];
     186        93670 :       if (regno == BP_REG && hfp)
     187        18200 :         continue;
     188        75470 :       if (!ix86_save_reg (regno, false, false))
     189              :         break;
     190        41465 :       ++count;
     191              :     }
     192        35225 :   return count;
     193              : }
     194              : 
     195              : /* Determine if register REGNO is a stub managed register given the
     196              :    total COUNT of stub managed registers.  */
     197              : bool
     198      2641728 : xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
     199              : {
     200      2641728 :   bool hfp = frame_pointer_needed || stack_realign_fp;
     201      2641728 :   unsigned i;
     202              : 
     203     34456982 :   for (i = 0; i < count; ++i)
     204              :     {
     205     32315123 :       gcc_assert (i < MAX_REGS);
     206     32315123 :       if (REG_ORDER[i] == BP_REG && hfp)
     207       519694 :         ++count;
     208     31795429 :       else if (REG_ORDER[i] == regno)
     209              :         return true;
     210              :     }
     211              :   return false;
     212              : }
     213              : 
     214              : /* Constructor for xlogue_layout.  */
     215      1190632 : xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
     216      1190632 :   : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),
     217      1190632 :     m_stack_align_off_in (stack_align_off_in)
     218              : {
     219      1190632 :   HOST_WIDE_INT offset = stack_align_off_in;
     220      1190632 :   unsigned i, j;
     221              : 
     222     22622008 :   for (i = j = 0; i < MAX_REGS; ++i)
     223              :     {
     224     21431376 :       unsigned regno = REG_ORDER[i];
     225              : 
     226     21431376 :       if (regno == BP_REG && hfp)
     227       595316 :         continue;
     228     20836060 :       if (SSE_REGNO_P (regno))
     229              :         {
     230     11906320 :           offset += 16;
     231              :           /* Verify that SSE regs are always aligned.  */
     232     11906320 :           gcc_assert (!((stack_align_off_in + offset) & 15));
     233              :         }
     234              :       else
     235      8929740 :         offset += 8;
     236              : 
     237     20836060 :       m_regs[j].regno    = regno;
     238     20836060 :       m_regs[j++].offset = offset - STUB_INDEX_OFFSET;
     239              :     }
     240      1190632 :   gcc_assert (j == m_nregs);
     241      1190632 : }
     242              : 
     243              : const char *
     244        14666 : xlogue_layout::get_stub_name (enum xlogue_stub stub,
     245              :                               unsigned n_extra_regs)
     246              : {
     247        14666 :   const int have_avx = TARGET_AVX;
     248        14666 :   char *name = s_stub_names[!!have_avx][stub][n_extra_regs];
     249              : 
     250              :   /* Lazy init */
     251        14666 :   if (!*name)
     252              :     {
     253          362 :       int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
     254              :                           (have_avx ? "avx" : "sse"),
     255          181 :                           STUB_BASE_NAMES[stub],
     256              :                           MIN_REGS + n_extra_regs);
     257          181 :       gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN);
     258              :     }
     259              : 
     260        14666 :   return name;
     261              : }
     262              : 
     263              : /* Return rtx of a symbol ref for the entry point (based upon
     264              :    cfun->machine->call_ms2sysv_extra_regs) of the specified stub.  */
     265              : rtx
     266        14666 : xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
     267              : {
     268        14666 :   const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
     269        14666 :   gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
     270        14666 :   gcc_assert (stub < XLOGUE_STUB_COUNT);
     271        14666 :   gcc_assert (crtl->stack_realign_finalized);
     272              : 
     273        14666 :   return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));
     274              : }
     275              : 
     276              : unsigned scalar_chain::max_id = 0;
     277              : 
     278              : namespace {
     279              : 
     280              : /* Initialize new chain.  */
     281              : 
     282      6314062 : scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
     283              : {
     284      6314062 :   smode = smode_;
     285      6314062 :   vmode = vmode_;
     286              : 
     287      6314062 :   chain_id = ++max_id;
     288              : 
     289      6314062 :    if (dump_file)
     290          136 :     fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
     291              : 
     292      6314062 :   bitmap_obstack_initialize (NULL);
     293      6314062 :   insns = BITMAP_ALLOC (NULL);
     294      6314062 :   defs = BITMAP_ALLOC (NULL);
     295      6314062 :   defs_conv = BITMAP_ALLOC (NULL);
     296      6314062 :   insns_conv = BITMAP_ALLOC (NULL);
     297      6314062 :   queue = NULL;
     298              : 
     299      6314062 :   cost_sse_integer = 0;
     300      6314062 :   weighted_cost_sse_integer = 0 ;
     301      6314062 :   max_visits = x86_stv_max_visits;
     302      6314062 : }
     303              : 
     304              : /* Free chain's data.  */
     305              : 
     306      6314062 : scalar_chain::~scalar_chain ()
     307              : {
     308      6314062 :   BITMAP_FREE (insns);
     309      6314062 :   BITMAP_FREE (defs);
     310      6314062 :   BITMAP_FREE (defs_conv);
     311      6314062 :   BITMAP_FREE (insns_conv);
     312      6314062 :   bitmap_obstack_release (NULL);
     313      6314062 : }
     314              : 
      315              : /* Add instruction into chain's queue.  */
     316              : 
     317              : void
     318      8142055 : scalar_chain::add_to_queue (unsigned insn_uid)
     319              : {
     320      8142055 :   if (!bitmap_set_bit (queue, insn_uid))
     321              :     return;
     322              : 
     323      6155053 :   if (dump_file)
     324          141 :     fprintf (dump_file, "  Adding insn %d into chain's #%d queue\n",
     325              :              insn_uid, chain_id);
     326              : }
     327              : 
     328              : /* For DImode conversion, mark register defined by DEF as requiring
     329              :    conversion.  */
     330              : 
     331              : void
     332      9191797 : scalar_chain::mark_dual_mode_def (df_ref def)
     333              : {
     334      9191797 :   gcc_assert (DF_REF_REG_DEF_P (def));
     335              : 
     336              :   /* Record the def/insn pair so we can later efficiently iterate over
     337              :      the defs to convert on insns not in the chain.  */
     338      9191797 :   bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
     339      9191797 :   basic_block bb = BLOCK_FOR_INSN (DF_REF_INSN (def));
     340      9191797 :   profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
     341      9191797 :   bool speed_p = optimize_bb_for_speed_p (bb);
     342      9191797 :   int cost = 0;
     343              : 
     344      9191797 :   if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def)))
     345              :     {
     346      2678878 :       if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def))
     347      2678878 :           && !reg_new)
     348      1360489 :         return;
     349              : 
     350              :       /* Cost integer to sse moves.  */
     351      2442098 :       if (speed_p)
     352      2165721 :         cost = COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
     353       276377 :       else if (TARGET_64BIT || smode == SImode)
     354              :         cost = COSTS_N_BYTES (4);
     355              :       /* vmovd (4 bytes) + vpinsrd (6 bytes).  */
     356        18654 :       else if (TARGET_SSE4_1)
     357              :         cost = COSTS_N_BYTES (10);
     358              :       /* movd (4 bytes) + movd (4 bytes) + unpckldq (4 bytes).  */
     359              :       else
     360      7831308 :         cost = COSTS_N_BYTES (12);
     361              :     }
     362              :   else
     363              :     {
     364      6512919 :       if (!reg_new)
     365              :         return;
     366              : 
     367              :       /* Cost sse to integer moves.  */
     368      5389210 :       if (speed_p)
     369      4838286 :         cost = COSTS_N_INSNS (ix86_cost->sse_to_integer) / 2;
     370       550924 :       else if (TARGET_64BIT || smode == SImode)
     371              :         cost = COSTS_N_BYTES (4);
     372              :       /* vmovd (4 bytes) + vpextrd (6 bytes).  */
     373         2999 :       else if (TARGET_SSE4_1)
     374              :         cost = COSTS_N_BYTES (10);
     375              :       /* movd (4 bytes) + psrlq (5 bytes) + movd (4 bytes).  */
     376              :       else
     377      7831308 :         cost = COSTS_N_BYTES (13);
     378              :     }
     379              : 
     380      7831308 :   if (speed_p)
     381      7004007 :     weighted_cost_sse_integer += bb->count.to_sreal_scale (entry_count) * cost;
     382              : 
     383      7831308 :   cost_sse_integer += cost;
     384              : 
     385      7831308 :   if (dump_file)
     386          240 :     fprintf (dump_file,
     387              :              "  Mark r%d def in insn %d as requiring both modes in chain #%d\n",
     388          240 :              DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
     389              : }
     390              : 
     391              : /* Check REF's chain to add new insns into a queue
     392              :    and find registers requiring conversion.  Return true if OK, false
     393              :    if the analysis was aborted.  */
     394              : 
     395              : bool
     396     17574674 : scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref,
     397              :                                       bitmap disallowed)
     398              : {
     399     17574674 :   df_link *chain;
     400     17574674 :   bool mark_def = false;
     401              : 
     402     17574674 :   gcc_checking_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)));
     403              : 
     404     60602178 :   for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
     405              :     {
     406     43031373 :       unsigned uid = DF_REF_INSN_UID (chain->ref);
     407              : 
     408     43031373 :       if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
     409      7576709 :         continue;
     410              : 
     411     35454664 :       if (--max_visits == 0)
     412              :         return false;
     413              : 
     414     35454104 :       if (!DF_REF_REG_MEM_P (chain->ref))
     415              :         {
     416     29589528 :           if (bitmap_bit_p (insns, uid))
     417      9438995 :             continue;
     418              : 
     419     20150533 :           if (bitmap_bit_p (candidates, uid))
     420              :             {
     421      8142055 :               add_to_queue (uid);
     422      8142055 :               continue;
     423              :             }
     424              : 
     425              :           /* If we run into parts of an aborted chain discovery abort.  */
     426     12008478 :           if (bitmap_bit_p (disallowed, uid))
     427              :             return false;
     428              :         }
     429              : 
     430     17869745 :       if (DF_REF_REG_DEF_P (chain->ref))
     431              :         {
     432      2678878 :           if (dump_file)
     433          125 :             fprintf (dump_file, "  r%d def in insn %d isn't convertible\n",
     434              :                      DF_REF_REGNO (chain->ref), uid);
     435      2678878 :           mark_dual_mode_def (chain->ref);
     436              :         }
     437              :       else
     438              :         {
     439     15190867 :           if (dump_file)
     440          524 :             fprintf (dump_file, "  r%d use in insn %d isn't convertible\n",
     441              :                      DF_REF_REGNO (chain->ref), uid);
     442              :           mark_def = true;
     443              :         }
     444              :     }
     445              : 
     446     17570805 :   if (mark_def)
     447      6512919 :     mark_dual_mode_def (ref);
     448              : 
     449              :   return true;
     450              : }
     451              : 
     452              : /* Check whether X is a convertible *concatditi_? variant.  X is known
     453              :    to be any_or_plus:TI, i.e. PLUS:TI, IOR:TI or XOR:TI.  */
     454              : 
     455              : static bool
     456        30008 : timode_concatdi_p (rtx x)
     457              : {
     458        30008 :   rtx op0 = XEXP (x, 0);
     459        30008 :   rtx op1 = XEXP (x, 1);
     460              : 
     461        30008 :   if (GET_CODE (op1) == ASHIFT)
     462          952 :     std::swap (op0, op1);
     463              : 
     464        30008 :   return GET_CODE (op0) == ASHIFT
     465        21083 :          && GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
     466        21083 :          && GET_MODE (XEXP (XEXP (op0, 0), 0)) == DImode
     467        21083 :          && REG_P (XEXP (XEXP (op0, 0), 0))
     468        20952 :          && CONST_INT_P (XEXP (op0, 1))
     469        20952 :          && INTVAL (XEXP (op0, 1)) == 64
     470        20952 :          && GET_CODE (op1) == ZERO_EXTEND
     471        20000 :          && GET_MODE (XEXP (op1, 0)) == DImode
     472        50008 :          && REG_P (XEXP (op1, 0));
     473              : }
     474              : 
     475              : 
     476              : /* Add instruction into a chain.  Return true if OK, false if the search
     477              :    was aborted.  */
     478              : 
     479              : bool
     480     12464983 : scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid,
     481              :                         bitmap disallowed)
     482              : {
     483     12464983 :   if (!bitmap_set_bit (insns, insn_uid))
     484              :     return true;
     485              : 
     486     12464983 :   if (dump_file)
     487          277 :     fprintf (dump_file, "  Adding insn %d to chain #%d\n", insn_uid, chain_id);
     488              : 
     489     12464983 :   rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
     490     12464983 :   rtx def_set = single_set (insn);
     491     12464983 :   if (def_set && REG_P (SET_DEST (def_set))
     492     22054876 :       && !HARD_REGISTER_P (SET_DEST (def_set)))
     493      9564994 :     bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));
     494              : 
     495              :   /* ???  The following is quadratic since analyze_register_chain
     496              :      iterates over all refs to look for dual-mode regs.  Instead this
     497              :      should be done separately for all regs mentioned in the chain once.  */
     498     12464983 :   df_ref ref;
     499     25458130 :   for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
     500     12994544 :     if (!HARD_REGISTER_P (DF_REF_REG (ref)))
     501      9564994 :       if (!analyze_register_chain (candidates, ref, disallowed))
     502              :         return false;
     503              : 
     504              :   /* The operand(s) of VEC_SELECT, ZERO_EXTEND and similar ops don't need
     505              :      to be converted/convertible.  */
     506     12463586 :   if (def_set)
     507     12463586 :     switch (GET_CODE (SET_SRC (def_set)))
     508              :       {
     509      3721207 :       case REG:
     510      3721207 :         if (HARD_REGISTER_P (SET_SRC (def_set)))
     511              :           return true;
     512              :         break;
     513              :       case VEC_SELECT:
     514              :         return true;
     515          264 :       case ZERO_EXTEND:
     516          264 :         if (GET_MODE (XEXP (SET_SRC (def_set), 0)) == DImode)
     517              :           return true;
     518              :         break;
     519      2334392 :       case PLUS:
     520      2334392 :       case IOR:
     521      2334392 :       case XOR:
     522      2334392 :         if (smode == TImode && timode_concatdi_p (SET_SRC (def_set)))
     523              :           return true;
     524              :         break;
     525              :       default:
     526              :         break;
     527              :       }
     528              : 
     529     27277480 :   for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
     530     14882853 :     if (DF_REF_TYPE (ref) == DF_REF_REG_USE
     531      8009683 :         && !SUBREG_P (DF_REF_REG (ref)))
     532      8009680 :       if (!analyze_register_chain (candidates, ref, disallowed))
     533              :         return false;
     534              : 
     535              :   return true;
     536              : }
     537              : 
     538              : /* Build new chain starting from insn INSN_UID recursively
     539              :    adding all dependent uses and definitions.  Return true if OK, false
     540              :    if the chain discovery was aborted.  */
     541              : 
     542              : bool
     543      6314062 : scalar_chain::build (bitmap candidates, unsigned insn_uid, bitmap disallowed)
     544              : {
     545      6314062 :   queue = BITMAP_ALLOC (NULL);
     546      6314062 :   bitmap_set_bit (queue, insn_uid);
     547              : 
     548      6314062 :   if (dump_file)
     549          136 :     fprintf (dump_file, "Building chain #%d...\n", chain_id);
     550              : 
     551     18775176 :   while (!bitmap_empty_p (queue))
     552              :     {
     553     12464983 :       insn_uid = bitmap_first_set_bit (queue);
     554     12464983 :       bitmap_clear_bit (queue, insn_uid);
     555     12464983 :       bitmap_clear_bit (candidates, insn_uid);
     556     12464983 :       if (!add_insn (candidates, insn_uid, disallowed))
     557              :         {
      558              :           /* If we aborted the search, put the insns found so far on the set
      559              :              of disallowed insns so that further searches reaching them also
      560              :              abort, and thus we abort the whole, as yet undiscovered, chain.  */
     561         3869 :           bitmap_ior_into (disallowed, insns);
     562         3869 :           if (dump_file)
     563            0 :             fprintf (dump_file, "Aborted chain #%d discovery\n", chain_id);
     564         3869 :           BITMAP_FREE (queue);
     565         3869 :           return false;
     566              :         }
     567              :     }
     568              : 
     569      6310193 :   if (dump_file)
     570              :     {
     571          136 :       fprintf (dump_file, "Collected chain #%d...\n", chain_id);
     572          136 :       fprintf (dump_file, "  insns: ");
     573          136 :       dump_bitmap (dump_file, insns);
     574          136 :       if (!bitmap_empty_p (defs_conv))
     575              :         {
     576          136 :           bitmap_iterator bi;
     577          136 :           unsigned id;
     578          136 :           const char *comma = "";
     579          136 :           fprintf (dump_file, "  defs to convert: ");
     580          366 :           EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
     581              :             {
     582          230 :               fprintf (dump_file, "%sr%d", comma, id);
     583          230 :               comma = ", ";
     584              :             }
     585          136 :           fprintf (dump_file, "\n");
     586              :         }
     587              :     }
     588              : 
     589      6310193 :   BITMAP_FREE (queue);
     590              : 
     591      6310193 :   return true;
     592              : }
     593              : 
     594              : /* Return a cost of building a vector constant
     595              :    instead of using a scalar one.  */
     596              : 
     597              : int
     598      2601966 : general_scalar_chain::vector_const_cost (rtx exp, basic_block bb)
     599              : {
     600      2601966 :   gcc_assert (CONST_INT_P (exp));
     601              : 
     602      2601966 :   if (standard_sse_constant_p (exp, vmode))
     603       607468 :     return ix86_cost->sse_op;
     604      1994498 :   if (optimize_bb_for_size_p (bb))
     605              :     return COSTS_N_BYTES (8);
     606              :   /* We have separate costs for SImode and DImode, use SImode costs
     607              :      for smaller modes.  */
     608      2372852 :   return COSTS_N_INSNS (ix86_cost->sse_load[smode == DImode ? 1 : 0]) / 2;
     609              : }
     610              : 
     611              : /* Return true if it's cost profitable for chain conversion.  */
     612              : 
     613              : bool
     614      5810710 : general_scalar_chain::compute_convert_gain ()
     615              : {
     616      5810710 :   bitmap_iterator bi;
     617      5810710 :   unsigned insn_uid;
     618      5810710 :   int gain = 0;
     619      5810710 :   sreal weighted_gain = 0;
     620              : 
     621      5810710 :   if (dump_file)
     622          136 :     fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
     623              : 
     624              :   /* SSE costs distinguish between SImode and DImode loads/stores, for
     625              :      int costs factor in the number of GPRs involved.  When supporting
     626              :      smaller modes than SImode the int load/store costs need to be
     627              :      adjusted as well.  */
     628      5810710 :   unsigned sse_cost_idx = smode == DImode ? 1 : 0;
     629      5810710 :   int m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;
     630              : 
     631     17266671 :   EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
     632              :     {
     633     11455961 :       rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
     634     11455961 :       rtx def_set = single_set (insn);
     635     11455961 :       rtx src = SET_SRC (def_set);
     636     11455961 :       rtx dst = SET_DEST (def_set);
     637     11455961 :       basic_block bb = BLOCK_FOR_INSN (insn);
     638     11455961 :       int igain = 0;
     639     11455961 :       profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
     640     11455961 :       bool speed_p = optimize_bb_for_speed_p (bb);
     641     11455961 :       sreal bb_freq = bb->count.to_sreal_scale (entry_count);
     642              : 
     643     11455961 :       if (REG_P (src) && REG_P (dst))
     644              :         {
     645       900055 :           if (!speed_p)
     646              :             /* reg-reg move is 2 bytes, while SSE 3.  */
     647       182066 :             igain += COSTS_N_BYTES (2 * m - 3);
     648              :           else
     649              :             /* Move costs are normalized to reg-reg move having cost 2.  */
     650       717989 :             igain += COSTS_N_INSNS (2 * m - ix86_cost->xmm_move) / 2;
     651              :         }
     652     10555906 :       else if (REG_P (src) && MEM_P (dst))
     653              :         {
     654      2298053 :           if (!speed_p)
     655              :             /* Integer load/store is 3+ bytes and SSE 4+.  */
     656       188831 :             igain += COSTS_N_BYTES (3 * m - 4);
     657              :           else
     658      2109222 :             igain
     659      2109222 :               += COSTS_N_INSNS (m * ix86_cost->int_store[2]
     660              :                                 - ix86_cost->sse_store[sse_cost_idx]) / 2;
     661              :         }
     662      8257853 :       else if (MEM_P (src) && REG_P (dst))
     663              :         {
     664      3709762 :           if (!speed_p)
     665       350314 :             igain += COSTS_N_BYTES (3 * m - 4);
     666              :           else
     667      3359448 :             igain += COSTS_N_INSNS (m * ix86_cost->int_load[2]
     668              :                                     - ix86_cost->sse_load[sse_cost_idx]) / 2;
     669              :         }
     670              :       else
     671              :         {
     672              :           /* For operations on memory operands, include the overhead
     673              :              of explicit load and store instructions.  */
     674      4548091 :           if (MEM_P (dst))
     675              :             {
     676        66364 :               if (!speed_p)
     677              :                 /* ??? This probably should account size difference
     678              :                    of SSE and integer load rather than full SSE load.  */
     679              :                 igain -= COSTS_N_BYTES (8);
     680              :               else
     681              :                 {
     682        57071 :                   int cost = (m * (ix86_cost->int_load[2]
     683        57071 :                                    + ix86_cost->int_store[2])
     684        57071 :                              - (ix86_cost->sse_load[sse_cost_idx] +
     685        57071 :                                 ix86_cost->sse_store[sse_cost_idx]));
     686        57071 :                   igain += COSTS_N_INSNS (cost) / 2;
     687              :                 }
     688              :             }
     689              : 
     690      4548091 :           switch (GET_CODE (src))
     691              :             {
     692       481035 :             case ASHIFT:
     693       481035 :             case ASHIFTRT:
     694       481035 :             case LSHIFTRT:
     695       481035 :               if (m == 2)
     696              :                 {
     697        16981 :                   if (INTVAL (XEXP (src, 1)) >= 32)
     698        11523 :                     igain += ix86_cost->add;
     699              :                   /* Gain for extend highpart case.  */
     700         5458 :                   else if (GET_CODE (XEXP (src, 0)) == ASHIFT)
     701            0 :                     igain += ix86_cost->shift_const - ix86_cost->sse_op;
     702              :                   else
     703         5458 :                     igain += ix86_cost->shift_const;
     704              :                 }
     705              : 
     706       481035 :               igain += ix86_cost->shift_const - ix86_cost->sse_op;
     707              : 
     708       481035 :               if (CONST_INT_P (XEXP (src, 0)))
     709            0 :                 igain -= vector_const_cost (XEXP (src, 0), bb);
     710              :               break;
     711              : 
     712         3646 :             case ROTATE:
     713         3646 :             case ROTATERT:
     714         3646 :               igain += m * ix86_cost->shift_const;
     715         3646 :               if (TARGET_AVX512VL)
     716          204 :                 igain -= ix86_cost->sse_op;
     717         3442 :               else if (smode == DImode)
     718              :                 {
     719          590 :                   int bits = INTVAL (XEXP (src, 1));
     720          590 :                   if ((bits & 0x0f) == 0)
     721          106 :                     igain -= ix86_cost->sse_op;
     722          484 :                   else if ((bits & 0x07) == 0)
     723           27 :                     igain -= 2 * ix86_cost->sse_op;
     724              :                   else
     725          457 :                     igain -= 3 * ix86_cost->sse_op;
     726              :                 }
     727         2852 :               else if (INTVAL (XEXP (src, 1)) == 16)
     728          139 :                 igain -= ix86_cost->sse_op;
     729              :               else
     730         2713 :                 igain -= 2 * ix86_cost->sse_op;
     731              :               break;
     732              : 
     733      2807499 :             case AND:
     734      2807499 :             case IOR:
     735      2807499 :             case XOR:
     736      2807499 :             case PLUS:
     737      2807499 :             case MINUS:
     738      2807499 :               igain += m * ix86_cost->add - ix86_cost->sse_op;
     739              :               /* Additional gain for andnot for targets without BMI.  */
     740      2807499 :               if (GET_CODE (XEXP (src, 0)) == NOT
     741         3598 :                   && !TARGET_BMI)
     742         3589 :                 igain += m * ix86_cost->add;
     743              : 
     744      2807499 :               if (CONST_INT_P (XEXP (src, 0)))
     745            0 :                 igain -= vector_const_cost (XEXP (src, 0), bb);
     746      2807499 :               if (CONST_INT_P (XEXP (src, 1)))
     747      1662264 :                 igain -= vector_const_cost (XEXP (src, 1), bb);
     748      2807499 :               if (MEM_P (XEXP (src, 1)))
     749              :                 {
     750        87980 :                   if (!speed_p)
     751        20721 :                     igain -= COSTS_N_BYTES (m == 2 ? 3 : 5);
     752              :                   else
     753        77615 :                     igain += COSTS_N_INSNS
     754              :                                (m * ix86_cost->int_load[2]
     755              :                                  - ix86_cost->sse_load[sse_cost_idx]) / 2;
     756              :                 }
     757              :               break;
     758              : 
     759        50940 :             case NEG:
     760        50940 :             case NOT:
     761        50940 :               igain -= ix86_cost->sse_op + COSTS_N_INSNS (1);
     762              : 
     763        50940 :               if (GET_CODE (XEXP (src, 0)) != ABS)
     764              :                 {
     765        50940 :                   igain += m * ix86_cost->add;
     766        50940 :                   break;
     767              :                 }
     768              :               /* FALLTHRU */
     769              : 
     770         1002 :             case ABS:
     771         1002 :             case SMAX:
     772         1002 :             case SMIN:
     773         1002 :             case UMAX:
     774         1002 :             case UMIN:
     775              :               /* We do not have any conditional move cost, estimate it as a
     776              :                  reg-reg move.  Comparisons are costed as adds.  */
     777         1002 :               igain += m * (COSTS_N_INSNS (2) + ix86_cost->add);
     778              :               /* Integer SSE ops are all costed the same.  */
     779         1002 :               igain -= ix86_cost->sse_op;
     780         1002 :               break;
     781              : 
     782            0 :             case COMPARE:
     783            0 :               if (XEXP (src, 1) != const0_rtx)
     784              :                 {
     785              :                   /* cmp vs. pxor;pshufd;ptest.  */
     786            0 :                   igain += COSTS_N_INSNS (m - 3);
     787              :                 }
     788            0 :               else if (GET_CODE (XEXP (src, 0)) != AND)
     789              :                 {
     790              :                   /* test vs. pshufd;ptest.  */
     791            0 :                   igain += COSTS_N_INSNS (m - 2);
     792              :                 }
     793            0 :               else if (GET_CODE (XEXP (XEXP (src, 0), 0)) != NOT)
     794              :                 {
     795              :                   /* and;test vs. pshufd;ptest.  */
     796            0 :                   igain += COSTS_N_INSNS (2 * m - 2);
     797              :                 }
     798            0 :               else if (TARGET_BMI)
     799              :                 {
     800              :                   /* andn;test vs. pandn;pshufd;ptest.  */
     801            0 :                   igain += COSTS_N_INSNS (2 * m - 3);
     802              :                 }
     803              :               else
     804              :                 {
     805              :                   /* not;and;test vs. pandn;pshufd;ptest.  */
     806            0 :                   igain += COSTS_N_INSNS (3 * m - 3);
     807              :                 }
     808              :               break;
     809              : 
     810      1166877 :             case CONST_INT:
     811      1166877 :               if (REG_P (dst))
     812              :                 {
     813      1166877 :                   if (!speed_p)
     814              :                     {
     815              :                       /* xor (2 bytes) vs. xorps (3 bytes).  */
     816       227175 :                       if (src == const0_rtx)
     817       119839 :                         igain -= COSTS_N_BYTES (1);
     818              :                       /* movdi_internal vs. movv2di_internal.  */
     819              :                       /* => mov (5 bytes) vs. movaps (7 bytes).  */
     820       107336 :                       else if (x86_64_immediate_operand (src, SImode))
     821        95423 :                         igain -= COSTS_N_BYTES (2);
     822              :                       else
     823              :                         /* ??? Larger immediate constants are placed in the
     824              :                            constant pool, where the size benefit/impact of
     825              :                            STV conversion is affected by whether and how
     826              :                            often each constant pool entry is shared/reused.
     827              :                            The value below is empirically derived from the
     828              :                            CSiBE benchmark (and the optimal value may drift
     829              :                            over time).  */
     830              :                         igain += COSTS_N_BYTES (0);
     831              :                     }
     832              :                   else
     833              :                     {
     834              :                       /* DImode can be immediate for TARGET_64BIT
     835              :                          and SImode always.  */
     836       939702 :                       igain += m * COSTS_N_INSNS (1);
     837       939702 :                       igain -= vector_const_cost (src, bb);
     838              :                     }
     839              :                 }
     840            0 :               else if (MEM_P (dst))
     841              :                 {
     842            0 :                   igain += (m * ix86_cost->int_store[2]
     843            0 :                             - ix86_cost->sse_store[sse_cost_idx]);
     844            0 :                   igain -= vector_const_cost (src, bb);
     845              :                 }
     846              :               break;
     847              : 
     848        37092 :             case VEC_SELECT:
     849        37092 :               if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx)
     850              :                 {
     851              :                   // movd (4 bytes) replaced with movdqa (4 bytes).
     852        27101 :                   if (!!speed_p)
     853        25280 :                     igain += COSTS_N_INSNS (ix86_cost->sse_to_integer
     854              :                                             - ix86_cost->xmm_move) / 2;
     855              :                 }
     856              :               else
     857              :                 {
     858              :                   // pshufd; movd replaced with pshufd.
     859         9991 :                   if (!speed_p)
     860          666 :                     igain += COSTS_N_BYTES (4);
     861              :                   else
     862         9325 :                     igain += ix86_cost->sse_to_integer;
     863              :                 }
     864              :               break;
     865              : 
     866            0 :             default:
     867            0 :               gcc_unreachable ();
     868              :             }
     869              :         }
     870              : 
     871     11454140 :       if (speed_p)
     872     10213707 :         weighted_gain += bb_freq * igain;
     873     11455961 :       gain += igain;
     874              : 
     875     11455961 :       if (igain != 0 && dump_file)
     876              :         {
     877           93 :           fprintf (dump_file, "  Instruction gain %d with bb_freq %.2f for",
     878              :                    igain, bb_freq.to_double ());
     879           93 :           dump_insn_slim (dump_file, insn);
     880              :         }
     881              :     }
     882              : 
     883      5810710 :   if (dump_file)
     884              :     {
     885          136 :       fprintf (dump_file, "  Instruction conversion gain: %d, \n",
     886              :                gain);
     887          136 :       fprintf (dump_file, "  Registers conversion cost: %d\n",
     888              :                cost_sse_integer);
     889          136 :       fprintf (dump_file, "  Weighted instruction conversion gain: %.2f, \n",
     890              :                weighted_gain.to_double ());
     891          136 :       fprintf (dump_file, "  Weighted registers conversion cost: %.2f\n",
     892              :                weighted_cost_sse_integer.to_double ());
     893              :     }
     894              : 
     895      5810710 :   if (weighted_gain != weighted_cost_sse_integer)
     896      4685730 :     return weighted_gain > weighted_cost_sse_integer;
     897              :   else
     898      1124980 :     return gain > cost_sse_integer;;
     899              : }
     900              : 
     901              : /* Insert generated conversion instruction sequence INSNS
     902              :    after instruction AFTER.  New BB may be required in case
     903              :    instruction has EH region attached.  */
     904              : 
     905              : void
     906        31197 : scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
     907              : {
     908        31197 :   if (!control_flow_insn_p (after))
     909              :     {
     910        30984 :       emit_insn_after (insns, after);
     911        30984 :       return;
     912              :     }
     913              : 
     914          213 :   basic_block bb = BLOCK_FOR_INSN (after);
     915          213 :   edge e = find_fallthru_edge (bb->succs);
     916          213 :   gcc_assert (e);
     917              : 
     918          213 :   basic_block new_bb = split_edge (e);
     919          213 :   emit_insn_after (insns, BB_HEAD (new_bb));
     920              : }
     921              : 
     922              : } // anon namespace
     923              : 
     924              : /* Generate the canonical SET_SRC to move GPR to a VMODE vector register,
     925              :    zeroing the upper parts.  */
     926              : 
     927              : static rtx
     928       173061 : gen_gpr_to_xmm_move_src (enum machine_mode vmode, rtx gpr)
     929              : {
     930       346122 :   switch (GET_MODE_NUNITS (vmode))
     931              :     {
     932           45 :     case 1:
     933           45 :       return gen_rtx_SUBREG (vmode, gpr, 0);
     934       172454 :     case 2:
     935       344908 :       return gen_rtx_VEC_CONCAT (vmode, gpr,
     936              :                                  CONST0_RTX (GET_MODE_INNER (vmode)));
     937          562 :     default:
     938          562 :       return gen_rtx_VEC_MERGE (vmode, gen_rtx_VEC_DUPLICATE (vmode, gpr),
     939              :                                 CONST0_RTX (vmode), GEN_INT (HOST_WIDE_INT_1U));
     940              :     }
     941              : }
     942              : 
     943              : /* Make vector copies for all register REGNO definitions
     944              :    and replace its uses in a chain.  */
     945              : 
     946              : void
     947         8398 : scalar_chain::make_vector_copies (rtx_insn *insn, rtx reg)
     948              : {
     949         8398 :   rtx vreg = *defs_map.get (reg);
     950              : 
     951         8398 :   start_sequence ();
     952         8398 :   if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
     953              :     {
     954            0 :       rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
     955            0 :       if (smode == DImode && !TARGET_64BIT)
     956              :         {
     957            0 :           emit_move_insn (adjust_address (tmp, SImode, 0),
     958              :                           gen_rtx_SUBREG (SImode, reg, 0));
     959            0 :           emit_move_insn (adjust_address (tmp, SImode, 4),
     960              :                           gen_rtx_SUBREG (SImode, reg, 4));
     961              :         }
     962              :       else
     963            0 :         emit_move_insn (copy_rtx (tmp), reg);
     964            0 :       emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
     965              :                               gen_gpr_to_xmm_move_src (vmode, tmp)));
     966              :     }
     967         8398 :   else if (!TARGET_64BIT && smode == DImode)
     968              :     {
     969         8262 :       if (TARGET_SSE4_1)
     970              :         {
     971          356 :           emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
     972              :                                       CONST0_RTX (V4SImode),
     973              :                                       gen_rtx_SUBREG (SImode, reg, 0)));
     974          356 :           emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
     975              :                                         gen_rtx_SUBREG (V4SImode, vreg, 0),
     976              :                                         gen_rtx_SUBREG (SImode, reg, 4),
     977              :                                         GEN_INT (2)));
     978              :         }
     979              :       else
     980              :         {
     981         7906 :           rtx tmp = gen_reg_rtx (DImode);
     982         7906 :           emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
     983              :                                       CONST0_RTX (V4SImode),
     984              :                                       gen_rtx_SUBREG (SImode, reg, 0)));
     985         7906 :           emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
     986              :                                       CONST0_RTX (V4SImode),
     987              :                                       gen_rtx_SUBREG (SImode, reg, 4)));
     988         7906 :           emit_insn (gen_vec_interleave_lowv4si
     989              :                      (gen_rtx_SUBREG (V4SImode, vreg, 0),
     990              :                       gen_rtx_SUBREG (V4SImode, vreg, 0),
     991              :                       gen_rtx_SUBREG (V4SImode, tmp, 0)));
     992              :         }
     993              :     }
     994              :   else
     995          136 :     emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
     996              :                             gen_gpr_to_xmm_move_src (vmode, reg)));
     997         8398 :   rtx_insn *seq = end_sequence ();
     998         8398 :   emit_conversion_insns (seq, insn);
     999              : 
    1000         8398 :   if (dump_file)
    1001            0 :     fprintf (dump_file,
    1002              :              "  Copied r%d to a vector register r%d for insn %d\n",
    1003            0 :              REGNO (reg), REGNO (vreg), INSN_UID (insn));
    1004         8398 : }
    1005              : 
    1006              : /* Copy the definition SRC of INSN inside the chain to DST for
    1007              :    scalar uses outside of the chain.  */
    1008              : 
    1009              : void
    1010        22041 : scalar_chain::convert_reg (rtx_insn *insn, rtx dst, rtx src)
    1011              : {
    1012        22041 :   start_sequence ();
    1013        22041 :   if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)
    1014              :     {
    1015            0 :       rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
    1016            0 :       emit_move_insn (tmp, src);
    1017            0 :       if (!TARGET_64BIT && smode == DImode)
    1018              :         {
    1019            0 :           emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
    1020              :                           adjust_address (tmp, SImode, 0));
    1021            0 :           emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
    1022              :                           adjust_address (tmp, SImode, 4));
    1023              :         }
    1024              :       else
    1025            0 :         emit_move_insn (dst, copy_rtx (tmp));
    1026              :     }
    1027        22041 :   else if (!TARGET_64BIT && smode == DImode)
    1028              :     {
    1029        21117 :       if (TARGET_SSE4_1)
    1030              :         {
    1031            0 :           rtx tmp = gen_rtx_PARALLEL (VOIDmode,
    1032              :                                       gen_rtvec (1, const0_rtx));
    1033            0 :           emit_insn
    1034            0 :               (gen_rtx_SET
    1035              :                (gen_rtx_SUBREG (SImode, dst, 0),
    1036              :                 gen_rtx_VEC_SELECT (SImode,
    1037              :                                     gen_rtx_SUBREG (V4SImode, src, 0),
    1038              :                                     tmp)));
    1039              : 
    1040            0 :           tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));
    1041            0 :           emit_insn
    1042            0 :               (gen_rtx_SET
    1043              :                (gen_rtx_SUBREG (SImode, dst, 4),
    1044              :                 gen_rtx_VEC_SELECT (SImode,
    1045              :                                     gen_rtx_SUBREG (V4SImode, src, 0),
    1046              :                                     tmp)));
    1047              :         }
    1048              :       else
    1049              :         {
    1050        21117 :           rtx vcopy = gen_reg_rtx (V2DImode);
    1051        21117 :           emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, src, 0));
    1052        21117 :           emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
    1053              :                           gen_rtx_SUBREG (SImode, vcopy, 0));
    1054        21117 :           emit_move_insn (vcopy,
    1055              :                           gen_rtx_LSHIFTRT (V2DImode,
    1056              :                                             vcopy, GEN_INT (32)));
    1057        21117 :           emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
    1058              :                           gen_rtx_SUBREG (SImode, vcopy, 0));
    1059              :         }
    1060              :     }
    1061              :   else
    1062          924 :     emit_move_insn (dst, src);
    1063              : 
    1064        22041 :   rtx_insn *seq = end_sequence ();
    1065        22041 :   emit_conversion_insns (seq, insn);
    1066              : 
    1067        22041 :   if (dump_file)
    1068            0 :     fprintf (dump_file,
    1069              :              "  Copied r%d to a scalar register r%d for insn %d\n",
    1070            0 :              REGNO (src), REGNO (dst), INSN_UID (insn));
    1071        22041 : }
    1072              : 
    1073              : /* Helper function to convert immediate constant X to vmode.  */
    1074              : static rtx
    1075        35847 : smode_convert_cst (rtx x, enum machine_mode vmode)
    1076              : {
    1077              :   /* Prefer all ones vector in case of -1.  */
    1078        35847 :   if (constm1_operand (x, GET_MODE (x)))
    1079          625 :     return CONSTM1_RTX (vmode);
    1080              : 
    1081        35222 :   unsigned n = GET_MODE_NUNITS (vmode);
    1082        35222 :   rtx *v = XALLOCAVEC (rtx, n);
    1083        35222 :   v[0] = x;
    1084        41004 :   for (unsigned i = 1; i < n; ++i)
    1085         5782 :     v[i] = const0_rtx;
    1086        35222 :   return gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
    1087              : }
    1088              : 
    1089              : /* Convert operand OP in INSN.  We should handle
    1090              :    memory operands and uninitialized registers.
    1091              :    All other register uses are converted during
    1092              :    registers conversion.  */
    1093              : 
    1094              : void
    1095       247860 : scalar_chain::convert_op (rtx *op, rtx_insn *insn)
    1096              : {
    1097       247860 :   rtx tmp;
    1098              : 
    1099       247860 :   if (GET_MODE (*op) == V1TImode)
    1100              :     return;
    1101              : 
    1102       247677 :   *op = copy_rtx_if_shared (*op);
    1103              : 
    1104       247677 :   if (GET_CODE (*op) == NOT
    1105       247677 :       || GET_CODE (*op) == ASHIFT)
    1106              :     {
    1107         3493 :       convert_op (&XEXP (*op, 0), insn);
    1108         3493 :       PUT_MODE (*op, vmode);
    1109              :     }
    1110              :   else if (MEM_P (*op))
    1111              :     {
    1112       172925 :       rtx_insn *movabs = NULL;
    1113              : 
    1114              :       /* Emit MOVABS to load from a 64-bit absolute address to a GPR.  */
    1115       172925 :       if (!memory_operand (*op, GET_MODE (*op)))
    1116              :         {
    1117            0 :           tmp = gen_reg_rtx (GET_MODE (*op));
    1118            0 :           movabs = emit_insn_before (gen_rtx_SET (tmp, *op), insn);
    1119              : 
    1120            0 :           *op = tmp;
    1121              :         }
    1122              : 
    1123       172925 :       tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (GET_MODE (*op)), 0);
    1124              : 
    1125       172925 :       rtx_insn *eh_insn
    1126       172925 :         = emit_insn_before (gen_rtx_SET (copy_rtx (tmp),
    1127              :                                          gen_gpr_to_xmm_move_src (vmode, *op)),
    1128       172925 :                             insn);
    1129              : 
    1130       172925 :       if (cfun->can_throw_non_call_exceptions)
    1131              :         {
    1132              :           /* Handle REG_EH_REGION note.  */
    1133       168754 :           rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
    1134       168754 :           if (note)
    1135              :             {
    1136         3588 :               if (movabs)
    1137            0 :                 eh_insn = movabs;
    1138         3588 :               control_flow_insns.safe_push (eh_insn);
    1139         3588 :               add_reg_note (eh_insn, REG_EH_REGION, XEXP (note, 0));
    1140              :             }
    1141              :         }
    1142              : 
    1143       172925 :       *op = tmp;
    1144              : 
    1145       172925 :       if (dump_file)
    1146            0 :         fprintf (dump_file, "  Preloading operand for insn %d into r%d\n",
    1147            0 :                  INSN_UID (insn), reg_or_subregno (tmp));
    1148              :     }
    1149              :   else if (REG_P (*op))
    1150        65117 :     *op = gen_rtx_SUBREG (vmode, *op, 0);
    1151              :   else if (CONST_SCALAR_INT_P (*op))
    1152              :     {
    1153         6139 :       rtx vec_cst = smode_convert_cst (*op, vmode);
    1154              : 
    1155         6139 :       if (!standard_sse_constant_p (vec_cst, vmode))
    1156              :         {
    1157         2731 :           start_sequence ();
    1158         2731 :           vec_cst = validize_mem (force_const_mem (vmode, vec_cst));
    1159         2731 :           rtx_insn *seq = end_sequence ();
    1160         2731 :           emit_insn_before (seq, insn);
    1161              :         }
    1162              : 
    1163         6139 :       tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (smode), 0);
    1164              : 
    1165         6139 :       emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);
    1166         6139 :       *op = tmp;
    1167              :     }
    1168              :   else
    1169              :     {
    1170            0 :       gcc_assert (SUBREG_P (*op));
    1171            3 :       if (GET_MODE (*op) != vmode)
    1172              :         {
    1173            3 :           rtx inner = SUBREG_REG (*op);
    1174            3 :           poly_uint64 byte = SUBREG_BYTE (*op);
    1175            3 :           if (targetm.modes_tieable_p (vmode, GET_MODE (inner))
    1176            3 :               && validate_subreg (vmode, GET_MODE (inner), inner, byte))
    1177            3 :             *op = gen_lowpart (vmode, *op);
    1178              :           else
    1179              :             {
    1180            0 :               tmp = gen_reg_rtx (GET_MODE (*op));
    1181            0 :               emit_insn_before (gen_rtx_SET (tmp, *op), insn);
    1182            0 :               *op = gen_rtx_SUBREG (vmode, tmp, 0);
    1183              :             }
    1184              :         }
    1185              :     }
    1186              : }
    1187              : 
    1188              : /* Convert CCZmode COMPARE to vector mode.  OP1 and OP2 are the
                      :    operands of the comparison in INSN; any insns needed to set up
                      :    the vector operands are emitted before INSN.  Returns a CCZmode
                      :    UNSPEC_PTEST rtx.  */
    1189              : 
    1190              : rtx
    1191           12 : scalar_chain::convert_compare (rtx op1, rtx op2, rtx_insn *insn)
    1192              : {
    1193           12 :   rtx src, tmp;
    1194              : 
    1195              :   /* Handle any REG_EQUAL notes.  A note that is a CCZmode COMPARE of a
                      :      register with a scalar integer constant has that constant
                      :      rewritten as a VMODE constant and is kept; one comparing a
                      :      register with a register is kept unchanged; any other note
                      :      is removed.  */
    1196           12 :   tmp = find_reg_equal_equiv_note (insn);
    1197           12 :   if (tmp)
    1198              :     {
    1199            1 :       if (GET_CODE (XEXP (tmp, 0)) == COMPARE
    1200            1 :           && GET_MODE (XEXP (tmp, 0)) == CCZmode
    1201            1 :           && REG_P (XEXP (XEXP (tmp, 0), 0)))
    1202              :         {
    1203            1 :           rtx *op = &XEXP (XEXP (tmp, 0), 1);
    1204            1 :           if (CONST_SCALAR_INT_P (*op))
    1205              :             {
    1206            1 :               if (constm1_operand (*op, GET_MODE (*op)))
    1207            0 :                 *op = CONSTM1_RTX (vmode);
    1208              :               else
    1209              :                 {
    1210            1 :                   unsigned n = GET_MODE_NUNITS (vmode);
    1211            1 :                   rtx *v = XALLOCAVEC (rtx, n);
    1212            1 :                   v[0] = *op;  /* Constant in element 0, zeros elsewhere.  */
    1213            1 :                   for (unsigned i = 1; i < n; ++i)
    1214            0 :                     v[i] = const0_rtx;
    1215            1 :                   *op = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
    1216              :                 }
    1217              :               tmp = NULL_RTX;  /* Keep the (updated) note.  */
    1218              :             }
    1219            0 :           else if (REG_P (*op))
    1220              :             tmp = NULL_RTX;
    1221              :         }
    1222              : 
    1223              :       if (tmp)
    1224            0 :         remove_note (insn, tmp);
    1225              :     }
    1226              : 
    1227              :   /* Comparison against anything other than zero requires an XOR.  */
    1228           12 :   if (op2 != const0_rtx)
    1229              :     {
    1230            6 :       convert_op (&op1, insn);
    1231            6 :       convert_op (&op2, insn);
    1232              :       /* If both operands are MEMs, explicitly load the OP1 into TMP.  */
    1233            6 :       if (MEM_P (op1) && MEM_P (op2))
    1234              :         {
    1235            0 :           tmp = gen_reg_rtx (vmode);
    1236            0 :           emit_insn_before (gen_rtx_SET (tmp, op1), insn);
    1237            0 :           src = tmp;
    1238              :         }
    1239              :       else
    1240              :         src = op1;
    1241            6 :       src = gen_rtx_XOR (vmode, src, op2);
    1242              :     }
    1243            6 :   else if (GET_CODE (op1) == AND
    1244            0 :            && GET_CODE (XEXP (op1, 0)) == NOT)  /* (and (not X) Y) vs. zero.  */
    1245              :     {
    1246            0 :       rtx op11 = XEXP (XEXP (op1, 0), 0);
    1247            0 :       rtx op12 = XEXP (op1, 1);
    1248            0 :       convert_op (&op11, insn);
    1249            0 :       convert_op (&op12, insn);
    1250            0 :       if (!REG_P (op11))
    1251              :         {
    1252            0 :           tmp = gen_reg_rtx (vmode);
    1253            0 :           emit_insn_before (gen_rtx_SET (tmp, op11), insn);
    1254            0 :           op11 = tmp;
    1255              :         }
    1256            0 :       src = gen_rtx_AND (vmode, gen_rtx_NOT (vmode, op11), op12);
    1257            0 :     }
    1258            6 :   else if (GET_CODE (op1) == AND)  /* Plain AND vs. zero: PTEST of its operands.  */
    1259              :     {
    1260            0 :       rtx op11 = XEXP (op1, 0);
    1261            0 :       rtx op12 = XEXP (op1, 1);
    1262            0 :       convert_op (&op11, insn);
    1263            0 :       convert_op (&op12, insn);
    1264            0 :       if (!REG_P (op11))
    1265              :         {
    1266            0 :           tmp = gen_reg_rtx (vmode);
    1267            0 :           emit_insn_before (gen_rtx_SET (tmp, op11), insn);
    1268            0 :           op11 = tmp;
    1269              :         }
    1270            0 :       return gen_rtx_UNSPEC (CCZmode, gen_rtvec (2, op11, op12),
    1271              :                              UNSPEC_PTEST);
    1272              :     }
    1273              :   else
    1274              :     {
    1275            6 :       convert_op (&op1, insn);
    1276            6 :       src = op1;
    1277              :     }
    1278              : 
    1279           12 :   if (!REG_P (src))  /* Force the value to be tested into a fresh VMODE pseudo.  */
    1280              :     {
    1281            8 :       tmp = gen_reg_rtx (vmode);
    1282            8 :       emit_insn_before (gen_rtx_SET (tmp, src), insn);
    1283            8 :       src = tmp;
    1284              :     }
    1285              : 
    1286           12 :   if (vmode == V2DImode)  /* NOTE(review): this and the V4SImode case presumably replicate element 0 across SRC; confirm.  */
    1287              :     {
    1288            0 :       tmp = gen_reg_rtx (vmode);
    1289            0 :       emit_insn_before (gen_vec_interleave_lowv2di (tmp, src, src), insn);
    1290            0 :       src = tmp;
    1291              :     }
    1292           12 :   else if (vmode == V4SImode)
    1293              :     {
    1294            0 :       tmp = gen_reg_rtx (vmode);
    1295            0 :       emit_insn_before (gen_sse2_pshufd (tmp, src, const0_rtx), insn);
    1296            0 :       src = tmp;
    1297              :     }
    1298              : 
    1299           12 :   return gen_rtx_UNSPEC (CCZmode, gen_rtvec (2, src, src), UNSPEC_PTEST);
    1300              : }
    1301              : 
    1302              : /* Helper function for converting INSN to vector mode.  First, for each
                      :    register defined by INSN whose number is set in DEFS_CONV, either
                      :    call convert_reg (when some non-debug use must still see the
                      :    original register) or, if debug bind insns may exist, redirect or
                      :    reset the debug uses.  Then replace register uses in INSN by
                      :    the corresponding DEFS_MAP entries.  */
    1303              : 
    1304              : void
    1305      1318482 : scalar_chain::convert_insn_common (rtx_insn *insn)
    1306              : {
    1307              :   /* Generate copies for out-of-chain uses of defs and adjust debug uses.
                      :      A use triggers convert_reg when it is in a non-debug insn and
                      :      either DF_REF_REG_MEM_P holds for it or its insn is not in
                      :      INSNS.  */
    1308      2019968 :   for (df_ref ref = DF_INSN_DEFS (insn); ref; ref = DF_REF_NEXT_LOC (ref))
    1309       701486 :     if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
    1310              :       {
    1311        23488 :         df_link *use;
    1312        44239 :         for (use = DF_REF_CHAIN (ref); use; use = use->next)
    1313        42792 :           if (NONDEBUG_INSN_P (DF_REF_INSN (use->ref))
    1314        42792 :               && (DF_REF_REG_MEM_P (use->ref)
    1315        38699 :                   || !bitmap_bit_p (insns, DF_REF_INSN_UID (use->ref))))
    1316              :             break;
    1317        23488 :         if (use)  /* Non-null only if the loop above stopped at such a use.  */
    1318        22041 :           convert_reg (insn, DF_REF_REG (ref),
    1319        22041 :                        *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]));
    1320         1447 :         else if (MAY_HAVE_DEBUG_BIND_INSNS)
    1321              :           {
    1322              :             /* If we generated a scalar copy we can leave debug-insns
    1323              :                as-is, if not, we have to adjust them.  */
    1324         1325 :             auto_vec<rtx_insn *, 5> to_reset_debug_insns;
    1325         3956 :             for (use = DF_REF_CHAIN (ref); use; use = use->next)
    1326         2631 :               if (DEBUG_INSN_P (DF_REF_INSN (use->ref)))
    1327              :                 {
    1328          833 :                   rtx_insn *debug_insn = DF_REF_INSN (use->ref);
    1329              :                   /* If there's a reaching definition outside of the
    1330              :                      chain we have to reset.  */
    1331          833 :                   df_link *def;
    1332         2953 :                   for (def = DF_REF_CHAIN (use->ref); def; def = def->next)
    1333         2300 :                     if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def->ref)))
    1334              :                       break;
    1335          833 :                   if (def)
    1336          180 :                     to_reset_debug_insns.safe_push (debug_insn);
    1337              :                   else
    1338              :                     {
    1339          653 :                       *DF_REF_REAL_LOC (use->ref)
    1340          653 :                         = *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]);
    1341          653 :                       df_insn_rescan (debug_insn);
    1342              :                     }
    1343              :                 }
    1344              :             /* Have to do the reset outside of the DF_CHAIN walk to not
    1345              :                disrupt it.  */
    1346         2830 :             while (!to_reset_debug_insns.is_empty ())
    1347              :               {
    1348          180 :                 rtx_insn *debug_insn = to_reset_debug_insns.pop ();
    1349          180 :                 INSN_VAR_LOCATION_LOC (debug_insn) = gen_rtx_UNKNOWN_VAR_LOC ();
    1350          180 :                 df_insn_rescan_debug_internal (debug_insn);
    1351              :               }
    1352         1325 :           }
    1353              :       }
    1354              : 
    1355              :   /* Replace uses in this insn with the defs we use in the chain.  Uses
                      :      for which DF_REF_REG_MEM_P holds, and registers without a
                      :      DEFS_MAP entry, are left untouched.  */
    1356      3292877 :   for (df_ref ref = DF_INSN_USES (insn); ref; ref = DF_REF_NEXT_LOC (ref))
    1357      1974395 :     if (!DF_REF_REG_MEM_P (ref))
    1358       712135 :       if (rtx *vreg = defs_map.get (regno_reg_rtx[DF_REF_REGNO (ref)]))
    1359              :         {
    1360              :           /* Also update a corresponding REG_DEAD note.  */
    1361        35362 :           rtx note = find_reg_note (insn, REG_DEAD, DF_REF_REG (ref));
    1362        35362 :           if (note)
    1363        23369 :             XEXP (note, 0) = *vreg;
    1364        35362 :           *DF_REF_REAL_LOC (ref) = *vreg;
    1365              :         }
    1366      1318482 : }
    1367              : 
    1368              : /* Convert INSN which is an SImode or DImode rotation by a constant
    1369              :    to vector mode.  CODE is either ROTATE or ROTATERT with operands
    1370              :    OP0 and OP1.  Returns the SET_SRC of the last instruction in the
    1371              :    resulting sequence, which is emitted before INSN.  OP1 must be a
                      :    CONST_INT, since INTVAL is applied to it.  A rotation by zero
                      :    bits just returns the converted OP0.  The strategy is chosen from
                      :    SMODE, the rotation count and TARGET_AVX512VL.  */
    1372              : 
    1373              : rtx
    1374           92 : general_scalar_chain::convert_rotate (enum rtx_code code, rtx op0, rtx op1,
    1375              :                                       rtx_insn *insn)
    1376              : {
    1377           92 :   int bits = INTVAL (op1);
    1378           92 :   rtx pat, result;
    1379              : 
    1380           92 :   convert_op (&op0, insn);
    1381           92 :   if (bits == 0)
    1382            0 :     return op0;
    1383              : 
    1384           92 :   if (smode == DImode)
    1385              :     {
    1386           92 :       if (code == ROTATE)
    1387           45 :         bits = 64 - bits;  /* Left rotate by N == right rotate by 64 - N.  */
    1388           92 :       if (bits == 32)
    1389              :         {
    1390            0 :           rtx tmp1 = gen_reg_rtx (V4SImode);
    1391            0 :           pat = gen_sse2_pshufd (tmp1, gen_lowpart (V4SImode, op0),
    1392              :                                  GEN_INT (225));
    1393            0 :           emit_insn_before (pat, insn);
    1394            0 :           result = gen_lowpart (V2DImode, tmp1);
    1395              :         }
    1396           92 :       else if (TARGET_AVX512VL)
    1397            0 :         result = simplify_gen_binary (code, V2DImode, op0, op1);
    1398           92 :       else if (bits == 16 || bits == 48)
    1399              :         {
    1400            0 :           rtx tmp1 = gen_reg_rtx (V8HImode);
    1401            0 :           pat = gen_sse2_pshuflw (tmp1, gen_lowpart (V8HImode, op0),
    1402              :                                   GEN_INT (bits == 16 ? 57 : 147));
    1403            0 :           emit_insn_before (pat, insn);
    1404            0 :           result = gen_lowpart (V2DImode, tmp1);
    1405              :         }
    1406           92 :       else if ((bits & 0x07) == 0)  /* Whole number of bytes.  */
    1407              :         {
    1408            0 :           rtx tmp1 = gen_reg_rtx (V4SImode);
    1409            0 :           pat = gen_sse2_pshufd (tmp1, gen_lowpart (V4SImode, op0),
    1410              :                                  GEN_INT (68));
    1411            0 :           emit_insn_before (pat, insn);
    1412            0 :           rtx tmp2 = gen_reg_rtx (V1TImode);
    1413            0 :           pat = gen_sse2_lshrv1ti3 (tmp2, gen_lowpart (V1TImode, tmp1),
    1414              :                                     GEN_INT (bits));
    1415            0 :           emit_insn_before (pat, insn);
    1416            0 :           result = gen_lowpart (V2DImode, tmp2);
    1417              :         }
    1418              :       else
    1419              :         {
    1420           92 :           rtx tmp1 = gen_reg_rtx (V4SImode);
    1421           92 :           pat = gen_sse2_pshufd (tmp1, gen_lowpart (V4SImode, op0),
    1422              :                                  GEN_INT (20));
    1423           92 :           emit_insn_before (pat, insn);
    1424           92 :           rtx tmp2 = gen_reg_rtx (V2DImode);
    1425           92 :           pat = gen_lshrv2di3 (tmp2, gen_lowpart (V2DImode, tmp1),
    1426              :                                GEN_INT (bits & 31));
    1427           92 :           emit_insn_before (pat, insn);
    1428           92 :           rtx tmp3 = gen_reg_rtx (V4SImode);
    1429          139 :           pat = gen_sse2_pshufd (tmp3, gen_lowpart (V4SImode, tmp2),
    1430              :                                  GEN_INT (bits > 32 ? 34 : 136));
    1431           92 :           emit_insn_before (pat, insn);
    1432           92 :           result = gen_lowpart (V2DImode, tmp3);
    1433              :         }
    1434              :     }
    1435            0 :   else if (bits == 16)  /* From here on SMODE is not DImode.  */
    1436              :     {
    1437            0 :       rtx tmp1 = gen_reg_rtx (V8HImode);
    1438            0 :       pat = gen_sse2_pshuflw (tmp1, gen_lowpart (V8HImode, op0), GEN_INT (225));
    1439            0 :       emit_insn_before (pat, insn);
    1440            0 :       result = gen_lowpart (V4SImode, tmp1);
    1441              :     }
    1442            0 :   else if (TARGET_AVX512VL)
    1443            0 :     result = simplify_gen_binary (code, V4SImode, op0, op1);
    1444              :   else
    1445              :     {
    1446            0 :       if (code == ROTATE)
    1447            0 :         bits = 32 - bits;  /* Left rotate by N == right rotate by 32 - N.  */
    1448              : 
    1449            0 :       rtx tmp1 = gen_reg_rtx (V4SImode);
    1450            0 :       emit_insn_before (gen_sse2_pshufd (tmp1, op0, GEN_INT (224)), insn);
    1451            0 :       rtx tmp2 = gen_reg_rtx (V2DImode);
    1452            0 :       pat = gen_lshrv2di3 (tmp2, gen_lowpart (V2DImode, tmp1),
    1453              :                            GEN_INT (bits));
    1454            0 :       emit_insn_before (pat, insn);
    1455            0 :       result = gen_lowpart (V4SImode, tmp2);
    1456              :     }
    1457              : 
    1458              :   return result;
    1459              : }
    1460              : 
    1461              : /* Convert INSN to vector mode.  The source and destination of INSN's
                      :    single_set are rewritten according to the rtx code of the source
                      :    (any code not listed in the switch hits gcc_unreachable), with
                      :    helper insns emitted around INSN as needed.  The rewritten SET
                      :    then becomes the whole pattern of INSN, which is re-recognized
                      :    (a failure is fatal) and rescanned by DF.  */
    1462              : 
    1463              : void
    1464       411822 : general_scalar_chain::convert_insn (rtx_insn *insn)
    1465              : {
    1466       411822 :   rtx def_set = single_set (insn);
    1467       411822 :   rtx src = SET_SRC (def_set);
    1468       411822 :   rtx dst = SET_DEST (def_set);
    1469       411822 :   rtx subreg;
    1470              : 
    1471       411822 :   if (MEM_P (dst) && !REG_P (src))
    1472              :     {
    1473              :       /* There are no scalar integer instructions and therefore
    1474              :          temporary register usage is required.  */
    1475          758 :       rtx tmp = gen_reg_rtx (smode);
    1476          758 :       emit_conversion_insns (gen_move_insn (dst, tmp), insn);
    1477          758 :       dst = gen_rtx_SUBREG (vmode, tmp, 0);
    1478          758 :     }
    1479       411064 :   else if (REG_P (dst) && GET_MODE (dst) == smode)
    1480              :     {
    1481              :       /* Replace the definition with a SUBREG to the definition we
    1482              :          use inside the chain.  */
    1483       215844 :       rtx *vdef = defs_map.get (dst);
    1484       215844 :       if (vdef)
    1485        23488 :         dst = *vdef;
    1486       215844 :       dst = gen_rtx_SUBREG (vmode, dst, 0);
    1487              :       /* IRA doesn't like to have REG_EQUAL/EQUIV notes when the SET_DEST
    1488              :          is a non-REG_P.  So kill those off.  */
    1489       215844 :       rtx note = find_reg_equal_equiv_note (insn);
    1490       215844 :       if (note)
    1491         9726 :         remove_note (insn, note);
    1492              :     }
    1493              : 
    1494       411822 :   switch (GET_CODE (src))
    1495              :     {
    1496        30288 :     case PLUS:
    1497        30288 :     case MINUS:
    1498        30288 :     case IOR:
    1499        30288 :     case XOR:
    1500        30288 :     case AND:
    1501        30288 :     case SMAX:
    1502        30288 :     case SMIN:
    1503        30288 :     case UMAX:
    1504        30288 :     case UMIN:
    1505        30288 :       convert_op (&XEXP (src, 1), insn);
    1506              :       /* FALLTHRU */
    1507              : 
    1508        37664 :     case ABS:
    1509        37664 :     case ASHIFT:
    1510        37664 :     case ASHIFTRT:
    1511        37664 :     case LSHIFTRT:
    1512        37664 :       convert_op (&XEXP (src, 0), insn);
    1513        37664 :       PUT_MODE (src, vmode);
    1514        37664 :       break;
    1515              : 
    1516           92 :     case ROTATE:
    1517           92 :     case ROTATERT:
    1518           92 :       src = convert_rotate (GET_CODE (src), XEXP (src, 0), XEXP (src, 1),
    1519              :                             insn);
    1520           92 :       break;
    1521              : 
    1522          391 :     case NEG:
    1523          391 :       src = XEXP (src, 0);
    1524              : 
    1525          391 :       if (GET_CODE (src) == ABS)  /* (neg (abs X)): compute the ABS into a temporary first.  */
    1526              :         {
    1527            0 :           src = XEXP (src, 0);
    1528            0 :           convert_op (&src, insn);
    1529            0 :           subreg = gen_reg_rtx (vmode);
    1530            0 :           emit_insn_before (gen_rtx_SET (subreg,
    1531              :                                          gen_rtx_ABS (vmode, src)), insn);
    1532            0 :           src = subreg;
    1533              :         }
    1534              :       else
    1535          391 :         convert_op (&src, insn);
    1536              : 
    1537          391 :       subreg = gen_reg_rtx (vmode);
    1538          391 :       emit_insn_before (gen_move_insn (subreg, CONST0_RTX (vmode)), insn);
    1539          391 :       src = gen_rtx_MINUS (vmode, subreg, src);  /* Negation as 0 - SRC.  */
    1540          391 :       break;
    1541              : 
    1542          250 :     case NOT:
    1543          250 :       src = XEXP (src, 0);
    1544          250 :       convert_op (&src, insn);
    1545          250 :       subreg = gen_reg_rtx (vmode);
    1546          250 :       emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (vmode)), insn);
    1547          250 :       src = gen_rtx_XOR (vmode, src, subreg);  /* NOT as SRC ^ all-ones.  */
    1548          250 :       break;
    1549              : 
    1550       170761 :     case MEM:
    1551       170761 :       if (!REG_P (dst))
    1552       170761 :         convert_op (&src, insn);
    1553              :       break;
    1554              : 
    1555       196524 :     case REG:
    1556       196524 :       if (!MEM_P (dst))
    1557         1304 :         convert_op (&src, insn);
    1558              :       break;
    1559              : 
    1560            0 :     case SUBREG:
    1561            0 :       gcc_assert (GET_MODE (src) == vmode);
    1562              :       break;
    1563              : 
    1564            0 :     case COMPARE:
    1565            0 :       dst = gen_rtx_REG (CCZmode, FLAGS_REG);
    1566            0 :       src = convert_compare (XEXP (src, 0), XEXP (src, 1), insn);
    1567            0 :       break;
    1568              : 
    1569         3373 :     case CONST_INT:
    1570         3373 :       convert_op (&src, insn);
    1571         3373 :       break;
    1572              : 
    1573         2767 :     case VEC_SELECT:
    1574         2767 :       if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx)  /* Selecting element 0.  */
    1575         1619 :         src = XEXP (src, 0);
    1576         1148 :       else if (smode == DImode)  /* Otherwise shift the V1TImode value right by 64.  */
    1577              :         {
    1578          759 :           rtx tmp = gen_lowpart (V1TImode, XEXP (src, 0));
    1579          759 :           dst = gen_lowpart (V1TImode, dst);
    1580          759 :           src = gen_rtx_LSHIFTRT (V1TImode, tmp, GEN_INT (64));
    1581              :         }
    1582              :       else
    1583              :         {
    1584          389 :           rtx tmp = XVECEXP (XEXP (src, 1), 0, 0);
    1585          389 :           rtvec vec = gen_rtvec (4, tmp, tmp, tmp, tmp);  /* Same index in all four lanes.  */
    1586          389 :           rtx par = gen_rtx_PARALLEL (VOIDmode, vec);
    1587          389 :           src = gen_rtx_VEC_SELECT (vmode, XEXP (src, 0), par);
    1588              :         }
    1589              :       break;
    1590              : 
    1591            0 :     default:
    1592            0 :       gcc_unreachable ();
    1593              :     }
    1594              : 
    1595       411822 :   SET_SRC (def_set) = src;
    1596       411822 :   SET_DEST (def_set) = dst;
    1597              : 
    1598              :   /* Drop possible dead definitions by making DEF_SET the whole pattern.  */
    1599       411822 :   PATTERN (insn) = def_set;
    1600              : 
    1601       411822 :   INSN_CODE (insn) = -1;  /* Discard the cached code before re-recognizing.  */
    1602       411822 :   int patt = recog_memoized (insn);
    1603       411822 :   if  (patt == -1)
    1604            0 :     fatal_insn_not_found (insn);
    1605       411822 :   df_insn_rescan (insn);
    1606       411822 : }
    1607              : 
    1608              : /* Helper function to compute gain for loading an immediate constant.
    1609              :    Typically, two movabsq for TImode vs. vmovdqa for V1TImode, but
    1610              :    with numerous special cases.  CST is the constant and BB the basic
                      :    block it is used in.  Returns a negative gain when CST is a
                      :    two-element CONST_WIDE_INT whose elements are equal (measured in
                      :    bytes if BB is optimized for size, in insns otherwise), and
                      :    zero in all other cases.  */
    1611              : 
    1612              : static int
    1613           19 : timode_immed_const_gain (rtx cst, basic_block bb)
    1614              : {
    1615              :   /* movabsq vs. movabsq+vmovq+vunpacklqdq.  */
    1616           19 :   if (CONST_WIDE_INT_P (cst)
    1617            7 :       && CONST_WIDE_INT_NUNITS (cst) == 2
    1618           26 :       && CONST_WIDE_INT_ELT (cst, 0) == CONST_WIDE_INT_ELT (cst, 1))
    1619            0 :     return optimize_bb_for_size_p (bb) ? -COSTS_N_BYTES (9)
    1620              :                                        : -COSTS_N_INSNS (2);
    1621              :   /* 2x movabsq ~ vmovdqa.  */
    1622              :   return 0;
    1623              : }
    1624              : 
    1625              : /* Return true if chain conversion is cost profitable.  */
    1626              : 
    1627              : bool
    1628       499483 : timode_scalar_chain::compute_convert_gain ()
    1629              : {
    1630              :   /* Assume that if we have to move TImode values between units,
    1631              :      then transforming this chain isn't worth it.  */
    1632       499483 :   if (cost_sse_integer)
    1633              :     return false;
    1634              : 
    1635       499483 :   bitmap_iterator bi;
    1636       499483 :   unsigned insn_uid;
    1637              : 
    1638              :   /* Split ties to prefer V1TImode when not optimizing for size.  */
    1639       499483 :   int gain = optimize_size ? 0 : 1;
    1640       499483 :   sreal weighted_gain  = 0;
    1641              : 
    1642       499483 :   if (dump_file)
    1643            0 :     fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
    1644              : 
    1645      1494615 :   EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
    1646              :     {
    1647       995132 :       rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
    1648       995132 :       rtx def_set = single_set (insn);
    1649       995132 :       rtx src = SET_SRC (def_set);
    1650       995132 :       rtx dst = SET_DEST (def_set);
    1651       995132 :       HOST_WIDE_INT op1val;
    1652       995132 :       basic_block bb = BLOCK_FOR_INSN (insn);
    1653       995132 :       int scost, vcost;
    1654       995132 :       int igain = 0;
    1655       995132 :       profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
    1656       995132 :       bool speed_p = optimize_bb_for_speed_p (bb);
    1657       995132 :       sreal bb_freq = bb->count.to_sreal_scale (entry_count);
    1658              : 
    1659       995132 :       switch (GET_CODE (src))
    1660              :         {
    1661       519970 :         case REG:
    1662       519970 :           if (GENERAL_REGNO_P (REGNO (src)))
    1663              :             {
    1664        24980 :               if (TARGET_AVX)
    1665              :                 /* vmovq + vpinsrq */
    1666           26 :                 igain = speed_p ? -ix86_cost->integer_to_sse
    1667              :                                   - COSTS_N_INSNS (1)
    1668              :                                 : -COSTS_N_BYTES (11);
    1669              :               else
    1670              :                 /* movq + movq + punpcklqdq */
    1671        24954 :                 igain = speed_p ? -ix86_cost->integer_to_sse
    1672              :                                   - COSTS_N_INSNS (2)
    1673              :                                 : -COSTS_N_BYTES (14);
    1674              :             }
    1675       494990 :           else if (GENERAL_REG_P (dst))
    1676              :             {
    1677        24506 :               if (TARGET_AVX)
    1678              :                 /* vpextrq + vmovq */
    1679           26 :                 igain = speed_p ? -ix86_cost->sse_to_integer
    1680              :                                   - COSTS_N_INSNS (1)
    1681              :                                 : -COSTS_N_BYTES (11);
    1682              :               else
    1683              :                 /* movhlps + movq + movq */
    1684        24480 :                 igain = speed_p ? -ix86_cost->sse_to_integer
    1685              :                                   - COSTS_N_INSNS (2)
    1686              :                                 : -COSTS_N_BYTES (13);
    1687              :             }
    1688       470484 :           else if (!speed_p)
    1689        14486 :             igain = MEM_P (dst) ? COSTS_N_BYTES (6) : COSTS_N_BYTES (3);
    1690              :           else
    1691              :             igain = COSTS_N_INSNS (1);
    1692              :           break;
    1693              : 
    1694       429462 :         case MEM:
    1695       429462 :           igain = !speed_p ? COSTS_N_BYTES (7) : COSTS_N_INSNS (1);
    1696              :           break;
    1697              : 
    1698        10619 :         case CONST_INT:
    1699        10619 :           if (MEM_P (dst)
    1700        10619 :               && standard_sse_constant_p (src, V1TImode))
    1701        10086 :             igain = !speed_p ? COSTS_N_BYTES (11) : 1;
    1702              :           break;
    1703              : 
    1704        30073 :         case CONST_WIDE_INT:
    1705              :           /* 2 x mov vs. vmovdqa.  */
    1706        30073 :           if (MEM_P (dst))
    1707        29529 :             igain = !speed_p ? COSTS_N_BYTES (3) : COSTS_N_INSNS (1);
    1708              :           break;
    1709              : 
    1710           78 :         case NOT:
    1711           78 :           if (MEM_P (dst))
    1712        66417 :             igain = -COSTS_N_INSNS (1);
    1713              :           break;
    1714              : 
    1715           39 :         case AND:
    1716           39 :           if (!MEM_P (dst))
    1717           28 :             igain = COSTS_N_INSNS (1);
    1718           39 :           if (CONST_SCALAR_INT_P (XEXP (src, 1)))
    1719           10 :             igain += timode_immed_const_gain (XEXP (src, 1), bb);
    1720              :           break;
    1721              : 
    1722         4273 :         case XOR:
    1723         4273 :         case IOR:
    1724         4273 :           if (timode_concatdi_p (src))
    1725              :             {
    1726              :               /* vmovq;vpinsrq (11 bytes).  */
    1727         4145 :               igain = speed_p ? -ix86_cost->integer_to_sse - COSTS_N_INSNS (1)
    1728              :                               : -COSTS_N_BYTES (11);
    1729              :               break;
    1730              :             }
    1731          128 :           if (!MEM_P (dst))
    1732          120 :             igain = COSTS_N_INSNS (1);
    1733          128 :           if (CONST_SCALAR_INT_P (XEXP (src, 1)))
    1734            9 :             igain += timode_immed_const_gain (XEXP (src, 1), bb);
    1735              :           break;
    1736              : 
    1737            0 :         case PLUS:
    1738            0 :           if (timode_concatdi_p (src))
    1739              :             /* vmovq;vpinsrq (11 bytes).  */
    1740            0 :             igain = speed_p ? -ix86_cost->integer_to_sse - COSTS_N_INSNS (1)
    1741              :                             : -COSTS_N_BYTES (11);
    1742              :           break;
    1743              : 
    1744          206 :         case ASHIFT:
    1745          206 :         case LSHIFTRT:
    1746              :           /* See ix86_expand_v1ti_shift.  */
    1747          206 :           op1val = INTVAL (XEXP (src, 1));
    1748          206 :           if (!speed_p)
    1749              :             {
    1750           19 :               if (op1val == 64 || op1val == 65)
    1751              :                 scost = COSTS_N_BYTES (5);
    1752           13 :               else if (op1val >= 66)
    1753              :                 scost = COSTS_N_BYTES (6);
    1754           13 :               else if (op1val == 1)
    1755              :                 scost = COSTS_N_BYTES (8);
    1756              :               else
    1757              :                 scost = COSTS_N_BYTES (9);
    1758              : 
    1759           17 :               if ((op1val & 7) == 0)
    1760              :                 vcost = COSTS_N_BYTES (5);
    1761           13 :               else if (op1val > 64)
    1762              :                 vcost = COSTS_N_BYTES (10);
    1763              :               else
    1764           13 :                 vcost = TARGET_AVX ? COSTS_N_BYTES (19) : COSTS_N_BYTES (23);
    1765              :             }
    1766              :           else
    1767              :             {
    1768          187 :               scost = COSTS_N_INSNS (2);
    1769          187 :               if ((op1val & 7) == 0)
    1770              :                 vcost = COSTS_N_INSNS (1);
    1771          133 :               else if (op1val > 64)
    1772              :                 vcost = COSTS_N_INSNS (2);
    1773              :               else
    1774          133 :                 vcost = TARGET_AVX ? COSTS_N_INSNS (4) : COSTS_N_INSNS (5);
    1775              :             }
    1776          206 :           igain = scost - vcost;
    1777          206 :           break;
    1778              : 
    1779          123 :         case ASHIFTRT:
    1780              :           /* See ix86_expand_v1ti_ashiftrt.  */
    1781          123 :           op1val = INTVAL (XEXP (src, 1));
    1782          123 :           if (!speed_p)
    1783              :             {
    1784            9 :               if (op1val == 64 || op1val == 127)
    1785              :                 scost = COSTS_N_BYTES (7);
    1786            9 :               else if (op1val == 1)
    1787              :                 scost = COSTS_N_BYTES (8);
    1788            8 :               else if (op1val == 65)
    1789              :                 scost = COSTS_N_BYTES (10);
    1790            8 :               else if (op1val >= 66)
    1791              :                 scost = COSTS_N_BYTES (11);
    1792              :               else
    1793              :                 scost = COSTS_N_BYTES (9);
    1794              : 
    1795            0 :               if (op1val == 127)
    1796              :                 vcost = COSTS_N_BYTES (10);
    1797            9 :               else if (op1val == 64)
    1798              :                 vcost = COSTS_N_BYTES (14);
    1799            9 :               else if (op1val == 96)
    1800              :                 vcost = COSTS_N_BYTES (18);
    1801            9 :               else if (op1val >= 111)
    1802              :                 vcost = COSTS_N_BYTES (15);
    1803            9 :               else if (TARGET_AVX2 && op1val == 32)
    1804              :                 vcost = COSTS_N_BYTES (16);
    1805            9 :               else if (TARGET_SSE4_1 && op1val == 32)
    1806              :                 vcost = COSTS_N_BYTES (20);
    1807            9 :               else if (op1val >= 96)
    1808              :                 vcost = COSTS_N_BYTES (23);
    1809            9 :               else if ((op1val & 7) == 0)
    1810              :                 vcost = COSTS_N_BYTES (28);
    1811            9 :               else if (TARGET_AVX2 && op1val < 32)
    1812              :                 vcost = COSTS_N_BYTES (30);
    1813            9 :               else if (op1val == 1 || op1val >= 64)
    1814              :                 vcost = COSTS_N_BYTES (42);
    1815              :               else
    1816            8 :                 vcost = COSTS_N_BYTES (47);
    1817              :             }
    1818              :           else
    1819              :             {
    1820          114 :               if (op1val >= 65 && op1val <= 126)
    1821              :                 scost = COSTS_N_INSNS (3);
    1822              :               else
    1823          114 :                 scost = COSTS_N_INSNS (2);
    1824              : 
    1825          114 :               if (op1val == 127)
    1826              :                 vcost = COSTS_N_INSNS (2);
    1827          113 :               else if (op1val == 64)
    1828              :                 vcost = COSTS_N_INSNS (3);
    1829          113 :               else if (op1val == 96)
    1830              :                 vcost = COSTS_N_INSNS (3);
    1831          113 :               else if (op1val >= 111)
    1832              :                 vcost = COSTS_N_INSNS (3);
    1833          113 :               else if (TARGET_SSE4_1 && op1val == 32)
    1834              :                 vcost = COSTS_N_INSNS (3);
    1835          113 :               else if (TARGET_SSE4_1
    1836            0 :                        && (op1val == 8 || op1val == 16 || op1val == 24))
    1837              :                 vcost = COSTS_N_INSNS (3);
    1838          113 :               else if (op1val >= 96)
    1839              :                 vcost = COSTS_N_INSNS (4);
    1840          113 :               else if (TARGET_SSE4_1 && (op1val == 28 || op1val == 80))
    1841              :                 vcost = COSTS_N_INSNS (4);
    1842          113 :               else if ((op1val & 7) == 0)
    1843              :                 vcost = COSTS_N_INSNS (5);
    1844          113 :               else if (TARGET_AVX2 && op1val < 32)
    1845              :                 vcost = COSTS_N_INSNS (6);
    1846          113 :               else if (TARGET_SSE4_1 && op1val < 15)
    1847              :                 vcost = COSTS_N_INSNS (6);
    1848          113 :               else if (op1val == 1 || op1val >= 64)
    1849              :                 vcost = COSTS_N_INSNS (8);
    1850              :               else
    1851           16 :                 vcost = COSTS_N_INSNS (9);
    1852              :             }
    1853          123 :           igain = scost - vcost;
    1854          123 :           break;
    1855              : 
    1856            6 :         case ROTATE:
    1857            6 :         case ROTATERT:
    1858              :           /* See ix86_expand_v1ti_rotate.  */
    1859            6 :           op1val = INTVAL (XEXP (src, 1));
    1860            6 :           if (!speed_p)
    1861              :             {
    1862            0 :               scost = COSTS_N_BYTES (13);
    1863            0 :               if ((op1val & 31) == 0)
    1864              :                 vcost = COSTS_N_BYTES (5);
    1865            0 :               else if ((op1val & 7) == 0)
    1866            0 :                 vcost = TARGET_AVX ? COSTS_N_BYTES (13) : COSTS_N_BYTES (18);
    1867            0 :               else if (op1val > 32 && op1val < 96)
    1868              :                 vcost = COSTS_N_BYTES (24);
    1869              :               else
    1870            0 :                 vcost = COSTS_N_BYTES (19);
    1871              :             }
    1872              :           else
    1873              :             {
    1874            6 :               scost = COSTS_N_INSNS (3);
    1875            6 :               if ((op1val & 31) == 0)
    1876              :                 vcost = COSTS_N_INSNS (1);
    1877            4 :               else if ((op1val & 7) == 0)
    1878            1 :                 vcost = TARGET_AVX ? COSTS_N_INSNS (3) : COSTS_N_INSNS (4);
    1879            3 :               else if (op1val > 32 && op1val < 96)
    1880              :                 vcost = COSTS_N_INSNS (5);
    1881              :               else
    1882            3 :                 vcost = COSTS_N_INSNS (4);
    1883              :             }
    1884            6 :           igain = scost - vcost;
    1885            6 :           break;
    1886              : 
    1887           19 :         case COMPARE:
    1888           19 :           if (XEXP (src, 1) == const0_rtx)
    1889              :             {
    1890            8 :               if (GET_CODE (XEXP (src, 0)) == AND)
    1891              :                 /* and;and;or (9 bytes) vs. ptest (5 bytes).  */
    1892              :                 igain = !speed_p ? COSTS_N_BYTES (4) : COSTS_N_INSNS (2);
    1893              :               /* or (3 bytes) vs. ptest (5 bytes).  */
    1894            8 :               else if (!speed_p)
    1895            0 :                 igain = -COSTS_N_BYTES (2);
    1896              :             }
    1897           11 :           else if (XEXP (src, 1) == const1_rtx)
    1898              :             /* and;cmp -1 (7 bytes) vs. pcmpeqd;pxor;ptest (13 bytes).  */
    1899            0 :             igain = !speed_p ? -COSTS_N_BYTES (6) : -COSTS_N_INSNS (1);
    1900              :           break;
    1901              : 
    1902          264 :         case ZERO_EXTEND:
    1903          264 :           if (GET_MODE (XEXP (src, 0)) == DImode)
    1904              :             /* xor (2 bytes) vs. vmovq (5 bytes).  */
    1905          264 :             igain = speed_p ? COSTS_N_INSNS (1) - ix86_cost->sse_to_integer
    1906              :                             : -COSTS_N_BYTES (3);
    1907              :           break;
    1908              : 
    1909              :         default:
    1910              :           break;
    1911              :         }
    1912              : 
    1913      1958888 :       gain += igain;
    1914       995124 :       if (speed_p)
    1915       963764 :         weighted_gain += bb_freq * igain;
    1916              : 
    1917       995132 :       if (igain != 0 && dump_file)
    1918              :         {
    1919            0 :           fprintf (dump_file, "  Instruction gain %d with bb_freq %.2f for ",
    1920              :                    igain, bb_freq.to_double ());
    1921            0 :           dump_insn_slim (dump_file, insn);
    1922              :         }
    1923              :     }
    1924              : 
    1925       499483 :   if (dump_file)
    1926            0 :     fprintf (dump_file, "  Total gain: %d, weighted gain %.2f\n",
    1927              :              gain, weighted_gain.to_double ());
    1928              : 
    1929       499483 :   if (weighted_gain > (sreal) 0)
    1930              :     return true;
    1931              :   else
    1932        54248 :     return gain > 0;
    1933              : }
    1934              : 
    1935              : /* Fix uses of converted REG in debug insns.  This is a no-op unless
                           flag_var_tracking is set.  Otherwise REG's DF use chain is walked
                           and, within each debug insn, every use location that holds a
                           V1TImode register is wrapped in a TImode SUBREG; an insn changed
                           that way is rescanned.  */
    1936              : 
    1937              : void
    1938       419337 : timode_scalar_chain::fix_debug_reg_uses (rtx reg)
    1939              : {
    1940       419337 :   if (!flag_var_tracking)
    1941              :     return;
    1942              : 
    1943       370532 :   df_ref ref, next;
    1944       759833 :   for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
    1945              :     {
    1946       389301 :       rtx_insn *insn = DF_REF_INSN (ref);
    1947              :       /* Make sure the next ref is for a different instruction,
    1948              :          so that we're not affected by the rescan.  */
    1949       389301 :       next = DF_REF_NEXT_REG (ref);
    1950       389301 :       while (next && DF_REF_INSN (next) == insn)
    1951            0 :         next = DF_REF_NEXT_REG (next);
    1952              : 
    1953       389301 :       if (DEBUG_INSN_P (insn))
    1954              :         {
    1955              :           /* It may be a debug insn with a TImode variable in
    1956              :              register.  Visit every ref of this insn, i.e. REF up to NEXT.  */
    1957              :           bool changed = false;
    1958          228 :           for (; ref != next; ref = DF_REF_NEXT_REG (ref))
    1959              :             {
    1960          114 :               rtx *loc = DF_REF_LOC (ref);
    1961          114 :               if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
    1962              :                 {
    1963          105 :                   *loc = gen_rtx_SUBREG (TImode, *loc, 0);
    1964          105 :                   changed = true;
    1965              :                 }
    1966              :             }
    1967          114 :           if (changed)
    1968          105 :             df_insn_rescan (insn);
    1969              :         }
    1970              :     }
    1971              : }
    1972              : 
    1973              : /* Convert SRC, a *concatditi3 pattern, into a vec_concatv2di instruction.
    1974              :    Insert this before INSN, and return the result as a V1TImode subreg.
                           The ASHIFT may be either operand of SRC.  The high half is the
                           operand found two levels below the ASHIFT and the low half is the
                           operand one level below the other operand of SRC (presumably this
                           strips the ZERO_EXTENDs of the pattern -- callers in this file
                           check timode_concatdi_p before calling).  */
    1975              : 
    1976              : static rtx
    1977          266 : timode_convert_concatdi (rtx src, rtx_insn *insn)
    1978              : {
    1979          266 :   rtx hi, lo;
    1980          266 :   rtx tmp = gen_reg_rtx (V2DImode);
    1981          266 :   if (GET_CODE (XEXP (src, 0)) == ASHIFT)
    1982              :     {
    1983          266 :       hi = XEXP (XEXP (XEXP (src, 0), 0), 0);
    1984          266 :       lo = XEXP (XEXP (src, 1), 0);
    1985              :     }
    1986              :   else
    1987              :     {
    1988            0 :       hi = XEXP (XEXP (XEXP (src, 1), 0), 0);
    1989            0 :       lo = XEXP (XEXP (src, 0), 0);
    1990              :     }
                          /* The concat takes the low half first, then the high half.  */
    1991          266 :   emit_insn_before (gen_vec_concatv2di (tmp, lo, hi), insn);
    1992          266 :   return gen_rtx_SUBREG (V1TImode, tmp, 0);
    1993              : }
    1994              : 
    1995              : /* Convert INSN from TImode to V1TImode.  The destination of INSN's
                           single set is converted first, then the source according to its
                           rtx code; any helper insns are emitted before INSN.  Finally the
                           pattern of INSN is replaced by the bare SET, and INSN is
                           re-recognized and rescanned by DF.  */
    1996              : 
    1997              : void
    1998       906660 : timode_scalar_chain::convert_insn (rtx_insn *insn)
    1999              : {
    2000       906660 :   rtx def_set = single_set (insn);
    2001       906660 :   rtx src = SET_SRC (def_set);
    2002       906660 :   rtx dst = SET_DEST (def_set);
    2003       906660 :   rtx tmp;
    2004              : 
    2005       906660 :   switch (GET_CODE (dst))
    2006              :     {
    2007       419854 :     case REG:
    2008       419854 :       if (GET_MODE (dst) == TImode)
    2009              :         {
                                  /* A pseudo is switched to V1TImode in place and its debug
                                     uses are patched; a hard register outside the general
                                     registers gets a fresh V1TImode REG rtx.  A hard general
                                     register is left in TImode.  */
    2010       419223 :           if (!HARD_REGISTER_NUM_P (REGNO (dst)))
    2011              :             {
    2012       418718 :               PUT_MODE (dst, V1TImode);
    2013       418718 :               fix_debug_reg_uses (dst);
    2014              :             }
    2015          505 :           else if (!GENERAL_REGNO_P (REGNO (dst)))
    2016          359 :             dst = gen_raw_REG (V1TImode, REGNO (dst));
    2017              :         }
    2018       419854 :       if (GET_MODE (dst) == V1TImode)
    2019              :         {
    2020              :           /* It might potentially be helpful to convert REG_EQUAL notes,
    2021              :              but for now we just remove them.  */
    2022       419696 :           rtx note = find_reg_equal_equiv_note (insn);
    2023       419696 :           if (note)
    2024          470 :             remove_note (insn, note);
    2025              :         }
    2026              :       break;
    2027       486806 :     case MEM:
    2028       486806 :       PUT_MODE (dst, V1TImode);
    2029       486806 :       break;
    2030              : 
    2031            0 :     default:
    2032            0 :       gcc_unreachable ();
    2033              :     }
    2034              : 
    2035       906660 :   switch (GET_CODE (src))
    2036              :     {
    2037       448009 :     case REG:
    2038       448009 :       if (GET_MODE (src) == TImode)
    2039              :         {
                                  /* A general-register source is moved piecewise: its low
                                     and high DImode halves are copied to pseudos and
                                     concatenated into a V2DImode pseudo, which is then
                                     viewed as V1TImode.  */
    2040          825 :           if (GENERAL_REGNO_P (REGNO (src)))
    2041              :             {
    2042          201 :               rtx lo = gen_reg_rtx (DImode);
    2043          201 :               rtx hi = gen_reg_rtx (DImode);
    2044          201 :               emit_insn_before (gen_rtx_SET (lo, gen_lowpart (DImode, src)),
    2045              :                                 insn);
    2046          201 :               emit_insn_before (gen_rtx_SET (hi, gen_highpart (DImode, src)),
    2047              :                                 insn);
    2048          201 :               src = gen_reg_rtx (V2DImode);
    2049          201 :               emit_insn_before (gen_vec_concatv2di (src, lo, hi), insn);
    2050          201 :               src = gen_lowpart (V1TImode, src);
    2051              :             }
    2052          624 :           else if (!HARD_REGISTER_NUM_P (REGNO (src)))
    2053              :             {
    2054          619 :               PUT_MODE (src, V1TImode);
    2055          619 :               fix_debug_reg_uses (src);
    2056              :             }
    2057              :           else
    2058            5 :             src = gen_raw_REG (V1TImode, REGNO (src));
    2059              :         }
                              /* DST is a general register: copy SRC to a V2DImode pseudo,
                                 extract both DImode halves and rebuild the TImode value as
                                 a concatenation of them.  */
    2060       448009 :       if (GENERAL_REG_P (dst))
    2061              :         {
    2062          146 :           rtx tmp = gen_reg_rtx (V2DImode);
    2063          146 :           src = gen_lowpart (V2DImode, src);
    2064          146 :           emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2065              :           /* Extracting hi before lo helps register allocation.  */
    2066          146 :           rtx hi = gen_reg_rtx (DImode);
    2067          146 :           rtx lo = gen_reg_rtx (DImode);
    2068          146 :           emit_insn_before (gen_vec_extractv2didi (hi, tmp, const1_rtx), insn);
    2069          146 :           emit_insn_before (gen_vec_extractv2didi (lo, tmp, const0_rtx), insn);
    2070              : 
    2071              :           /* Construct *concatditi3 pattern from lo and hi.  */
    2072          146 :           hi = gen_rtx_ZERO_EXTEND (TImode, hi);
    2073          146 :           hi = gen_rtx_ASHIFT (TImode, hi, GEN_INT (64));
    2074          146 :           lo = gen_rtx_ZERO_EXTEND (TImode, lo);
    2075          146 :           src = gen_rtx_PLUS (TImode, hi, lo);
    2076              :         }
    2077              :       break;
    2078              : 
    2079       417613 :     case MEM:
    2080       417613 :       PUT_MODE (src, V1TImode);
    2081       417613 :       break;
    2082              : 
    2083        29902 :     case CONST_WIDE_INT:
    2084        29902 :       if (NONDEBUG_INSN_P (insn))
    2085              :         {
    2086              :           /* Since there are no instructions to store 128-bit constant,
    2087              :              temporary register usage is required.  */
    2088        29902 :           bool use_move;
    2089        29902 :           start_sequence ();
    2090        29902 :           tmp = ix86_convert_const_wide_int_to_broadcast (TImode, src);
    2091        29902 :           if (tmp)
    2092              :             {
    2093          194 :               src = lowpart_subreg (V1TImode, tmp, TImode);
    2094          194 :               use_move = true;
    2095              :             }
    2096              :           else
    2097              :             {
    2098        29708 :               src = smode_convert_cst (src, V1TImode);
    2099        29708 :               src = validize_mem (force_const_mem (V1TImode, src));
    2100        29708 :               use_move = MEM_P (dst);
    2101              :             }
    2102        29902 :           rtx_insn *seq = end_sequence ();
    2103        29902 :           if (seq)
    2104          195 :             emit_insn_before (seq, insn);
                                  /* USE_MOVE is set when a broadcast was generated or when
                                     DST is memory; the value is then first loaded into a
                                     V1TImode pseudo.  */
    2105        29902 :           if (use_move)
    2106              :             {
    2107        29530 :               tmp = gen_reg_rtx (V1TImode);
    2108        29530 :               emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2109        29530 :               src = tmp;
    2110              :             }
    2111              :         }
    2112              :       break;
    2113              : 
    2114        10619 :     case CONST_INT:
                              /* Results 1 and 2 of standard_sse_constant_p select the zero
                                 and the all-ones constant of DST's mode; any other result
                                 is unexpected here.  */
    2115        10619 :       switch (standard_sse_constant_p (src, TImode))
    2116              :         {
    2117        10396 :         case 1:
    2118        10396 :           src = CONST0_RTX (GET_MODE (dst));
    2119        10396 :           break;
    2120          223 :         case 2:
    2121          223 :           src = CONSTM1_RTX (GET_MODE (dst));
    2122          223 :           break;
    2123            0 :         default:
    2124            0 :           gcc_unreachable ();
    2125              :         }
    2126        10619 :       if (MEM_P (dst))
    2127              :         {
    2128        10086 :           tmp = gen_reg_rtx (V1TImode);
    2129        10086 :           emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2130        10086 :           src = tmp;
    2131              :         }
    2132              :       break;
    2133              : 
    2134           13 :     case AND:
                              /* With a NOT as first operand, convert the NOT's operand and
                                 the second operand and retag both rtxes as V1TImode; this
                                 form breaks out before the memory-destination temporary
                                 used below.  */
    2135           13 :       if (GET_CODE (XEXP (src, 0)) == NOT)
    2136              :         {
    2137            0 :           convert_op (&XEXP (XEXP (src, 0), 0), insn);
    2138            0 :           convert_op (&XEXP (src, 1), insn);
    2139            0 :           PUT_MODE (XEXP (src, 0), V1TImode);
    2140            0 :           PUT_MODE (src, V1TImode);
    2141            0 :           break;
    2142              :         }
    2143           13 :       convert_op (&XEXP (src, 0), insn);
    2144           13 :       convert_op (&XEXP (src, 1), insn);
    2145           13 :       PUT_MODE (src, V1TImode);
    2146           13 :       if (MEM_P (dst))
    2147              :         {
    2148           10 :           tmp = gen_reg_rtx (V1TImode);
    2149           10 :           emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2150           10 :           src = tmp;
    2151              :         }
    2152              :       break;
    2153              : 
    2154          343 :     case XOR:
    2155          343 :     case IOR:
    2156          343 :       if (timode_concatdi_p (src))
    2157              :         {
    2158          266 :           src = timode_convert_concatdi (src, insn);
    2159          266 :           break;
    2160              :         }
    2161           77 :       convert_op (&XEXP (src, 0), insn);
    2162           77 :       convert_op (&XEXP (src, 1), insn);
    2163           77 :       PUT_MODE (src, V1TImode);
    2164           77 :       if (MEM_P (dst))
    2165              :         {
    2166            8 :           tmp = gen_reg_rtx (V1TImode);
    2167            8 :           emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2168            8 :           src = tmp;
    2169              :         }
    2170              :       break;
    2171              : 
    2172            3 :     case NOT:
                              /* NOT is rewritten as an XOR with an all-ones V1TImode
                                 pseudo.  */
    2173            3 :       src = XEXP (src, 0);
    2174            3 :       convert_op (&src, insn);
    2175            3 :       tmp = gen_reg_rtx (V1TImode);
    2176            3 :       emit_insn_before (gen_move_insn (tmp, CONSTM1_RTX (V1TImode)), insn);
    2177            3 :       src = gen_rtx_XOR (V1TImode, src, tmp);
    2178            3 :       if (MEM_P (dst))
    2179              :         {
    2180            0 :           tmp = gen_reg_rtx (V1TImode);
    2181            0 :           emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2182            0 :           src = tmp;
    2183              :         }
    2184              :       break;
    2185              : 
    2186           12 :     case COMPARE:
                              /* The comparison result is set in the flags register in
                                 CCZmode.  */
    2187           12 :       dst = gen_rtx_REG (CCZmode, FLAGS_REG);
    2188           12 :       src = convert_compare (XEXP (src, 0), XEXP (src, 1), insn);
    2189           12 :       break;
    2190              : 
    2191           43 :     case ASHIFT:
    2192           43 :     case LSHIFTRT:
    2193           43 :     case ASHIFTRT:
    2194           43 :     case ROTATERT:
    2195           43 :     case ROTATE:
                              /* Only the shifted operand is converted; the count operand
                                 is left untouched.  */
    2196           43 :       convert_op (&XEXP (src, 0), insn);
    2197           43 :       PUT_MODE (src, V1TImode);
    2198           43 :       break;
    2199              : 
    2200          103 :     case ZERO_EXTEND:
    2201          103 :       if (GET_MODE (XEXP (src, 0)) == DImode)
    2202              :         {
    2203              :           /* Convert to *vec_concatv2di_0.  */
    2204          103 :           rtx tmp = gen_reg_rtx (V2DImode);
    2205          103 :           rtx pat = gen_rtx_VEC_CONCAT (V2DImode, XEXP (src, 0), const0_rtx);
    2206          103 :           emit_insn_before (gen_move_insn (tmp, pat), insn);
    2207          103 :           src = gen_rtx_SUBREG (vmode, tmp, 0);
    2208              :         }
    2209              :       else
    2210            0 :         gcc_unreachable ();
    2211          103 :       break;
    2212              : 
    2213            0 :     case PLUS:
    2214            0 :       if (timode_concatdi_p (src))
    2215            0 :         src = timode_convert_concatdi (src, insn);
    2216              :       else
    2217            0 :         gcc_unreachable ();
    2218            0 :       break;
    2219              : 
    2220            0 :     default:
    2221            0 :       gcc_unreachable ();
    2222              :     }
    2223              : 
    2224       906660 :   SET_SRC (def_set) = src;
    2225       906660 :   SET_DEST (def_set) = dst;
    2226              : 
    2227              :   /* Drop possible dead definitions.  */
    2228       906660 :   PATTERN (insn) = def_set;
    2229              : 
                          /* Reset the cached insn code so the rewritten pattern is recognized
                             again, then let DF rescan the insn.  */
    2230       906660 :   INSN_CODE (insn) = -1;
    2231       906660 :   recog_memoized (insn);
    2232       906660 :   df_insn_rescan (insn);
    2233       906660 : }
    2234              : 
    2235              : /* Generate copies from defs used by the chain but not defined therein.
    2236              :    Also populates defs_map which is used later by convert_insn.  */
    2237              : 
    2238              : void
    2239       631483 : scalar_chain::convert_registers ()
    2240              : {
    2241       631483 :   bitmap_iterator bi;
    2242       631483 :   unsigned id;
                          /* For every register number in defs_conv, create a fresh pseudo in
                             smode and record the mapping from the original register rtx.  */
    2243       657652 :   EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
    2244              :     {
    2245        26169 :       rtx chain_reg = gen_reg_rtx (smode);
    2246        26169 :       defs_map.put (regno_reg_rtx[id], chain_reg);
    2247              :     }
                          /* For every insn in insns_conv, call make_vector_copies for each of
                             its defs whose register is in defs_conv.  */
    2248       639881 :   EXECUTE_IF_SET_IN_BITMAP (insns_conv, 0, id, bi)
    2249        21038 :     for (df_ref ref = DF_INSN_UID_DEFS (id); ref; ref = DF_REF_NEXT_LOC (ref))
    2250        12640 :       if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
    2251         8398 :         make_vector_copies (DF_REF_INSN (ref), DF_REF_REAL_REG (ref));
    2252       631483 : }
    2253              : 
    2254              : /* Convert whole chain creating required register
    2255              :    conversions and copies.  Return the number of insns converted,
                           or 0 without changing anything when the stv_conversion debug
                           counter disallows the conversion.  */
    2256              : 
    2257              : int
    2258       631483 : scalar_chain::convert ()
    2259              : {
    2260       631483 :   bitmap_iterator bi;
    2261       631483 :   unsigned id;
    2262       631483 :   int converted_insns = 0;
    2263              : 
    2264       631483 :   if (!dbg_cnt (stv_conversion))
    2265              :     return 0;
    2266              : 
    2267       631483 :   if (dump_file)
    2268            0 :     fprintf (dump_file, "Converting chain #%d...\n", chain_id);
    2269              : 
                          /* Set up the register copies and defs_map before any insn of the
                             chain is rewritten.  */
    2270       631483 :   convert_registers ();
    2271              : 
                          /* Each insn of the chain gets the common conversion step first and
                             then the chain-specific convert_insn.  */
    2272      1949965 :   EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi)
    2273              :     {
    2274      1318482 :       rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
    2275      1318482 :       convert_insn_common (insn);
    2276      1318482 :       convert_insn (insn);
    2277      1318482 :       converted_insns++;
    2278              :     }
    2279              : 
    2280              :   return converted_insns;
    2281              : }
    2282              : 
    2283              : /* Return the SET expression if INSN doesn't reference hard register.
    2284              :    Return NULL if INSN uses or defines a hard register, excluding
    2285              :    pseudo register pushes, hard register uses in a memory address,
    2286              :    clobbers and flags definitions.  NULL is also returned when INSN
                           is not a single set.  */
    2287              : 
    2288              : static rtx
    2289    331858289 : pseudo_reg_set (rtx_insn *insn)
    2290              : {
    2291    331858289 :   rtx set = single_set (insn);
    2292    331858289 :   if (!set)
    2293              :     return NULL;
    2294              : 
    2295              :   /* Check pseudo register push first.  The push is matched in TImode
                             for 64-bit targets and in DImode otherwise.  */
    2296    133735835 :   machine_mode mode = TARGET_64BIT ? TImode : DImode;
    2297    133735835 :   if (REG_P (SET_SRC (set))
    2298     37795028 :       && !HARD_REGISTER_P (SET_SRC (set))
    2299    163213290 :       && push_operand (SET_DEST (set), mode))
    2300              :     return set;
    2301              : 
                          /* Reject a hard register definition unless it is a must-clobber or
                             the flags register.  */
    2302    133482756 :   df_ref ref;
    2303    216015776 :   FOR_EACH_INSN_DEF (ref, insn)
    2304    119099428 :     if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
    2305     64147785 :         && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
    2306    168924637 :         && DF_REF_REGNO (ref) != FLAGS_REG)
    2307              :       return NULL;
    2308              : 
                          /* Reject a hard register use unless it occurs inside a memory
                             reference.  */
    2309    185926403 :   FOR_EACH_INSN_USE (ref, insn)
    2310    114087623 :     if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
    2311              :       return NULL;
    2312              : 
    2313              :   return set;
    2314              : }
    2315              : 
    2316              : /* Return true if the register REG is defined in a single DEF chain.
    2317              :    If it is defined in more than one DEF chain, we may not be able
    2318              :    to convert it in all chains.
                      :
                      :    Concretely, the result is true only when the DF def chain of REG
                      :    holds exactly one reference; a register without any recorded
                      :    definition yields false.  */
    2319              : 
    2320              : static bool
    2321      1240414 : single_def_chain_p (rtx reg)
    2322              : {
    2323      1240414 :   df_ref ref = DF_REG_DEF_CHAIN (REGNO (reg));
    2324      1240414 :   if (!ref)
    2325              :     return false;  /* No definition recorded for REG.  */
    2326      1240394 :   return DF_REF_NEXT_REG (ref) == nullptr;
    2327              : }
    2328              : 
    2329              : /* Check if comparison INSN may be transformed into vector comparison.
    2330              :    Currently we transform equality/inequality checks which look like:
    2331              :    (set (reg:CCZ 17 flags) (compare:CCZ (reg:TI x) (reg:TI y)))
                      :
                      :    MODE has to be the double-word mode (TImode if TARGET_64BIT, else
                      :    DImode) and TARGET_SSE4_1 has to be enabled; otherwise the result
                      :    is false.  INSN is asserted to be a single_set whose source is a
                      :    COMPARE, so callers must only pass such insns.  */
    2332              : 
    2333              : static bool
    2334     12644761 : convertible_comparison_p (rtx_insn *insn, enum machine_mode mode)
    2335              : {
    2336     14053755 :   if (mode != (TARGET_64BIT ? TImode : DImode))
    2337              :     return false;
    2338              : 
    2339      4625264 :   if (!TARGET_SSE4_1)
    2340              :     return false;
    2341              : 
    2342       162788 :   rtx def_set = single_set (insn);
    2343              : 
    2344       162788 :   gcc_assert (def_set);
    2345              : 
    2346       162788 :   rtx src = SET_SRC (def_set);
    2347       162788 :   rtx dst = SET_DEST (def_set);
    2348              : 
    2349       162788 :   gcc_assert (GET_CODE (src) == COMPARE);
    2350              : 
    2351       162788 :   if (!REG_P (dst)
    2352       162788 :       || REGNO (dst) != FLAGS_REG
    2353       325576 :       || GET_MODE (dst) != CCZmode)
    2354              :     return false;  /* Only CCZmode sets of the flags register qualify.  */
    2355              : 
    2356       114198 :   rtx op1 = XEXP (src, 0);
    2357       114198 :   rtx op2 = XEXP (src, 1);
    2358              : 
    2359              :   /* *cmp<dwi>_doubleword: both operands are general operands in MODE.  */
    2360       114198 :   if (general_operand (op1, mode)
    2361       114198 :       && general_operand (op2, mode))
    2362              :     return true;
    2363              : 
    2364              :   /* *testti_doubleword: (and X Y) compared with zero, where X is a
                      :      TImode register and Y is a scalar integer constant or a TImode
                      :      register or memory.  Note the mode test uses TImode, not MODE.  */
    2365       114142 :   if (op2 == const0_rtx
    2366        38110 :       && GET_CODE (op1) == AND
    2367          142 :       && REG_P (XEXP (op1, 0)))
    2368              :     {
    2369          142 :       rtx op12 = XEXP (op1, 1);
    2370          142 :       return GET_MODE (XEXP (op1, 0)) == TImode
    2371          142 :              && (CONST_SCALAR_INT_P (op12)
    2372            0 :                  || ((REG_P (op12) || MEM_P (op12))
    2373            0 :                      && GET_MODE (op12) == TImode));
    2374              :     }
    2375              : 
    2376              :   /* *test<dwi>_not_doubleword: (and (not A) B) compared with zero, where
                      :      A and B are each a register or memory in MODE.  */
    2377       114000 :   if (op2 == const0_rtx
    2378        37968 :       && GET_CODE (op1) == AND
    2379            0 :       && GET_CODE (XEXP (op1, 0)) == NOT)
    2380              :     {
    2381            0 :       rtx op11 = XEXP (XEXP (op1, 0), 0);
    2382            0 :       rtx op12 = XEXP (op1, 1);
    2383            0 :       return (REG_P (op11) || MEM_P (op11))
    2384            0 :              && (REG_P (op12) || MEM_P (op12))
    2385            0 :              && GET_MODE (op11) == mode
    2386            0 :              && GET_MODE (op12) == mode;
    2387              :     }
    2388              : 
    2389              :   return false;
    2390              : }
    2391              : 
    2392              : /* The general version of scalar_to_vector_candidate_p.
                      :
                      :    Return true if INSN is a candidate for conversion to vector code
                      :    in MODE.  INSN must yield a SET from pseudo_reg_set; a COMPARE
                      :    source is delegated to convertible_comparison_p.  Otherwise the
                      :    destination must be a REG or MEM in MODE and the source must be
                      :    one of the rtx codes handled by the switch below, in MODE unless
                      :    it is a CONST_INT.  Cases that leave the switch with "break" still
                      :    have operand 0 of SRC validated by the checks after the switch.  */
    2393              : 
    2394              : static bool
    2395    232331737 : general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
    2396              : {
    2397    232331737 :   rtx def_set = pseudo_reg_set (insn);
    2398              : 
    2399    232331737 :   if (!def_set)
    2400              :     return false;
    2401              : 
    2402     48777665 :   rtx src = SET_SRC (def_set);
    2403     48777665 :   rtx dst = SET_DEST (def_set);
    2404              : 
    2405     48777665 :   if (GET_CODE (src) == COMPARE)
    2406      8723994 :     return convertible_comparison_p (insn, mode);
    2407              : 
    2408              :   /* We are interested in "mode" only.  A CONST_INT source is exempt
                      :      from the mode test; the destination never is.  */
    2409     40053671 :   if ((GET_MODE (src) != mode
    2410     27382226 :        && !CONST_INT_P (src))
    2411     17723813 :       || GET_MODE (dst) != mode)
    2412              :     return false;
    2413              : 
    2414     14909467 :   if (!REG_P (dst) && !MEM_P (dst))
    2415              :     return false;
    2416              : 
    2417     14652393 :   switch (GET_CODE (src))
    2418              :     {
    2419       531384 :     case ASHIFT:
    2420       531384 :     case LSHIFTRT:
    2421       531384 :     case ASHIFTRT:
    2422       531384 :     case ROTATE:
    2423       531384 :     case ROTATERT:
    2424       531384 :       if (!CONST_INT_P (XEXP (src, 1))
    2425      1026763 :           || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, GET_MODE_BITSIZE (mode)-1))
    2426              :         return false;  /* Count must be a constant below the bit size of MODE.  */
    2427              : 
    2428              :       /* Check for extend highpart case.  For a DImode ASHIFTRT whose
                      :          operand 0 is an ASHIFT, step into the inner ASHIFT so that the
                      :          operand checks after the switch apply to its operand 0.  */
    2429       495375 :       if (mode != DImode
    2430       361223 :           || GET_CODE (src) != ASHIFTRT
    2431        81083 :           || GET_CODE (XEXP (src, 0)) != ASHIFT)
    2432              :         break;
    2433              : 
    2434      3651331 :       src = XEXP (src, 0);
    2435              :       break;
    2436              : 
    2437        86344 :     case SMAX:
    2438        86344 :     case SMIN:
    2439        86344 :     case UMAX:
    2440        86344 :     case UMIN:
    2441        86344 :       if ((mode == DImode && !TARGET_AVX512VL)
    2442        17992 :           || (mode == SImode && !TARGET_SSE4_1))
    2443              :         return false;  /* Needs AVX512VL for DImode, SSE4.1 for SImode.  */
    2444              :       /* Fallthru.  */
    2445              : 
    2446      3194121 :     case AND:
    2447      3194121 :     case IOR:
    2448      3194121 :     case XOR:
    2449      3194121 :     case PLUS:
    2450      3194121 :     case MINUS:
    2451      3194121 :       if (!REG_P (XEXP (src, 1))
    2452              :           && !MEM_P (XEXP (src, 1))
    2453              :           && !CONST_INT_P (XEXP (src, 1)))
    2454              :         return false;  /* Operand 1 must be a REG, MEM or CONST_INT.  */
    2455              : 
    2456      3103201 :       if (GET_MODE (XEXP (src, 1)) != mode
    2457      1798098 :           && !CONST_INT_P (XEXP (src, 1)))
    2458              :         return false;  /* A non-constant operand 1 must be in MODE.  */
    2459              : 
    2460              :       /* Check for andnot case.  For (and (not X) Y), step into the NOT
                      :          so that the operand checks after the switch validate X.  */
    2461      3103201 :       if (GET_CODE (src) != AND
    2462       177563 :           || GET_CODE (XEXP (src, 0)) != NOT)
    2463              :         break;
    2464              : 
    2465      3651331 :       src = XEXP (src, 0);
    2466              :       /* FALLTHRU */
    2467              : 
    2468              :     case NOT:
    2469              :       break;
    2470              : 
    2471        24730 :     case NEG:
    2472              :       /* Check for nabs case.  (neg (abs X)) is handled like ABS below;
                      :          any other NEG goes straight to the operand checks.  */
    2473        24730 :       if (GET_CODE (XEXP (src, 0)) != ABS)
    2474              :         break;
    2475              : 
    2476              :       src = XEXP (src, 0);
    2477              :       /* FALLTHRU */
    2478              : 
    2479         3798 :     case ABS:
    2480         3798 :       if ((mode == DImode && !TARGET_AVX512VL)
    2481         1446 :           || (mode == SImode && !TARGET_SSSE3))
    2482              :         return false;  /* Needs AVX512VL for DImode, SSSE3 for SImode.  */
    2483              :       break;
    2484              : 
    2485              :     case REG:
    2486              :       return true;  /* DST was already checked to be a REG or MEM in MODE.  */
    2487              : 
    2488      5876928 :     case MEM:
    2489      5876928 :     case CONST_INT:
    2490      5876928 :       return REG_P (dst);  /* Loads and constants only into a register.  */
    2491              : 
    2492        56967 :     case VEC_SELECT:
    2493              :       /* Excluding MEM_P (dst) avoids interfering with vpextr[dq].
                      :          The selected-from operand must be a register in V2DImode (for
                      :          DImode) or V4SImode (otherwise), and the selector must be a
                      :          PARALLEL holding exactly one CONST_INT.  */
    2494        56967 :       return REG_P (dst)
    2495        46802 :              && REG_P (XEXP (src, 0))
    2496        53850 :              && GET_MODE (XEXP (src, 0)) == (mode == DImode ? V2DImode
    2497              :                                                             : V4SImode)
    2498        37092 :              && GET_CODE (XEXP (src, 1)) == PARALLEL
    2499        37092 :              && XVECLEN (XEXP (src, 1), 0) == 1
    2500        94059 :              && CONST_INT_P (XVECEXP (XEXP (src, 1), 0, 0));
    2501              : 
    2502              :     default:
    2503              :       return false;
    2504              :     }
    2505              : 
    2506      3651331 :   if (!REG_P (XEXP (src, 0))
    2507              :       && !MEM_P (XEXP (src, 0))
    2508              :       && !CONST_INT_P (XEXP (src, 0)))
    2509              :     return false;  /* Operand 0 must be a REG, MEM or CONST_INT.  */
    2510              : 
    2511      3349889 :   if (GET_MODE (XEXP (src, 0)) != mode
    2512            0 :       && !CONST_INT_P (XEXP (src, 0)))
    2513              :     return false;  /* A non-constant operand 0 must be in MODE.  */
    2514              : 
    2515              :   return true;
    2516              : }
    2517              : 
    2518              : /* Check for a suitable TImode memory operand.  Return true if X is a
                      :    MEM and either TARGET_SSE_UNALIGNED_LOAD_OPTIMAL holds or X is not
                      :    a misaligned operand for TImode.  */
    2519              : 
    2520              : static bool
    2521         1586 : timode_mem_p (rtx x)
    2522              : {
    2523         1586 :   return MEM_P (x)
    2524         1586 :          && (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
    2525            0 :              || !misaligned_operand (x, TImode));
    2526              : }
    2527              : 
    2528              : /* The TImode version of scalar_to_vector_candidate_p.
                      :
                      :    Return true if INSN is a candidate for TImode conversion.  Unlike
                      :    the general version, TImode register copies between a pseudo and
                      :    a hard register are accepted, and a register destination (and a
                      :    plain register source) must satisfy single_def_chain_p.  */
    2529              : 
    2530              : static bool
    2531     99526552 : timode_scalar_to_vector_candidate_p (rtx_insn *insn)
    2532              : {
    2533     99526552 :   rtx def_set = pseudo_reg_set (insn);
    2534              : 
    2535              :   /* We allow two exceptions to the pseudo registers only rule.
    2536              :      Setting a hard register from a pseudo, and setting a pseudo
    2537              :      from a hard register.  In both cases the destination must be in
                      :      TImode, both operands must be registers, and the pseudo must
                      :      satisfy single_def_chain_p.  */
    2538     99526552 :   if (!def_set)
    2539              :     {
    2540     76212358 :       def_set = single_set (insn);
    2541     76212358 :       if (def_set)
    2542              :         {
    2543     17698628 :           rtx src = SET_SRC (def_set);
    2544     17698628 :           rtx dst = SET_DEST (def_set);
    2545     17698628 :           if (GET_MODE (dst) == TImode
    2546       220921 :               && REG_P (src) && REG_P (dst))
    2547              :             {
    2548       101574 :               if (HARD_REGISTER_P (dst)
    2549        52212 :                   && !HARD_REGISTER_P (src)
    2550       153786 :                   && single_def_chain_p (src))
    2551              :                 return true;  /* Hard register set from a pseudo.  */
    2552        72934 :               if (HARD_REGISTER_P (src)
    2553        49362 :                   && !HARD_REGISTER_P (dst)
    2554       122296 :                   && single_def_chain_p (dst))
    2555              :                 return true;  /* Pseudo set from a hard register.  */
    2556              :             }
    2557              :         }
    2558              :       return false;
    2559              :     }
    2560              : 
    2561     23314194 :   rtx src = SET_SRC (def_set);
    2562     23314194 :   rtx dst = SET_DEST (def_set);
    2563              : 
    2564     23314194 :   if (GET_CODE (src) == COMPARE)
    2565      3920767 :     return convertible_comparison_p (insn, TImode);
    2566              : 
    2567     19393427 :   if (GET_MODE (dst) != TImode
    2568      1181543 :       || (GET_MODE (src) != TImode
    2569        58772 :           && !CONST_SCALAR_INT_P (src)))
    2570              :     return false;  /* Both sides in TImode, except a constant source.  */
    2571              : 
    2572      1181543 :   if (!REG_P (dst) && !MEM_P (dst))
    2573              :     return false;
    2574              : 
    2575      1180090 :   if (MEM_P (dst)
    2576       523220 :       && misaligned_operand (dst, TImode)
    2577      1486795 :       && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
    2578              :     return false;  /* Misaligned store without optimal unaligned stores.  */
    2579              : 
    2580      1180085 :   if (REG_P (dst) && !single_def_chain_p (dst))
    2581              :     return false;
    2582              : 
    2583      1028842 :   switch (GET_CODE (src))
    2584              :     {
    2585       481970 :     case REG:
    2586       481970 :       return single_def_chain_p (src);
    2587              : 
    2588              :     case CONST_WIDE_INT:
    2589              :       return true;
    2590              : 
    2591        12471 :     case CONST_INT:
    2592              :       /* ??? Verify performance impact before enabling CONST_INT for
    2593              :          __int128 store.  */
    2594        12471 :       return standard_sse_constant_p (src, TImode);
    2595              : 
    2596       439644 :     case MEM:
    2597              :       /* Memory must be aligned or unaligned load is optimal.  The
                      :          destination must also be a register.  */
    2598       439644 :       return (REG_P (dst)
    2599       439644 :               && (!misaligned_operand (src, TImode)
    2600       141390 :                   || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
    2601              : 
    2602         3151 :     case AND:
    2603         3151 :       if (!MEM_P (dst)
    2604         3110 :           && GET_CODE (XEXP (src, 0)) == NOT
    2605            0 :           && REG_P (XEXP (XEXP (src, 0), 0))
    2606         3151 :           && (REG_P (XEXP (src, 1))
    2607            0 :               || CONST_SCALAR_INT_P (XEXP (src, 1))
    2608            0 :               || timode_mem_p (XEXP (src, 1))))
    2609            0 :         return true;  /* andnot: (and (not REG) X) into a non-memory DST.  */
    2610         3151 :       return (REG_P (XEXP (src, 0))
    2611           46 :               || timode_mem_p (XEXP (src, 0)))
    2612         3197 :              && (REG_P (XEXP (src, 1))
    2613         1282 :                  || CONST_SCALAR_INT_P (XEXP (src, 1))
    2614           35 :                  || timode_mem_p (XEXP (src, 1)));
    2615              : 
    2616        14103 :     case IOR:
    2617        14103 :     case XOR:
    2618        14103 :       if (timode_concatdi_p (src))
    2619              :         return true;  /* NOTE(review): timode_concatdi_p is defined outside this chunk.  */
    2620         2766 :       return (REG_P (XEXP (src, 0))
    2621         1435 :               || timode_mem_p (XEXP (src, 0)))
    2622         2783 :              && (REG_P (XEXP (src, 1))
    2623          290 :                  || CONST_SCALAR_INT_P (XEXP (src, 1))
    2624           54 :                  || timode_mem_p (XEXP (src, 1)));
    2625              : 
    2626          509 :     case NOT:
    2627          509 :       return REG_P (XEXP (src, 0)) || timode_mem_p (XEXP (src, 0));
    2628              : 
    2629        11541 :     case ASHIFT:
    2630        11541 :     case LSHIFTRT:
    2631        11541 :     case ASHIFTRT:
    2632        11541 :     case ROTATERT:
    2633        11541 :     case ROTATE:
    2634              :       /* Handle shifts/rotates by integer constants between 0 and 127.
                      :          The shifted operand must be a register.  */
    2635        11541 :       return REG_P (XEXP (src, 0))
    2636        11509 :              && CONST_INT_P (XEXP (src, 1))
    2637        22690 :              && (INTVAL (XEXP (src, 1)) & ~0x7f) == 0;
    2638              : 
    2639         7016 :     case PLUS:
    2640         7016 :       return timode_concatdi_p (src);  /* PLUS is accepted in this form only.  */
    2641              : 
    2642         3754 :     case ZERO_EXTEND:
    2643         3754 :       return REG_P (XEXP (src, 0))
    2644         3754 :              && GET_MODE (XEXP (src, 0)) == DImode;  /* DImode register extended to TImode.  */
    2645              : 
    2646              :     default:
    2647              :       return false;
    2648              :     }
    2649              : }
    2650              : 
    2651              : /* For a register REGNO, scan instructions for its defs and uses.
    2652              :    Put REGNO in REGS if a def or use isn't in CANDIDATES.
                      :
                      :    CANDIDATES is indexed by insn UID and REGS by register number.
                      :    Each scan stops at the first offending reference.  The use scan
                      :    runs even when the def scan has already marked REGNO, so both
                      :    dump messages may be printed for the same register.  */
    2653              : 
    2654              : static void
    2655      1222085 : timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
    2656              :                                    unsigned int regno)
    2657              : {
    2658              :   /* Do nothing if REGNO is already in REGS or is a hard reg.  */
    2659      1222085 :   if (bitmap_bit_p (regs, regno)
    2660      1222085 :       || HARD_REGISTER_NUM_P (regno))
    2661              :     return;
    2662              : 
    2663      1214151 :   for (df_ref def = DF_REG_DEF_CHAIN (regno);
    2664      2417983 :        def;
    2665      1203832 :        def = DF_REF_NEXT_REG (def))
    2666              :     {
    2667      1214131 :       if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
    2668              :         {
    2669        10299 :           if (dump_file)
    2670            0 :             fprintf (dump_file,
    2671              :                      "r%d has non convertible def in insn %d\n",
    2672            0 :                      regno, DF_REF_INSN_UID (def));
    2673              : 
    2674        10299 :           bitmap_set_bit (regs, regno);
    2675        10299 :           break;  /* One non-candidate def is enough.  */
    2676              :         }
    2677              :     }
    2678              : 
    2679      1214151 :   for (df_ref ref = DF_REG_USE_CHAIN (regno);
    2680      2690097 :        ref;
    2681      1475946 :        ref = DF_REF_NEXT_REG (ref))
    2682              :     {
    2683              :       /* Debug instructions are skipped.  */
    2684      1545455 :       if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
    2685      1545455 :           && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
    2686              :         {
    2687        69509 :           if (dump_file)
    2688            0 :             fprintf (dump_file,
    2689              :                      "r%d has non convertible use in insn %d\n",
    2690            0 :                      regno, DF_REF_INSN_UID (ref));
    2691              : 
    2692        69509 :           bitmap_set_bit (regs, regno);
    2693        69509 :           break;  /* One non-candidate use is enough.  */
    2694              :         }
    2695              :     }
    2696              : }
    2697              : 
    2698              : /* For a given bitmap of insn UIDs, scan all instructions and
    2699              :    remove an insn from CANDIDATES in case it has both convertible
    2700              :    and non-convertible definitions.
    2701              : 
    2702              :    All insns in a bitmap are conversion candidates according to
    2703              :    scalar_to_vector_candidate_p.  Currently it implies all insns
    2704              :    are single_set.
                      :
                      :    Each round first collects, via timode_check_non_convertible_regs,
                      :    the TImode registers that have a def or use outside CANDIDATES,
                      :    then drops every candidate insn that defines or uses one of them.
                      :    Rounds repeat until nothing is removed.  The register bitmap is
                      :    allocated once and is not cleared between rounds.  */
    2705              : 
    2706              : static void
    2707       829339 : timode_remove_non_convertible_regs (bitmap candidates)
    2708              : {
    2709       829339 :   bitmap_iterator bi;
    2710       829339 :   unsigned id;
    2711       829339 :   bitmap regs = BITMAP_ALLOC (NULL);
    2712       850472 :   bool changed;
    2713              : 
    2714       850472 :   do {
    2715       850472 :     changed = false;
    2716      2094538 :     EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
    2717              :       {
    2718      1244066 :         rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
    2719      1244066 :         df_ref ref;
    2720              : 
    2721      1946221 :         FOR_EACH_INSN_DEF (ref, insn)
    2722       702155 :           if (!DF_REF_REG_MEM_P (ref)
    2723       702155 :               && GET_MODE (DF_REF_REG (ref)) == TImode)
    2724       614168 :             timode_check_non_convertible_regs (candidates, regs,
    2725              :                                                DF_REF_REGNO (ref));
    2726              : 
    2727      3068378 :         FOR_EACH_INSN_USE (ref, insn)
    2728      1824312 :           if (DF_REF_TYPE (ref) == DF_REF_REG_USE
    2729       744088 :               && GET_MODE (DF_REF_REG (ref)) == TImode
    2730       607922 :               && !SUBREG_P (DF_REF_REG (ref)))
    2731       607917 :             timode_check_non_convertible_regs (candidates, regs,
    2732              :                                                DF_REF_REGNO (ref));
    2733              :       }
    2734              : 
    2735      1026060 :     EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)  /* Here ID is a register number.  */
    2736              :       {
    2737       175588 :         for (df_ref def = DF_REG_DEF_CHAIN (id);
    2738       357268 :              def;
    2739       181680 :              def = DF_REF_NEXT_REG (def))
    2740       181680 :           if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
    2741              :             {
    2742        56217 :               if (dump_file)
    2743            0 :                 fprintf (dump_file, "Removing insn %d from candidates list\n",
    2744            0 :                          DF_REF_INSN_UID (def));
    2745              : 
    2746        56217 :               bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
    2747        56217 :               changed = true;
    2748              :             }
    2749              : 
    2750       175588 :         for (df_ref ref = DF_REG_USE_CHAIN (id);
    2751       525544 :              ref;
    2752       349956 :              ref = DF_REF_NEXT_REG (ref))
    2753       349956 :           if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
    2754              :             {
    2755        16000 :               if (dump_file)
    2756            0 :                 fprintf (dump_file, "Removing insn %d from candidates list\n",
    2757            0 :                          DF_REF_INSN_UID (ref));
    2758              : 
    2759        16000 :               bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
    2760        16000 :               changed = true;
    2761              :             }
    2762              :       }
    2763              :   } while (changed);  /* Removals can expose further non-convertible registers.  */
    2764              : 
    2765       829339 :   BITMAP_FREE (regs);
    2766       829339 : }
    2767              : 
    2768              : /* Main STV pass function.  Find and convert scalar
    2769              :    instructions into vector mode when profitable.
                      :
                      :    If TIMODE_P, only TImode candidates are collected (through
                      :    timode_scalar_to_vector_candidate_p) and then pruned by
                      :    timode_remove_non_convertible_regs.  Otherwise each insn is tested
                      :    with general_scalar_to_vector_candidate_p for SImode first and
                      :    DImode second, and recorded for the first mode that accepts it.
                      :
                      :    Candidates are then grouped into chains; a chain is converted only
                      :    if compute_convert_gain reports a gain.  If anything was converted,
                      :    the stack alignment requirements are raised to 128 bits, V1TImode
                      :    parameter rtl is rewritten, and blocks are split after converted
                      :    insns that are control flow insns.  Always returns 0.  */
    2770              : 
    2771              : static unsigned int
    2772      1784924 : convert_scalars_to_vector (bool timode_p)
    2773              : {
    2774      1784924 :   basic_block bb;
    2775      1784924 :   int converted_insns = 0;
    2776      1784924 :   auto_vec<rtx_insn *> control_flow_insns;
    2777              : 
    2778      1784924 :   bitmap_obstack_initialize (NULL);
    2779      1784924 :   const machine_mode cand_mode[3] = { SImode, DImode, TImode };
    2780      1784924 :   const machine_mode cand_vmode[3] = { V4SImode, V2DImode, V1TImode };
    2781      5354772 :   bitmap_head candidates[3];  /* { SImode, DImode, TImode } */
    2782      7139696 :   for (unsigned i = 0; i < 3; ++i)
    2783      5354772 :     bitmap_initialize (&candidates[i], &bitmap_default_obstack);
    2784              : 
    2785      1784924 :   calculate_dominance_info (CDI_DOMINATORS);
    2786      1784924 :   df_set_flags (DF_DEFER_INSN_RESCAN | DF_RD_PRUNE_DEAD_DEFS);
    2787      1784924 :   df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
    2788      1784924 :   df_analyze ();
    2789              : 
    2790              :   /* Find all instructions we want to convert into vector mode.  */
    2791      1784924 :   if (dump_file)
    2792           44 :     fprintf (dump_file, "Searching for mode conversion candidates...\n");
    2793              : 
    2794     19473538 :   FOR_EACH_BB_FN (bb, cfun)
    2795              :     {
    2796     17688614 :       rtx_insn *insn;
    2797    235136556 :       FOR_BB_INSNS (bb, insn)
    2798    217447942 :         if (timode_p
    2799    217447942 :             && timode_scalar_to_vector_candidate_p (insn))
    2800              :           {
    2801      1067349 :             if (dump_file)
    2802            0 :               fprintf (dump_file, "  insn %d is marked as a TImode candidate\n",
    2803            0 :                        INSN_UID (insn));
    2804              : 
    2805      1067349 :             bitmap_set_bit (&candidates[2], INSN_UID (insn));
    2806              :           }
    2807    216380593 :         else if (!timode_p)
    2808              :           {
    2809              :             /* Check {SI,DI}mode.  An insn is recorded for at most one
                      :                mode: the first one that accepts it.  */
    2810    338783276 :             for (unsigned i = 0; i <= 1; ++i)
    2811    232331737 :               if (general_scalar_to_vector_candidate_p (insn, cand_mode[i]))
    2812              :                 {
    2813     11469851 :                   if (dump_file)
    2814          554 :                     fprintf (dump_file, "  insn %d is marked as a %s candidate\n",
    2815          277 :                              INSN_UID (insn), i == 0 ? "SImode" : "DImode");
    2816              : 
    2817     11469851 :                   bitmap_set_bit (&candidates[i], INSN_UID (insn));
    2818     11469851 :                   break;
    2819              :                 }
    2820              :           }
    2821              :     }
    2822              : 
    2823      1784924 :   if (timode_p)
    2824       829339 :     timode_remove_non_convertible_regs (&candidates[2]);
    2825              : 
    2826      5654139 :   for (unsigned i = 0; i <= 2; ++i)
    2827      4505860 :     if (!bitmap_empty_p (&candidates[i]))
    2828              :       break;  /* Some mode has candidates, so no message is printed.  */
    2829      3869215 :     else if (i == 2 && dump_file)
    2830           23 :       fprintf (dump_file, "There are no candidates for optimization.\n");
    2831              : 
    2832      7139696 :   for (unsigned i = 0; i <= 2; ++i)
    2833              :     {
    2834      5354772 :       auto_bitmap disallowed;
    2835      5354772 :       bitmap_tree_view (&candidates[i]);
    2836     17023606 :       while (!bitmap_empty_p (&candidates[i]))
    2837              :         {
    2838      6314062 :           unsigned uid = bitmap_first_set_bit (&candidates[i]);
    2839      6314062 :           scalar_chain *chain;
    2840              : 
    2841      6314062 :           if (cand_mode[i] == TImode)
    2842       499483 :             chain = new timode_scalar_chain;
    2843              :           else
    2844      5814579 :             chain = new general_scalar_chain (cand_mode[i], cand_vmode[i]);
    2845              : 
    2846              :           /* Find instructions chain we want to convert to vector mode.
    2847              :              Check all uses and definitions to estimate all required
    2848              :              conversions.
                      :              NOTE(review): this loop only terminates if build removes
                      :              at least UID from candidates[i]; build is defined outside
                      :              this chunk, so confirm there.  */
    2849      6314062 :           if (chain->build (&candidates[i], uid, disallowed))
    2850              :             {
    2851      6310193 :               if (chain->compute_convert_gain ())
    2852       631483 :                 converted_insns += chain->convert ();
    2853      5678710 :               else if (dump_file)
    2854          136 :                 fprintf (dump_file, "Chain #%d conversion is not profitable\n",
    2855              :                          chain->chain_id);
    2856              :             }
    2857              : 
    2858      6314062 :           rtx_insn* iter_insn;
    2859      6314062 :           unsigned int ii;
    2860      6317650 :           FOR_EACH_VEC_ELT (chain->control_flow_insns, ii, iter_insn)
    2861         3588 :             control_flow_insns.safe_push (iter_insn);  /* Saved before the chain is deleted.  */
    2862              : 
    2863      6314062 :           delete chain;
    2864              :         }
    2865      5354772 :     }
    2866              : 
    2867      1784924 :   if (dump_file)
    2868           44 :     fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
    2869              : 
    2870      7139696 :   for (unsigned i = 0; i <= 2; ++i)
    2871      5354772 :     bitmap_release (&candidates[i]);
    2872      1784924 :   bitmap_obstack_release (NULL);
    2873      1784924 :   df_process_deferred_rescans ();
    2874              : 
    2875              :   /* Conversion means we may have 128bit register spills/fills
    2876              :      which require aligned stack.  Both alignment fields are only ever
                      :      raised to 128 here, never lowered.  */
    2877      1784924 :   if (converted_insns)
    2878              :     {
    2879       104041 :       if (crtl->stack_alignment_needed < 128)
    2880         2290 :         crtl->stack_alignment_needed = 128;
    2881       104041 :       if (crtl->stack_alignment_estimated < 128)
    2882          220 :         crtl->stack_alignment_estimated = 128;
    2883              : 
    2884       104041 :       crtl->stack_realign_needed
    2885       104041 :         = INCOMING_STACK_BOUNDARY < crtl->stack_alignment_estimated;
    2886       104041 :       crtl->stack_realign_tried = crtl->stack_realign_needed;
    2887              : 
    2888       104041 :       crtl->stack_realign_processed = true;
    2889              : 
    2890       104041 :       if (!crtl->drap_reg)
    2891              :         {
    2892       103864 :           rtx drap_rtx = targetm.calls.get_drap_rtx ();
    2893              : 
    2894              :           /* stack_realign_drap and drap_rtx must match.  */
    2895       103864 :           gcc_assert ((stack_realign_drap != 0) == (drap_rtx != NULL));
    2896              : 
    2897              :           /* Do nothing if NULL is returned,
    2898              :              which means DRAP is not needed.  */
    2899       103864 :           if (drap_rtx != NULL)
    2900              :             {
    2901            0 :               crtl->args.internal_arg_pointer = drap_rtx;
    2902              : 
    2903              :               /* Call fixup_tail_calls to clean up
    2904              :                  REG_EQUIV note if DRAP is needed. */
    2905            0 :               fixup_tail_calls ();
    2906              :             }
    2907              :         }
    2908              : 
    2909              :       /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments.  On 64-bit
                      :          targets, a TImode parameter whose rtl is a V1TImode register
                      :          gets a TImode SUBREG of that register instead.  */
    2910       104041 :       if (TARGET_64BIT)
    2911        65546 :         for (tree parm = DECL_ARGUMENTS (current_function_decl);
    2912       178960 :              parm; parm = DECL_CHAIN (parm))
    2913              :           {
    2914       113414 :             if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
    2915        97689 :               continue;
    2916        15725 :             if (DECL_RTL_SET_P (parm)
    2917        31450 :                 && GET_MODE (DECL_RTL (parm)) == V1TImode)
    2918              :               {
    2919          611 :                 rtx r = DECL_RTL (parm);
    2920          611 :                 if (REG_P (r))
    2921          611 :                   SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
    2922              :               }
    2923        15725 :             if (DECL_INCOMING_RTL (parm)
    2924        15725 :                 && GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
    2925              :               {
    2926            0 :                 rtx r = DECL_INCOMING_RTL (parm);
    2927            0 :                 if (REG_P (r))
    2928            0 :                   DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
    2929              :               }
    2930              :           }
    2931              : 
    2932       104041 :       if (!control_flow_insns.is_empty ())
    2933              :         {
    2934         1130 :           free_dominance_info (CDI_DOMINATORS);  /* Blocks are split below.  */
    2935              : 
    2936         1130 :           unsigned int i;
    2937         1130 :           rtx_insn* insn;
    2938         5848 :           FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
    2939         3588 :             if (control_flow_insn_p (insn))
    2940              :               {
    2941              :                 /* Split the block after insn.  There will be a fallthru
    2942              :                    edge, which is OK so we keep it.  We have to create
    2943              :                    the exception edges ourselves.  */
    2944         3588 :                 bb = BLOCK_FOR_INSN (insn);
    2945         3588 :                 split_block (bb, insn);
    2946         3588 :                 rtl_make_eh_edge (NULL, bb, BB_END (bb));
    2947              :               }
    2948              :         }
    2949              :     }
    2950              : 
    2951      1784924 :   return 0;
    2952      1784924 : }
    2953              : 
    2954              : static unsigned int
    2955        75059 : rest_of_handle_insert_vzeroupper (void)
    2956              : {
    2957              :   /* vzeroupper instructions are inserted immediately after reload (and
    2958              :      after postreload_cse, which cleans up after it a little bit) to account
    2959              :      for possible spills from 256bit or 512bit registers.  The pass reuses the
    2960              :      mode switching infrastructure by re-running the mode insertion pass, so
    2961              :      disable entities that have already been processed.  */
    2962       525413 :   for (int i = 0; i < MAX_386_ENTITIES; i++)
    2963       450354 :     ix86_optimize_mode_switching[i] = 0;
    2964              : 
    2965        75059 :   ix86_optimize_mode_switching[AVX_U128] = 1;
    2966              : 
    2967              :   /* Call optimize_mode_switching.  */
    2968        75059 :   g->get_passes ()->execute_pass_mode_switching ();
    2969              : 
    2970              :   /* LRA removes all REG_DEAD/REG_UNUSED notes and normally they
    2971              :      reappear in the IL only at the start of pass_rtl_dse2, which does
    2972              :      df_note_add_problem (); df_analyze ();
    2973              :      The vzeroupper is scheduled after postreload_cse pass and mode
    2974              :      switching computes the notes as well, the problem is that e.g.
    2975              :      pass_gcse2 doesn't maintain the notes, see PR113059 and
    2976              :      PR112760.  Remove the notes now to restore status quo ante
    2977              :      until we figure out how to maintain the notes or what else
    2978              :      to do.  */
    2979        75059 :   basic_block bb;
    2980        75059 :   rtx_insn *insn;
    2981       405839 :   FOR_EACH_BB_FN (bb, cfun)
    2982      4245661 :     FOR_BB_INSNS (bb, insn)
    2983      3914881 :       if (NONDEBUG_INSN_P (insn))
    2984              :         {
    2985      2095565 :           rtx *pnote = &REG_NOTES (insn);
    2986      3891768 :           while (*pnote != 0)
    2987              :             {
    2988      1796203 :               if (REG_NOTE_KIND (*pnote) == REG_DEAD
    2989       822929 :                   || REG_NOTE_KIND (*pnote) == REG_UNUSED)
    2990      1289585 :                 *pnote = XEXP (*pnote, 1);
    2991              :               else
    2992       506618 :                 pnote = &XEXP (*pnote, 1);
    2993              :             }
    2994              :         }
    2995              : 
    2996        75059 :   df_remove_problem (df_note);
    2997        75059 :   df_analyze ();
    2998        75059 :   return 0;
    2999              : }
    3000              : 
    3001              : namespace {
    3002              : 
    3003              : const pass_data pass_data_insert_vzeroupper =
    3004              : {
    3005              :   RTL_PASS, /* type */
    3006              :   "vzeroupper", /* name */
    3007              :   OPTGROUP_NONE, /* optinfo_flags */
    3008              :   TV_MACH_DEP, /* tv_id */
    3009              :   0, /* properties_required */
    3010              :   0, /* properties_provided */
    3011              :   0, /* properties_destroyed */
    3012              :   0, /* todo_flags_start */
    3013              :   TODO_df_finish, /* todo_flags_finish */
    3014              : };
    3015              : 
    3016              : class pass_insert_vzeroupper : public rtl_opt_pass
    3017              : {
    3018              : public:
    3019       298828 :   pass_insert_vzeroupper(gcc::context *ctxt)
    3020       597656 :     : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
    3021              :   {}
    3022              : 
    3023              :   /* opt_pass methods: */
    3024      1488378 :   bool gate (function *) final override
    3025              :     {
    3026      1488378 :       return TARGET_AVX && TARGET_VZEROUPPER;
    3027              :     }
    3028              : 
    3029        75059 :   unsigned int execute (function *) final override
    3030              :     {
    3031        75059 :       return rest_of_handle_insert_vzeroupper ();
    3032              :     }
    3033              : 
    3034              : }; // class pass_insert_vzeroupper
    3035              : 
    3036              : const pass_data pass_data_stv =
    3037              : {
    3038              :   RTL_PASS, /* type */
    3039              :   "stv", /* name */
    3040              :   OPTGROUP_NONE, /* optinfo_flags */
    3041              :   TV_MACH_DEP, /* tv_id */
    3042              :   0, /* properties_required */
    3043              :   0, /* properties_provided */
    3044              :   0, /* properties_destroyed */
    3045              :   0, /* todo_flags_start */
    3046              :   TODO_df_finish, /* todo_flags_finish */
    3047              : };
    3048              : 
    3049              : class pass_stv : public rtl_opt_pass
    3050              : {
    3051              : public:
    3052       597656 :   pass_stv (gcc::context *ctxt)
    3053       597656 :     : rtl_opt_pass (pass_data_stv, ctxt),
    3054      1195312 :       timode_p (false)
    3055              :   {}
    3056              : 
    3057              :   /* opt_pass methods: */
    3058      2976756 :   bool gate (function *) final override
    3059              :     {
    3060      1488378 :       return ((!timode_p || TARGET_64BIT)
    3061      4338567 :               && TARGET_STV && TARGET_SSE2 && optimize > 1);
    3062              :     }
    3063              : 
    3064      1784924 :   unsigned int execute (function *) final override
    3065              :     {
    3066      1784924 :       return convert_scalars_to_vector (timode_p);
    3067              :     }
    3068              : 
    3069       298828 :   opt_pass *clone () final override
    3070              :     {
    3071       298828 :       return new pass_stv (m_ctxt);
    3072              :     }
    3073              : 
    3074       597656 :   void set_pass_param (unsigned int n, bool param) final override
    3075              :     {
    3076       597656 :       gcc_assert (n == 0);
    3077       597656 :       timode_p = param;
    3078       597656 :     }
    3079              : 
    3080              : private:
    3081              :   bool timode_p;
    3082              : }; // class pass_stv
    3083              : 
    3084              : } // anon namespace
    3085              : 
    3086              : rtl_opt_pass *
    3087       298828 : make_pass_insert_vzeroupper (gcc::context *ctxt)
    3088              : {
    3089       298828 :   return new pass_insert_vzeroupper (ctxt);
    3090              : }
    3091              : 
    3092              : rtl_opt_pass *
    3093       298828 : make_pass_stv (gcc::context *ctxt)
    3094              : {
    3095       298828 :   return new pass_stv (ctxt);
    3096              : }
    3097              : 
    3098              : /* Inserting ENDBR and pseudo patchable-area instructions.  */
    3099              : 
    3100              : static void
    3101       190323 : rest_of_insert_endbr_and_patchable_area (bool need_endbr,
    3102              :                                          unsigned int patchable_area_size)
    3103              : {
    3104       190323 :   rtx endbr;
    3105       190323 :   rtx_insn *insn;
    3106       190323 :   rtx_insn *endbr_insn = NULL;
    3107       190323 :   basic_block bb;
    3108              : 
    3109       190323 :   if (need_endbr)
    3110              :     {
    3111              :       /* Currently emit ENDBR if it's a tracking function, i.e. 'nocf_check'
    3112              :          is absent among function attributes.  Later an optimization will
    3113              :          be introduced to analyze whether the address of a static function
    3114              :          is taken.  A static function whose address is not taken will get
    3115              :          a nocf_check attribute.  This will allow reducing the number of
    3116              :          ENDBRs.  */
    3117       190278 :       if (!lookup_attribute ("nocf_check",
    3118       190278 :                              TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
    3119       190260 :           && (!flag_manual_endbr
    3120            8 :               || lookup_attribute ("cf_check",
    3121            8 :                                    DECL_ATTRIBUTES (cfun->decl)))
    3122       380537 :           && (!cgraph_node::get (cfun->decl)->only_called_directly_p ()
    3123        27283 :               || ix86_cmodel == CM_LARGE
    3124        27282 :               || ix86_cmodel == CM_LARGE_PIC
    3125        27281 :               || flag_force_indirect_call
    3126        27281 :               || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
    3127              :                   && DECL_DLLIMPORT_P (cfun->decl))))
    3128              :         {
    3129       162979 :           if (crtl->profile && flag_fentry)
    3130              :             {
    3131              :               /* Queue ENDBR insertion to x86_function_profiler.
    3132              :                  NB: Any patchable-area insn will be inserted after
    3133              :                  ENDBR.  */
    3134            6 :               cfun->machine->insn_queued_at_entrance = TYPE_ENDBR;
    3135              :             }
    3136              :           else
    3137              :             {
    3138       162973 :               endbr = gen_nop_endbr ();
    3139       162973 :               bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
    3140       162973 :               rtx_insn *insn = BB_HEAD (bb);
    3141       162973 :               endbr_insn = emit_insn_before (endbr, insn);
    3142              :             }
    3143              :         }
    3144              :     }
    3145              : 
    3146       190323 :   if (patchable_area_size)
    3147              :     {
    3148           51 :       if (crtl->profile && flag_fentry)
    3149              :         {
    3150              :           /* Queue patchable-area insertion to x86_function_profiler.
    3151              :              NB: If there is a queued ENDBR, x86_function_profiler
    3152              :              will also handle patchable-area.  */
    3153            2 :           if (!cfun->machine->insn_queued_at_entrance)
    3154            1 :             cfun->machine->insn_queued_at_entrance = TYPE_PATCHABLE_AREA;
    3155              :         }
    3156              :       else
    3157              :         {
    3158           49 :           rtx patchable_area
    3159           49 :             = gen_patchable_area (GEN_INT (patchable_area_size),
    3160           49 :                                   GEN_INT (crtl->patch_area_entry == 0));
    3161           49 :           if (endbr_insn)
    3162            3 :             emit_insn_after (patchable_area, endbr_insn);
    3163              :           else
    3164              :             {
    3165           46 :               bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
    3166           46 :               insn = BB_HEAD (bb);
    3167           46 :               emit_insn_before (patchable_area, insn);
    3168              :             }
    3169              :         }
    3170              :     }
    3171              : 
    3172       190323 :   if (!need_endbr)
    3173              :     return;
    3174              : 
    3175       190278 :   bb = 0;
    3176      3897299 :   FOR_EACH_BB_FN (bb, cfun)
    3177              :     {
    3178     71470982 :       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
    3179     67763961 :            insn = NEXT_INSN (insn))
    3180              :         {
    3181     67763961 :           if (CALL_P (insn))
    3182              :             {
    3183      1336996 :               need_endbr = find_reg_note (insn, REG_SETJMP, NULL) != NULL;
    3184      1336996 :               if (!need_endbr && !SIBLING_CALL_P (insn))
    3185              :                 {
    3186      1289568 :                   rtx call = get_call_rtx_from (insn);
    3187      1289568 :                   rtx fnaddr = XEXP (call, 0);
    3188      1289568 :                   tree fndecl = NULL_TREE;
    3189              : 
    3190              :                   /* Also generate ENDBRANCH for non-tail call which
    3191              :                      may return via indirect branch.  */
    3192      1289568 :                   if (SYMBOL_REF_P (XEXP (fnaddr, 0)))
    3193      1233227 :                     fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
    3194      1233227 :                   if (fndecl == NULL_TREE)
    3195        56709 :                     fndecl = MEM_EXPR (fnaddr);
    3196        56709 :                   if (fndecl
    3197      1287327 :                       && TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE
    3198       543006 :                       && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE)
    3199              :                     fndecl = NULL_TREE;
    3200      1289568 :                   if (fndecl && TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
    3201              :                     {
    3202      1248979 :                       tree fntype = TREE_TYPE (fndecl);
    3203      1248979 :                       if (lookup_attribute ("indirect_return",
    3204      1248979 :                                             TYPE_ATTRIBUTES (fntype)))
    3205              :                         need_endbr = true;
    3206              :                     }
    3207              :                 }
    3208      1336984 :               if (!need_endbr)
    3209      1336976 :                 continue;
    3210              :               /* Generate ENDBRANCH after a CALL to a setjmp-like function,
    3211              :                  which can return more than once.  */
    3212              : 
    3213           20 :               endbr = gen_nop_endbr ();
    3214           20 :               emit_insn_after_setloc (endbr, insn, INSN_LOCATION (insn));
    3215           20 :               continue;
    3216           20 :             }
    3217              : 
    3218     66426965 :           if (JUMP_P (insn) && flag_cet_switch)
    3219              :             {
    3220            9 :               rtx target = JUMP_LABEL (insn);
    3221            9 :               if (target == NULL_RTX || ANY_RETURN_P (target))
    3222            5 :                 continue;
    3223              : 
    3224              :               /* Check the jump is a switch table.  */
    3225            4 :               rtx_insn *label = as_a<rtx_insn *> (target);
    3226            4 :               rtx_insn *table = next_insn (label);
    3227            4 :               if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
    3228            2 :                 continue;
    3229              : 
    3230              :               /* For the indirect jump find out all places it jumps and insert
    3231              :                  ENDBRANCH there.  It should be done under a special flag to
    3232              :                  control ENDBRANCH generation for switch stmts.  */
    3233            2 :               edge_iterator ei;
    3234            2 :               edge e;
    3235            2 :               basic_block dest_blk;
    3236              : 
    3237           24 :               FOR_EACH_EDGE (e, ei, bb->succs)
    3238              :                 {
    3239           22 :                   rtx_insn *insn;
    3240              : 
    3241           22 :                   dest_blk = e->dest;
    3242           22 :                   insn = BB_HEAD (dest_blk);
    3243           22 :                   gcc_assert (LABEL_P (insn));
    3244           22 :                   endbr = gen_nop_endbr ();
    3245           22 :                   emit_insn_after (endbr, insn);
    3246              :                 }
    3247            2 :               continue;
    3248            2 :             }
    3249              : 
    3250     66426956 :           if (LABEL_P (insn) && LABEL_PRESERVE_P (insn))
    3251              :             {
    3252       135411 :               endbr = gen_nop_endbr ();
    3253       135411 :               emit_insn_after (endbr, insn);
    3254       135411 :               continue;
    3255              :             }
    3256              :         }
    3257              :     }
    3258              : 
    3259              :   return;
    3260              : }
    3261              : 
    3262              : namespace {
    3263              : 
    3264              : const pass_data pass_data_insert_endbr_and_patchable_area =
    3265              : {
    3266              :   RTL_PASS, /* type.  */
    3267              :   "endbr_and_patchable_area", /* name.  */
    3268              :   OPTGROUP_NONE, /* optinfo_flags.  */
    3269              :   TV_MACH_DEP, /* tv_id.  */
    3270              :   0, /* properties_required.  */
    3271              :   0, /* properties_provided.  */
    3272              :   0, /* properties_destroyed.  */
    3273              :   0, /* todo_flags_start.  */
    3274              :   0, /* todo_flags_finish.  */
    3275              : };
    3276              : 
    3277              : class pass_insert_endbr_and_patchable_area : public rtl_opt_pass
    3278              : {
    3279              : public:
    3280       298828 :   pass_insert_endbr_and_patchable_area (gcc::context *ctxt)
    3281       597656 :     : rtl_opt_pass (pass_data_insert_endbr_and_patchable_area, ctxt)
    3282              :   {}
    3283              : 
    3284              :   /* opt_pass methods: */
    3285      1488378 :   bool gate (function *) final override
    3286              :     {
    3287      1488378 :       need_endbr = (flag_cf_protection & CF_BRANCH) != 0;
    3288      1488378 :       patchable_area_size = crtl->patch_area_size - crtl->patch_area_entry;
    3289      1488378 :       return need_endbr || patchable_area_size;
    3290              :     }
    3291              : 
    3292       190323 :   unsigned int execute (function *) final override
    3293              :     {
    3294       190323 :       timevar_push (TV_MACH_DEP);
    3295       190323 :       rest_of_insert_endbr_and_patchable_area (need_endbr,
    3296              :                                                patchable_area_size);
    3297       190323 :       timevar_pop (TV_MACH_DEP);
    3298       190323 :       return 0;
    3299              :     }
    3300              : 
    3301              : private:
    3302              :   bool need_endbr;
    3303              :   unsigned int patchable_area_size;
    3304              : }; // class pass_insert_endbr_and_patchable_area
    3305              : 
    3306              : } // anon namespace
    3307              : 
    3308              : rtl_opt_pass *
    3309       298828 : make_pass_insert_endbr_and_patchable_area (gcc::context *ctxt)
    3310              : {
    3311       298828 :   return new pass_insert_endbr_and_patchable_area (ctxt);
    3312              : }
    3313              : 
    3314              : bool
    3315      6036338 : ix86_rpad_gate ()
    3316              : {
    3317      6036338 :   return (TARGET_AVX
    3318       392529 :           && TARGET_SSE_PARTIAL_REG_DEPENDENCY
    3319       297634 :           && TARGET_SSE_MATH
    3320       297320 :           && optimize
    3321      6328305 :           && optimize_function_for_speed_p (cfun));
    3322              : }
    3323              : 
    3324              : enum x86_cse_kind
    3325              : {
    3326              :   X86_CSE_CONST0_VECTOR,
    3327              :   X86_CSE_CONSTM1_VECTOR,
    3328              :   X86_CSE_CONST_VECTOR,
    3329              :   X86_CSE_VEC_DUP,
    3330              :   X86_CSE_TLS_GD,
    3331              :   X86_CSE_TLS_LD_BASE,
    3332              :   X86_CSE_TLSDESC
    3333              : };
    3334              : 
    3335       154760 : struct redundant_pattern
    3336              : {
    3337              :   /* Bitmap of basic blocks with broadcast instructions.  */
    3338              :   auto_bitmap bbs;
    3339              :   /* Bitmap of broadcast instructions.  */
    3340              :   auto_bitmap insns;
    3341              :   /* The broadcast inner scalar.  */
    3342              :   rtx val;
    3343              :   /* The actual redundant source value for UNSPEC_TLSDESC.  */
    3344              :   rtx tlsdesc_val;
    3345              :   /* The inner scalar mode.  */
    3346              :   machine_mode mode;
    3347              :   /* The destination mode which can be changed to the integer mode of
    3348              :      the same size.  */
    3349              :   machine_mode dest_mode;
    3350              :   /* The instruction which sets the inner scalar.  Nullptr if the inner
    3351              :      scalar is applied to the whole function, instead of within the same
    3352              :      block.  */
    3353              :   rtx_insn *def_insn;
    3354              :   /* The widest broadcast source.  */
    3355              :   rtx broadcast_source;
    3356              :   /* The widest broadcast register.  */
    3357              :   rtx broadcast_reg;
    3358              :   /* The basic block of the broadcast instruction.  */
    3359              :   basic_block bb;
    3360              :   /* The number of broadcast instructions with the same inner scalar.  */
    3361              :   unsigned HOST_WIDE_INT count;
    3362              :   /* The threshold of broadcast instructions with the same inner
    3363              :      scalar.  */
    3364              :   unsigned int threshold;
    3365              :   /* The widest broadcast size in bytes.  */
    3366              :   unsigned int size;
    3367              :   /* Load kind.  */
    3368              :   x86_cse_kind kind;
    3369              : };
    3370              : 
    3371              : /* Generate a vector set, DEST = SRC, at entry of the nearest dominator
    3372              :    for basic block map BBS, which is in the fake loop that contains the
    3373              :    whole function, so that there is only a single vector set in the
    3374              :    whole function.  If not nullptr, LOAD is a pointer to the load.  */
    3375              : 
    3376              : static void
    3377        43402 : ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs,
    3378              :                               redundant_pattern *load = nullptr)
    3379              : {
    3380        43402 :   basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
    3381              :   /* For X86_CSE_VEC_DUP and X86_CSE_CONST_VECTOR, don't place the vector
    3382              :      set outside of the loop to avoid extra spills.  */
    3383        43402 :   if (!load
    3384        42380 :       || (load->kind != X86_CSE_VEC_DUP
    3385        42380 :           && load->kind != X86_CSE_CONST_VECTOR))
    3386              :     {
    3387        23839 :       while (bb->loop_father->latch
    3388        23839 :              != EXIT_BLOCK_PTR_FOR_FN (cfun))
    3389         1411 :         bb = get_immediate_dominator (CDI_DOMINATORS,
    3390              :                                       bb->loop_father->header);
    3391              :     }
    3392              : 
    3393        43402 :   if (CONST_INT_P (src))
    3394        10644 :     dest = gen_rtx_SUBREG (load->dest_mode, dest, 0);
    3395        32758 :   else if (CONST_VECTOR_P (src))
    3396              :     {
    3397              :       /* The only possible CONST_VECTORs of SRC are CONST0_RTX and
    3398              :          CONSTM1_RTX.  Otherwise,
    3399              : 
    3400              :          rtx set = gen_rtx_SET (dest, src);
    3401              : 
    3402              :          won't be a valid instruction.  CONST0_RTX always works.  It
    3403              :          can come from:
    3404              : 
    3405              :          1. remove_partial_avx_dependency with LOAD == NULL.
    3406              :          2. X86_CSE_VEC_DUP with
    3407              : 
    3408              :          (insn 48 58 16 3 (set (reg:V4HI 123)
    3409              :                 (const_vector:V4HI [
    3410              :                         (const_int 0 [0]) repeated x4
    3411              :                   ])) 2065 {*movv4hi_internal} (nil))
    3412              : 
    3413              :          3. X86_CSE_CONST0_VECTOR.
    3414              :        */
    3415        22428 :       machine_mode mode = GET_MODE (dest);
    3416        22428 :       if (!(src == CONST0_RTX (mode)
    3417         1578 :             || (src == CONSTM1_RTX (mode)
    3418         1578 :                 && load->kind == X86_CSE_CONSTM1_VECTOR)))
    3419            0 :         gcc_unreachable ();
    3420              :     }
    3421        43402 :   rtx set = gen_rtx_SET (dest, src);
    3422              : 
    3423        43402 :   rtx_insn *insn = BB_HEAD (bb);
    3424       170720 :   while (insn && !NONDEBUG_INSN_P (insn))
    3425              :     {
    3426       127322 :       if (insn == BB_END (bb))
    3427              :         {
    3428              :           insn = NULL;
    3429              :           break;
    3430              :         }
    3431       127318 :       insn = NEXT_INSN (insn);
    3432              :     }
    3433              : 
    3434        43402 :   rtx_insn *set_insn;
    3435        43402 :   if (insn == BB_HEAD (bb))
    3436              :     {
    3437            0 :       set_insn = emit_insn_before (set, insn);
    3438            0 :       if (dump_file)
    3439              :         {
    3440            0 :           fprintf (dump_file, "\nPlace:\n\n");
    3441            0 :           print_rtl_single (dump_file, set_insn);
    3442            0 :           fprintf (dump_file, "\nbefore:\n\n");
    3443            0 :           print_rtl_single (dump_file, insn);
    3444            0 :           fprintf (dump_file, "\n");
    3445              :         }
    3446              :     }
    3447              :   else
    3448              :     {
    3449        43402 :       rtx_insn *after = insn ? PREV_INSN (insn) : BB_END (bb);
    3450        43402 :       set_insn = emit_insn_after (set, after);
    3451        43402 :       if (dump_file)
    3452              :         {
    3453            2 :           fprintf (dump_file, "\nPlace:\n\n");
    3454            2 :           print_rtl_single (dump_file, set_insn);
    3455            2 :           fprintf (dump_file, "\nafter:\n\n");
    3456            2 :           print_rtl_single (dump_file, after);
    3457            2 :           fprintf (dump_file, "\n");
    3458              :         }
    3459              :     }
    3460              : 
    3461        43402 :   if (load && load->kind == X86_CSE_VEC_DUP)
    3462              :     {
    3463              :       /* Get the source from LOAD as (reg:SI 99) in
    3464              : 
    3465              :          (vec_duplicate:V4SI (reg:SI 99))
    3466              : 
    3467              :        */
    3468        10330 :       rtx inner_scalar = load->val;
    3469              :       /* Set the source in (vec_duplicate:V4SI (reg:SI 99)).  */
    3470        10330 :       rtx reg = XEXP (src, 0);
    3471        10330 :       machine_mode reg_mode = GET_MODE (reg);
    3472        10330 :       if (reg_mode != GET_MODE (inner_scalar))
    3473              :         {
    3474        10048 :           if (REG_P (inner_scalar) || MEM_P (inner_scalar))
    3475            0 :             inner_scalar = gen_rtx_SUBREG (reg_mode, inner_scalar, 0);
    3476        10048 :           else if (!SCALAR_INT_MODE_P (reg_mode))
    3477              :             {
    3478              :               /* For non-int load with integer constant, generate
    3479              : 
    3480              :                  (set (subreg:SI (reg/v:SF 105 [ f ]) 0)
    3481              :                       (const_int 1313486336 [0x4e4a3600]))
    3482              : 
    3483              :                */
    3484            1 :               gcc_assert (CONST_INT_P (inner_scalar));
    3485            1 :               unsigned int bits = GET_MODE_BITSIZE (reg_mode);
    3486            1 :               machine_mode mode = int_mode_for_size (bits, 0).require ();
    3487            1 :               reg = gen_rtx_SUBREG (mode, reg, 0);
    3488              :             }
    3489              :         }
    3490        10330 :       rtx set = gen_rtx_SET (reg, inner_scalar);
    3491        10330 :       insn = emit_insn_before (set, set_insn);
    3492        10330 :       if (dump_file)
    3493              :         {
    3494            0 :           fprintf (dump_file, "\nAdd:\n\n");
    3495            0 :           print_rtl_single (dump_file, insn);
    3496            0 :           fprintf (dump_file, "\nbefore:\n\n");
    3497            0 :           print_rtl_single (dump_file, set_insn);
    3498            0 :           fprintf (dump_file, "\n");
    3499              :         }
    3500              :     }
    3501        43402 : }
    3502              : 
    3503              : /* At entry of the nearest common dominator for basic blocks with
    3504              :    conversions/rcp/sqrt/rsqrt/round, generate a single
    3505              :         vxorps %xmmN, %xmmN, %xmmN
    3506              :    for all
    3507              :         vcvtss2sd  op, %xmmN, %xmmX
    3508              :         vcvtsd2ss  op, %xmmN, %xmmX
    3509              :         vcvtsi2ss  op, %xmmN, %xmmX
    3510              :         vcvtsi2sd  op, %xmmN, %xmmX
    3511              :    and rewrite each of them to merge its result into that zeroed register.
    3512              :    NB: We want to generate only a single vxorps to cover the whole
    3513              :    function.  The LCM algorithm isn't appropriate here since it may
    3514              :    place a vxorps inside the loop.  Always return 0.  */
    3515              : 
    3516              : static unsigned int
    3517        33896 : remove_partial_avx_dependency (void)
    3518              : {
    3519        33896 :   timevar_push (TV_MACH_DEP);
    3520              : 
    3521        33896 :   bitmap_obstack_initialize (NULL);
    3522        33896 :   bitmap convert_bbs = BITMAP_ALLOC (NULL);  /* Blocks with a rewritten insn.  */
    3523              : 
    3524        33896 :   basic_block bb;
    3525        33896 :   rtx_insn *insn, *set_insn;
    3526        33896 :   rtx set;
    3527        33896 :   rtx v4sf_const0 = NULL_RTX;  /* Shared zero register, made on first use.  */
    3528              : 
    3529        33896 :   auto_vec<rtx_insn *> control_flow_insns;  /* New insns given an EH note.  */
    3530              : 
    3531              :   /* We create invalid RTL initially so defer rescans.  */
    3532        33896 :   df_set_flags (DF_DEFER_INSN_RESCAN);
    3533              : 
    3534       311131 :   FOR_EACH_BB_FN (bb, cfun)
    3535              :     {
    3536      3474682 :       FOR_BB_INSNS (bb, insn)
    3537              :         {
    3538      3197447 :           if (!NONDEBUG_INSN_P (insn))
    3539      1417202 :             continue;
    3540              : 
    3541      1780245 :           set = single_set (insn);
    3542      1780245 :           if (!set)
    3543        71411 :             continue;
    3544              : 
    3545      1708834 :           if (get_attr_avx_partial_xmm_update (insn)
    3546              :               != AVX_PARTIAL_XMM_UPDATE_TRUE)
    3547      1705661 :             continue;
    3548              : 
    3549              :           /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
    3550              :              SI -> SF, SI -> DF, DI -> SF, DI -> DF, sqrt, rsqrt, rcp,
    3551              :              round, to vec_dup and vec_merge with subreg.  */
    3552         3173 :           rtx src = SET_SRC (set);
    3553         3173 :           rtx dest = SET_DEST (set);
    3554         3173 :           machine_mode dest_mode = GET_MODE (dest);
    3555         3173 :           bool convert_p = false;
    3556         3173 :           switch (GET_CODE (src))
    3557              :             {
    3558         3108 :             case FLOAT:
    3559         3108 :             case FLOAT_EXTEND:
    3560         3108 :             case FLOAT_TRUNCATE:
    3561         3108 :             case UNSIGNED_FLOAT:
    3562         3108 :               convert_p = true;
    3563         3108 :               break;
    3564              :             default:
    3565              :               break;
    3566              :             }
    3567              : 
    3568              :           /* Only conversions are filtered by the tuning flags below; any
                                   other insn gets VOIDmode and is always rewritten.  */
    3569         3108 :           machine_mode src_mode
    3570         3108 :             = convert_p ? GET_MODE (XEXP (src, 0)) : VOIDmode;
    3571         3108 :           switch (src_mode)
    3572              :             {
    3573          153 :             case E_SFmode:
    3574          153 :             case E_DFmode:
    3575          153 :               if (TARGET_USE_VECTOR_FP_CONVERTS
    3576          147 :                   || !TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY)
    3577            8 :                 continue;
    3578              :               break;
    3579         2955 :             case E_SImode:
    3580         2955 :             case E_DImode:
    3581         2955 :               if (TARGET_USE_VECTOR_CONVERTS
    3582         2943 :                   || !TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY)
    3583           14 :                 continue;
    3584              :               break;
    3585           65 :             case E_VOIDmode:
    3586           65 :               gcc_assert (!convert_p);
    3587              :               break;
    3588            0 :             default:
    3589            0 :               gcc_unreachable ();
    3590              :             }
    3591              : 
    3592         3151 :           if (!v4sf_const0)
    3593         1022 :             v4sf_const0 = gen_reg_rtx (V4SFmode);
    3594              :           /* View the zero register in the vector mode matching DEST_MODE.  */
    3595         3151 :           rtx zero;
    3596         3151 :           machine_mode dest_vecmode;
    3597         3151 :           switch (dest_mode)
    3598              :             {
    3599           50 :             case E_HFmode:
    3600           50 :               dest_vecmode = V8HFmode;
    3601           50 :               zero = gen_rtx_SUBREG (V8HFmode, v4sf_const0, 0);
    3602           50 :               break;
    3603              :             case E_SFmode:
    3604              :               dest_vecmode = V4SFmode;
    3605              :               zero = v4sf_const0;
    3606              :               break;
    3607         1167 :             case E_DFmode:
    3608         1167 :               dest_vecmode = V2DFmode;
    3609         1167 :               zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0);
    3610         1167 :               break;
    3611            0 :             default:
    3612            0 :               gcc_unreachable ();
    3613              :             }
    3614              : 
    3615              :           /* Change source to vector mode.  */
    3616         3151 :           src = gen_rtx_VEC_DUPLICATE (dest_vecmode, src);
    3617         3151 :           src = gen_rtx_VEC_MERGE (dest_vecmode, src, zero,
    3618              :                                    GEN_INT (HOST_WIDE_INT_1U));
    3619              :           /* Change destination to vector mode.  */
    3620         3151 :           rtx vec = gen_reg_rtx (dest_vecmode);
    3621              :           /* Generate an XMM vector SET.  */
    3622         3151 :           set = gen_rtx_SET (vec, src);
    3623         3151 :           set_insn = emit_insn_before (set, insn);
    3624              : 
    3625         3151 :           if (cfun->can_throw_non_call_exceptions)
    3626              :             {
    3627              :               /* Copy any REG_EH_REGION note of INSN to the new insn and
                                       remember the new insn for the block split below.  */
    3628            0 :               rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
    3629            0 :               if (note)
    3630              :                 {
    3631            0 :                   control_flow_insns.safe_push (set_insn);
    3632            0 :                   add_reg_note (set_insn, REG_EH_REGION, XEXP (note, 0));
    3633              :                 }
    3634              :             }
    3635              : 
    3636         3151 :           src = gen_rtx_SUBREG (dest_mode, vec, 0);
    3637         3151 :           set = gen_rtx_SET (dest, src);
    3638              : 
    3639              :           /* Replace the whole pattern with the new SET, which drops
                                   possible dead definitions.  */
    3640         3151 :           PATTERN (insn) = set;
    3641              : 
    3642         3151 :           INSN_CODE (insn) = -1;
    3643         3151 :           recog_memoized (insn);
    3644         3151 :           df_insn_rescan (insn);
    3645         3151 :           bitmap_set_bit (convert_bbs, bb->index);
    3646              :         }
    3647              :     }
    3648              : 
    3649        33896 :   if (v4sf_const0)
    3650              :     {
    3651              :       /* (Re-)discover loops so that bb->loop_father can be used in the
    3652              :          analysis below.  */
    3653         1022 :       calculate_dominance_info (CDI_DOMINATORS);
    3654         1022 :       loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
    3655              : 
    3656         1022 :       ix86_place_single_vector_set (v4sf_const0,
    3657              :                                     CONST0_RTX (V4SFmode),
    3658              :                                     convert_bbs);
    3659              : 
    3660         1022 :       loop_optimizer_finalize ();
    3661              : 
    3662         1022 :       if (!control_flow_insns.is_empty ())
    3663              :         {
    3664            0 :           free_dominance_info (CDI_DOMINATORS);
    3665              : 
    3666            0 :           unsigned int i;
    3667            0 :           FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
    3668            0 :             if (control_flow_insn_p (insn))
    3669              :               {
    3670              :                 /* Split the block after insn.  There will be a fallthru
    3671              :                    edge, which is OK so we keep it.  We have to create
    3672              :                    the exception edges ourselves.  */
    3673            0 :                 bb = BLOCK_FOR_INSN (insn);
    3674            0 :                 split_block (bb, insn);
    3675            0 :                 rtl_make_eh_edge (NULL, bb, BB_END (bb));
    3676              :               }
    3677              :         }
    3678              :     }
    3679              : 
    3680        33896 :   df_process_deferred_rescans ();
    3681        33896 :   df_clear_flags (DF_DEFER_INSN_RESCAN);
    3682        33896 :   bitmap_obstack_release (NULL);
    3683        33896 :   BITMAP_FREE (convert_bbs);
    3684              : 
    3685        33896 :   timevar_pop (TV_MACH_DEP);
    3686        33896 :   return 0;
    3687        33896 : }
    3688              : 
    3689              : namespace {
    3690              : /* Descriptor of the "rpad" RTL pass.  */
    3691              : const pass_data pass_data_remove_partial_avx_dependency =
    3692              : {
    3693              :   RTL_PASS, /* type */
    3694              :   "rpad", /* name */
    3695              :   OPTGROUP_NONE, /* optinfo_flags */
    3696              :   TV_MACH_DEP, /* tv_id */
    3697              :   0, /* properties_required */
    3698              :   0, /* properties_provided */
    3699              :   0, /* properties_destroyed */
    3700              :   0, /* todo_flags_start */
    3701              :   0, /* todo_flags_finish */
    3702              : };
    3703              : /* Pass object: gated by ix86_rpad_gate, runs remove_partial_avx_dependency.  */
    3704              : class pass_remove_partial_avx_dependency : public rtl_opt_pass
    3705              : {
    3706              : public:
    3707       298828 :   pass_remove_partial_avx_dependency (gcc::context *ctxt)
    3708       597656 :     : rtl_opt_pass (pass_data_remove_partial_avx_dependency, ctxt)
    3709              :   {}
    3710              : 
    3711              :   /* opt_pass methods: */
    3712      1488378 :   bool gate (function *) final override
    3713              :     {
    3714      1488378 :       return ix86_rpad_gate ();
    3715              :     }
    3716              : 
    3717        33896 :   unsigned int execute (function *) final override
    3718              :     {
    3719        33896 :       return remove_partial_avx_dependency ();
    3720              :     }
    3721              : }; // class pass_remove_partial_avx_dependency
    3722              : 
    3723              : } // anon namespace
    3724              : /* Return a newly allocated pass_remove_partial_avx_dependency for CTXT.  */
    3725              : rtl_opt_pass *
    3726       298828 : make_pass_remove_partial_avx_dependency (gcc::context *ctxt)
    3727              : {
    3728       298828 :   return new pass_remove_partial_avx_dependency (ctxt);
    3729              : }
    3730              : 
    3731              : /* Return the vector mode of SIZE (in GET_MODE_SIZE units) whose element mode
    3732              :    is SMODE, or the inner mode of SMODE when SMODE is a vector mode.  */
    3733              : 
    3734              : static machine_mode
    3735        63797 : ix86_get_vector_cse_mode (unsigned int size, machine_mode smode)
    3736              : {
    3737              :   /* Use the inner scalar mode of vector broadcast source in:
    3738              : 
    3739              :      (set (reg:V8DF 394)
    3740              :           (vec_duplicate:V8DF (reg:V2DF 190 [ alpha ])))
    3741              : 
    3742              :      to compute the vector mode for broadcast from vector source.
    3743              :    */
    3744        63797 :   if (VECTOR_MODE_P (smode))
    3745        30749 :     smode = GET_MODE_INNER (smode);
    3746        63797 :   scalar_mode s_mode = as_a <scalar_mode> (smode);
    3747       127594 :   poly_uint64 nunits = size / GET_MODE_SIZE (smode);
    3748        63797 :   machine_mode mode = mode_for_vector (s_mode, nunits).require ();
    3749        63797 :   return mode;
    3750              : }
    3751              : 
    3752              : /* Replace the source operand of instructions in VECTOR_INSNS with
    3753              :    VECTOR_CONST in VECTOR_MODE.  VECTOR_INSNS holds insn UIDs.  SCALAR_MODE
    3754              :    is the mode given to the destination when the new source is a CONST_INT.  */
    3755              : static void
    3756        63326 : replace_vector_const (machine_mode vector_mode, rtx vector_const,
    3757              :                       auto_bitmap &vector_insns,
    3758              :                       machine_mode scalar_mode)
    3759              : {
    3760        63326 :   bitmap_iterator bi;
    3761        63326 :   unsigned int id;
    3762              : 
    3763       222040 :   EXECUTE_IF_SET_IN_BITMAP (vector_insns, 0, id, bi)
    3764              :     {
    3765       158714 :       rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
    3766              : 
    3767              :       /* Get the single SET instruction.  */
    3768       158714 :       rtx set = single_set (insn);
    3769       158714 :       rtx src = SET_SRC (set);
    3770       158714 :       rtx dest = SET_DEST (set);
    3771       158714 :       machine_mode mode = GET_MODE (dest);
    3772              : 
    3773       158714 :       rtx replace;
    3774              :       /* Replace the source operand with VECTOR_CONST.  */
    3775       158714 :       if (SUBREG_P (src)
    3776       158714 :           || mode == vector_mode
    3777        60235 :           || CONST_INT_P (vector_const))
    3778              :         replace = vector_const;
    3779              :       else
    3780              :         {
    3781        60235 :           unsigned int size = GET_MODE_SIZE (mode);
    3782        60235 :           if (size < ix86_regmode_natural_size (mode))
    3783              :             {
    3784              :               /* If the mode size is smaller than its natural size, first
    3785              :                  insert an extra move with a SUBREG in the same-size vector mode
    3786              :                  of SCALAR_MODE elements to avoid validate_subreg failure.  */
    3787          471 :               machine_mode vmode
    3788          471 :                 = ix86_get_vector_cse_mode (size, scalar_mode);
    3789          471 :               rtx vreg;
    3790          471 :               if (mode == vmode)
    3791              :                 vreg = vector_const;
    3792              :               else
    3793              :                 {
    3794           59 :                   vreg = gen_reg_rtx (vmode);
    3795           59 :                   rtx vsubreg = gen_rtx_SUBREG (vmode, vector_const, 0);
    3796           59 :                   rtx pat = gen_rtx_SET (vreg, vsubreg);
    3797           59 :                   rtx_insn *vinsn = emit_insn_before (pat, insn);
    3798           59 :                   if (dump_file)
    3799              :                     {
    3800            0 :                       fprintf (dump_file, "\nInsert an extra move:\n\n");
    3801            0 :                       print_rtl_single (dump_file, vinsn);
    3802            0 :                       fprintf (dump_file, "\nbefore:\n\n");
    3803            0 :                       print_rtl_single (dump_file, insn);
    3804            0 :                       fprintf (dump_file, "\n");
    3805              :                     }
    3806              :                 }
    3807          471 :               replace = gen_rtx_SUBREG (mode, vreg, 0);
    3808              :             }
    3809              :           else
    3810        59764 :             replace = gen_rtx_SUBREG (mode, vector_const, 0);
    3811              :         }
    3812              : 
    3813       158714 :       if (dump_file)
    3814              :         {
    3815            3 :           fprintf (dump_file, "\nReplace:\n\n");
    3816            3 :           print_rtl_single (dump_file, insn);
    3817              :         }
    3818       158714 :       SET_SRC (set) = replace;
    3819       158714 :       if (CONST_INT_P (replace))  /* Store through DEST's SCALAR_MODE lowpart.  */
    3820              :         {
    3821        23098 :           dest = gen_lowpart (scalar_mode, dest);
    3822        23098 :           SET_DEST (set) = dest;
    3823              :         }
    3824              :       /* Drop possible dead definitions.  */
    3825       158714 :       PATTERN (insn) = set;
    3826       158714 :       INSN_CODE (insn) = -1;
    3827       158714 :       recog_memoized (insn);
    3828       158714 :       if (dump_file)
    3829              :         {
    3830            3 :           fprintf (dump_file, "\nwith:\n\n");
    3831            3 :           print_rtl_single (dump_file, insn);
    3832            3 :           fprintf (dump_file, "\n");
    3833              :         }
    3834       158714 :       df_insn_rescan (insn);
    3835              :     }
    3836        63326 : }
    3837              : 
    3838              : /* Return the inner scalar if OP is a broadcast, else return nullptr.  */
    3839              : 
    3840              : static rtx
    3841      2196863 : ix86_broadcast_inner (rtx op, machine_mode mode,
    3842              :                       machine_mode *scalar_mode_p,
    3843              :                       x86_cse_kind *kind_p, rtx_insn **insn_p)
    3844              : {
    3845      2196863 :   switch (standard_sse_constant_p (op, mode))
    3846              :     {
    3847       114776 :     case 1:
    3848       114776 :       *scalar_mode_p = QImode;
    3849       114776 :       *kind_p = X86_CSE_CONST0_VECTOR;
    3850       114776 :       *insn_p = nullptr;
    3851       114776 :       return const0_rtx;
    3852        12163 :     case 2:
    3853        12163 :       *scalar_mode_p = QImode;
    3854        12163 :       *kind_p = X86_CSE_CONSTM1_VECTOR;
    3855        12163 :       *insn_p = nullptr;
    3856        12163 :       return constm1_rtx;
    3857      2069924 :     default:
    3858      2069924 :       break;
    3859              :     }
    3860              : 
    3861      2069924 :   mode = GET_MODE (op);
    3862      2069924 :   int nunits = GET_MODE_NUNITS (mode);
    3863      2069924 :   if (nunits < 2)
    3864              :     return nullptr;
    3865              : 
    3866      1595135 :   bool const_vector_p = CONST_VECTOR_P (op);
    3867      1595135 :   bool duplicated = GET_CODE (op) == VEC_DUPLICATE;
    3868      1595135 :   rtx orig_op = op;
    3869      1595135 :   if (!const_vector_p)
    3870              :     {
    3871              :       /* Check CONST_VECTOR in REG_EQUAL note.  */
    3872      1595115 :       rtx equal = find_reg_equal_equiv_note (*insn_p);
    3873      1595115 :       if (equal)
    3874              :         {
    3875       373876 :           equal = XEXP (equal, 0);
    3876       373876 :           const_vector_p = CONST_VECTOR_P (equal);
    3877              :           /* Use CONST_VECTOR in REG_EQUAL note.  */
    3878       373876 :           if (const_vector_p)
    3879              :             {
    3880              :               /* Handle REG_EQUAL note in:
    3881              : 
    3882              :                  (insn 7 5 12 2 (set (subreg:V8SI (reg:V4DI 100) 0)
    3883              :                         (vec_duplicate:V8SI (reg:SI 102)))
    3884              :                     (expr_list:REG_DEAD (reg:SI 102)
    3885              :                        (expr_list:REG_EQUAL (const_vector:V4DI [
    3886              :                           (const_int -1 [0xffffffffffffffff]) repeated x4]) (nil))))
    3887              : 
    3888              :                  NB: Don't treat it as CONST_VECTOR since EQUAL isn't
    3889              :                  supported by ISAs as in gcc.target/i386/pr40957.c.  */
    3890       262242 :               if (GET_MODE (equal) != mode)
    3891              :                 const_vector_p = false;
    3892              :               else
    3893      1595135 :                 op = equal;
    3894              :             }
    3895              :         }
    3896              :     }
    3897              : 
    3898      1595135 :   machine_mode inner_mode = GET_MODE_INNER (mode);
    3899              : 
    3900      1595135 :   if (const_vector_p)
    3901              :     {
    3902       524456 :       bool int_load_p = GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
    3903       262228 :       *kind_p = X86_CSE_CONST_VECTOR;
    3904       262228 :       if (int_load_p)
    3905              :         {
    3906              :           /* This CONST_VECTOR load can be converted to constant
    3907              :              integer load.  */
    3908        34371 :           *scalar_mode_p = mode;
    3909        34371 :           *insn_p = nullptr;
    3910        34371 :           return op;
    3911              :         }
    3912              : 
    3913              :       /* This CONST_VECTOR is wider than the integer register.  */
    3914       227857 :       rtx first = XVECEXP (op, 0, 0);
    3915              : 
    3916       227857 :       if (duplicated)
    3917              :         {
    3918              :           /* Check if CONST_VECTOR in REG_EQUAL note is duplicated in
    3919              : 
    3920              :              (insn 10 7 12 2 (set (reg:V8SI 128)
    3921              :                 (vec_duplicate:V8SI (vec_select:V2SI (reg:V4SI 180)
    3922              :                         (parallel [(const_int 0 [0])
    3923              :                                    (const_int 1 [0x1])]))))
    3924              :                   (expr_list:REG_EQUAL (const_vector:V8SI [
    3925              :                     (const_int 0 [0])
    3926              :                     (const_int 34 [0x22])
    3927              :                     (const_int 0 [0])
    3928              :                     (const_int 34 [0x22])
    3929              :                     (const_int 0 [0])
    3930              :                     (const_int 34 [0x22])
    3931              :                     (const_int 0 [0])
    3932              :                     (const_int 34 [0x22])])(nil)))
    3933              : 
    3934              :            */
    3935              : 
    3936       211672 :           bool duplicated_const_vector = true;
    3937       211672 :           for (int i = 1; i < nunits; ++i)
    3938              :             {
    3939       138641 :               rtx tmp = XVECEXP (op, 0, i);
    3940       138641 :               if (!rtx_equal_p (tmp, first))
    3941              :                 {
    3942              :                   duplicated_const_vector = false;
    3943              :                   break;
    3944              :                 }
    3945              :             }
    3946              : 
    3947        73047 :           if (duplicated_const_vector)
    3948              :             {
    3949        73031 :               bool const_double_p = CONST_DOUBLE_P (first);
    3950              :               /* Force the floating point constant to memory.  */
    3951        73031 :               if (const_double_p)
    3952         5538 :                 first = validize_mem (force_const_mem (inner_mode, first));
    3953              : 
    3954        73031 :               if (const_double_p || CONST_INT_P (first))
    3955              :                 {
    3956              :                   /* Handle
    3957              : 
    3958              :                      (insn 7 6 8 2 (set (reg:V4SF 99)
    3959              :                           (vec_duplicate:V4SF (mem/u/c:SF (symbol_ref/u:DI ("*.LC2") [flags 0x2]) [0  S4 A32])))
    3960              :                         (expr_list:REG_EQUAL (const_vector:V4SF [
    3961              :                            (const_double:SF 3.4e+1 [0x0.88p+6]) repeated x4]) (nil)))
    3962              : 
    3963              :                      and
    3964              : 
    3965              :                      (insn 14 15 16 3 (set (reg:V4SI 116)
    3966              :                           (vec_duplicate:V4SI (reg:SI 117)))
    3967              :                        (expr_list:REG_EQUAL (const_vector:V4SI [
    3968              :                           (const_int 34 [0x22]) repeated x4]) (nil)))
    3969              : 
    3970              :                    */
    3971        73031 :                   *kind_p = X86_CSE_VEC_DUP;
    3972        73031 :                   *insn_p = nullptr;
    3973        73031 :                   *scalar_mode_p = inner_mode;
    3974        73031 :                   return first;
    3975              :                 }
    3976              :             }
    3977              : 
    3978              :           op = orig_op;
    3979              :         }
    3980              :       else
    3981              :         {
    3982              :           /* Only native CONST_VECTOR is allowed.  */
    3983       154810 :           if (orig_op != op)
    3984              :             return nullptr;
    3985              : 
    3986              :           /* Check if VEC_DUPLICATE can be used.  */
    3987           48 :           for (int i = 1; i < nunits; ++i)
    3988              :             {
    3989           48 :               rtx tmp = XVECEXP (op, 0, i);
    3990              :               /* Vector duplicate value.  */
    3991           48 :               if (!rtx_equal_p (tmp, first))
    3992              :                 return nullptr;
    3993              :             }
    3994              : 
    3995              :           /* Use the inner mode to handle
    3996              :              (const_vector:V2QI [(const_int 0 [0]) repeated x2])
    3997              :            */
    3998            0 :           *scalar_mode_p = inner_mode;
    3999            0 :           *insn_p = nullptr;
    4000            0 :           return first;
    4001              :         }
    4002              :     }
    4003              : 
    4004      1332923 :   if (!duplicated)
    4005              :     return nullptr;
    4006              : 
    4007        22671 :   *kind_p = X86_CSE_VEC_DUP;
    4008              : 
    4009              :   /* Only
    4010              : 
    4011              :      (vec_duplicate:V4SI (reg:SI 99))
    4012              :      (vec_duplicate:V2DF (mem/u/c:DF (symbol_ref/u:DI ("*.LC1") [flags 0x2]) [0 S8 A64]))
    4013              : 
    4014              :      are supported.  Set OP to the broadcast source by default.  */
    4015        22671 :   op = XEXP (op, 0);
    4016        22671 :   rtx reg = op;
    4017        22671 :   if (SUBREG_P (op)
    4018          403 :       && SUBREG_BYTE (op) == 0
    4019        23074 :       && !paradoxical_subreg_p (op))
    4020          403 :     reg = SUBREG_REG (op);
    4021        22671 :   if (!REG_P (reg))
    4022              :     {
    4023         2305 :       if (MEM_P (op)
    4024         2041 :           && SYMBOL_REF_P (XEXP (op, 0))
    4025         2554 :           && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
    4026              :         {
    4027              :           /* Handle constant broadcast from memory.  */
    4028           11 :           *scalar_mode_p = inner_mode;
    4029           11 :           *insn_p = nullptr;
    4030           11 :           return op;
    4031              :         }
    4032              :       return nullptr;
    4033              :     }
    4034              : 
    4035        20366 :   machine_mode orig_mode = mode;
    4036        20366 :   mode = GET_MODE (op);
    4037              : 
    4038              :   /* Only single def chain is supported.  */
    4039        20366 :   df_ref ref = DF_REG_DEF_CHAIN (REGNO (reg));
    4040        20366 :   if (!ref
    4041        20365 :       || DF_REF_IS_ARTIFICIAL (ref)
    4042        20365 :       || DF_REF_NEXT_REG (ref) != nullptr)
    4043              :     return nullptr;
    4044              : 
    4045        14872 :   rtx_insn *insn = DF_REF_INSN (ref);
    4046        14872 :   rtx set = single_set (insn);
    4047        14872 :   if (!set)
    4048              :     return nullptr;
    4049              : 
    4050        14833 :   rtx src = SET_SRC (set);
    4051              : 
    4052        14833 :   if (CONST_INT_P (src))
    4053              :     {
    4054              :       /* Handle sequences like
    4055              : 
    4056              :          (set (subreg:SI (reg/v:SF 105 [ f ]) 0)
    4057              :               (const_int 0 [0]))
    4058              :          (set (reg:V4SF 110)
    4059              :               (vec_duplicate:V4SF (reg/v:SF 105 [ f ])))
    4060              : 
    4061              :          and
    4062              : 
    4063              :          (set (reg:SI 99)
    4064              :                (const_int 34 [0x22]))
    4065              :          (set (reg:V4SI 98)
    4066              :                (vec_duplicate:V4SI (reg:SI 99)))
    4067              : 
    4068              :          Set *INSN_P to nullptr and return SET_SRC if SET_SRC is an
    4069              :          integer constant.  */
    4070          233 :       op = src;
    4071          233 :       if (SCALAR_INT_MODE_P (mode) && mode != GET_MODE (reg))
    4072            0 :         op = gen_int_mode (INTVAL (src), mode);
    4073          233 :       if (op == const0_rtx)
    4074              :         {
    4075            6 :            if (standard_sse_constant_p (CONST0_RTX (orig_mode),
    4076              :                                         orig_mode) == 1)
    4077              :              {
    4078            6 :                *scalar_mode_p = QImode;
    4079            6 :                *kind_p = X86_CSE_CONST0_VECTOR;
    4080            6 :                *insn_p = nullptr;
    4081            6 :                return const0_rtx;
    4082              :              }
    4083            0 :            op = CONST0_RTX (mode);
    4084              :         }
    4085          227 :       else if (op == constm1_rtx
    4086          227 :                && standard_sse_constant_p (CONSTM1_RTX (orig_mode),
    4087              :                                            orig_mode) == 2)
    4088              :         {
    4089            0 :           *scalar_mode_p = QImode;
    4090            0 :           *kind_p = X86_CSE_CONSTM1_VECTOR;
    4091            0 :           *insn_p = nullptr;
    4092            0 :           return constm1_rtx;
    4093              :         }
    4094              : 
    4095              :       /* Check if we can convert:
    4096              : 
    4097              :          (insn 14 465 412 3 (set (reg:SI 507 [ j_lsm.26 ])
    4098              :                 (const_int 2 [0x2])) "foo.c":10:12 discrim 2 100 {*movsi_internal} (nil))
    4099              :          ...
    4100              :          (insn 518 507 434 16 (set (reg:V2SI 493)
    4101              :                 (vec_duplicate:V2SI (reg:SI 507 [ j_lsm.26 ]))) 2395 {*vec_dupv2si} (nil))
    4102              : 
    4103              :          to constant integer load:
    4104              : 
    4105              :          (insn 566 55 56 6 (set (subreg:DI (reg:V2SI 517) 0)
    4106              :                 (const_int 8589934594 [0x200000002])) -1 (nil))
    4107              :          ...
    4108              :          (insn 518 507 434 16 (set (reg:V2SI 493)
    4109              :                 (reg:V2SI 517)) 2066 {*movv2si_internal} (nil))
    4110              : 
    4111              :        */
    4112          454 :       if (GET_MODE_SIZE (orig_mode) <= UNITS_PER_WORD)
    4113            6 :         *kind_p = X86_CSE_CONST_VECTOR;
    4114              : 
    4115          227 :       *insn_p = nullptr;
    4116              :     }
    4117              :   else
    4118              :     {
    4119              :       /* Handle sequences like
    4120              : 
    4121              :          (set (reg:QI 105 [ c ])
    4122              :               (reg:QI 5 di [ c ]))
    4123              :          (set (reg:V64QI 102 [ _1 ])
    4124              :               (vec_duplicate:V64QI (reg:QI 105 [ c ])))
    4125              : 
    4126              :          (set (reg/v:SI 116 [ argc ])
    4127              :               (mem/c:SI (reg:SI 135) [2 argc+0 S4 A32]))
    4128              :          (set (reg:V4SI 119 [ _45 ])
    4129              :               (vec_duplicate:V4SI (reg/v:SI 116 [ argc ])))
    4130              : 
    4131              :          (set (reg:SI 98 [ _1 ])
    4132              :               (sign_extend:SI (reg:QI 106 [ c ])))
    4133              :          (set (reg:V16SI 103 [ _2 ])
    4134              :                (vec_duplicate:V16SI (reg:SI 98 [ _1 ])))
    4135              : 
    4136              :          (set (reg:SI 102 [ cost ])
    4137              :               (mem/c:SI (symbol_ref:DI ("cost") [flags 0x40])))
    4138              :          (set (reg:V4HI 103 [ _16 ])
    4139              :               (vec_duplicate:V4HI (subreg:HI (reg:SI 102 [ cost ]) 0)))
    4140              : 
    4141              :          (set (subreg:SI (reg/v:HI 107 [ cr_val ]) 0)
    4142              :               (ashift:SI (reg:SI 158)
    4143              :                          (subreg:QI (reg:SI 156 [ _2 ]) 0)))
    4144              :          (set (reg:V16HI 183 [ _61 ])
    4145              :               (vec_duplicate:V16HI (reg/v:HI 107 [ cr_val ])))
    4146              : 
    4147              :          Set *INSN_P to INSN and return the broadcast source otherwise.  */
    4148        14600 :       *insn_p = insn;
    4149              :     }
    4150              : 
    4151        14827 :   *scalar_mode_p = mode;
    4152        14827 :   return op;
    4153              : }
    4154              : 
    4155              : /* Replace each TLS CALL instruction whose UID is in TLS_CALL_INSNS
    4156              :    with a SET of the same destination from SRC and record the UID of
                      :    each new SET instruction in UPDATED_TLS_INSNS.  Each original
                      :    instruction must be a PARALLEL whose first element is a SET; it
                      :    is deleted once its replacement has been emitted after it.  */
    4157              : 
    4158              : static void
    4159          313 : replace_tls_call (rtx src, auto_bitmap &tls_call_insns,
    4160              :                   auto_bitmap &updated_tls_insns)
    4161              : {
    4162          313 :   bitmap_iterator bi;
    4163          313 :   unsigned int id;
    4164              : 
    4165         1739 :   EXECUTE_IF_SET_IN_BITMAP (tls_call_insns, 0, id, bi)
    4166              :     {
    4167         1426 :       rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
    4168              : 
    4169              :       /* If this isn't a CALL, only GNU2 TLS implicit CALL patterns
    4170              :          (tls64 attribute TLS64_CALL or TLS64_COMBINE) are allowed.  */
    4171         1426 :       if (!CALL_P (insn))
    4172              :         {
    4173           47 :           attr_tls64 tls64 = get_attr_tls64 (insn);
    4174           47 :           if (tls64 != TLS64_CALL && tls64 != TLS64_COMBINE)
    4175            0 :             gcc_unreachable ();
    4176              :         }
    4177              : 
    4178         1426 :       rtx pat = PATTERN (insn);
    4179         1426 :       gcc_assert (GET_CODE (pat) == PARALLEL);
    4180         1426 :       rtx set = XVECEXP (pat, 0, 0);  /* First element of the PARALLEL.  */
    4181         1426 :       gcc_assert (GET_CODE (set) == SET);
    4182         1426 :       rtx dest = SET_DEST (set);
    4183              : 
    4184         1426 :       set = gen_rtx_SET (dest, src);  /* Keep DEST, replace the source.  */
    4185         1426 :       rtx_insn *set_insn = emit_insn_after (set, insn);
    4186         1426 :       if (recog_memoized (set_insn) < 0)  /* The new SET must be recognized.  */
    4187            0 :         gcc_unreachable ();
    4188              : 
    4189              :       /* Record the UID of SET_INSN in UPDATED_TLS_INSNS.  */
    4190         1426 :       bitmap_set_bit (updated_tls_insns, INSN_UID (set_insn));
    4191              : 
    4192         1426 :       if (dump_file)
    4193              :         {
    4194            0 :           fprintf (dump_file, "\nReplace:\n\n");
    4195            0 :           print_rtl_single (dump_file, insn);
    4196            0 :           fprintf (dump_file, "\nwith:\n\n");
    4197            0 :           print_rtl_single (dump_file, set_insn);
    4198            0 :           fprintf (dump_file, "\n");
    4199              :         }
    4200              : 
    4201              :       /* Delete the original TLS CALL insn now that SET_INSN replaces it.  */
    4202         1426 :       delete_insn (insn);
    4203              : 
    4204         1426 :       df_insn_rescan (set_insn);
    4205              :     }
    4206          313 : }
    4207              : 
    4208              : /* Return the basic block which dominates all basic blocks which set
    4209              :    hard register REGNO used in basic block BB.  Only DEFs of REGNO
                      :    which are not artificial, not may-clobber and not must-clobber,
                      :    and which are located in basic blocks that dominate BB, are taken
                      :    into account.
                      :    NOTE(review): nothing here checks that at least one such DEF
                      :    exists before nearest_common_dominator_for_set is called;
                      :    presumably callers guarantee it -- confirm.  */
    4210              : 
    4211              : static basic_block
    4212            2 : ix86_get_dominator_for_reg (unsigned int regno, basic_block bb)
    4213              : {
    4214            2 :   basic_block set_bb;
    4215            2 :   auto_bitmap set_bbs;
    4216              : 
    4217              :   /* Walk all DEFs of REGNO and collect the indexes of the BBs which
    4218              :      set REGNO and dominate the current BB.  */
    4219            2 :   for (df_ref def = DF_REG_DEF_CHAIN (regno);
    4220           18 :        def;
    4221           16 :        def = DF_REF_NEXT_REG (def))
    4222           16 :     if (!DF_REF_IS_ARTIFICIAL (def)
    4223           16 :         && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER)
    4224            6 :         && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER))
    4225              :       {
    4226            4 :         set_bb = DF_REF_BB (def);
    4227            4 :         if (dominated_by_p (CDI_DOMINATORS, bb, set_bb))
    4228            2 :           bitmap_set_bit (set_bbs, set_bb->index);
    4229              :       }
    4230              : 
    4231            2 :   bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
    4232            2 :   return bb;
    4233            2 : }
    4234              : 
    4235              : /* Callback for note_stores.  Mark FLAGS register as live in DATA, a
    4236              :    pointer to the auto_bitmap of live caller-saved registers, if DEST
                      :    is FLAGS register and the store X isn't a CLOBBER.  */
    4237              : 
    4238              : static void
    4239          381 : ix86_check_flags_reg (rtx dest, const_rtx x, void *data)
    4240              : {
    4241          381 :   if (GET_CODE (x) == CLOBBER)  /* A CLOBBER doesn't make FLAGS live.  */
    4242              :     return;
    4243              : 
    4244          374 :   auto_bitmap *live_caller_saved_regs = (auto_bitmap *) data;
    4245          374 :   if (REG_P (dest) && REGNO (dest) == FLAGS_REG)
    4246            0 :     bitmap_set_bit (*live_caller_saved_regs, FLAGS_REG);
    4247              : }
    4248              : 
    4249              : /* Emit the TLS_SET instruction of KIND in basic block BB, or in a
    4250              :    dominating basic block when no suitable insertion point is found
    4251              :    in BB, and return the emitted instruction.  Store the insertion
    4252              :    point in *BEFORE_P when emit_insn_before is used or in *AFTER_P
    4253              :    when emit_insn_after is used; the other one isn't written.
                      :    UPDATED_GNU_TLS_INSNS contains the UIDs of instructions which
                      :    replace the GNU TLS instructions.  UPDATED_GNU2_TLS_INSNS contains
                      :    the UIDs of instructions which replace the GNU2 TLS instructions.
                      :
                      :    The instruction is placed at a point where none of the registers
                      :    tracked as clobbered by it is live: FLAGS for every KIND, plus all
                      :    non-fixed call-used hard registers unless KIND is
                      :    X86_CSE_TLSDESC.  */
    4254              : 
    4255              : static rtx_insn *
    4256          313 : ix86_emit_tls_call (rtx tls_set, x86_cse_kind kind, basic_block bb,
    4257              :                     rtx_insn **before_p, rtx_insn **after_p,
    4258              :                     auto_bitmap &updated_gnu_tls_insns,
    4259              :                     auto_bitmap &updated_gnu2_tls_insns)
    4260              : {
    4261          315 :   rtx_insn *tls_insn;
    4262              : 
    4263          315 :   do  /* Each iteration tries one candidate BB.  */
    4264              :     {
    4265          315 :       rtx_insn *insn = BB_HEAD (bb);
    4266         1297 :       while (insn && !NONDEBUG_INSN_P (insn))  /* Find the first real insn.  */
    4267              :         {
    4268          986 :           if (insn == BB_END (bb))
    4269              :             {
    4270              :               /* This must be the beginning basic block:
    4271              : 
    4272              :                  (note 4 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
    4273              :                  (note 2 4 26 2 NOTE_INSN_FUNCTION_BEG)
    4274              : 
    4275              :                  or a basic block with only a label:
    4276              : 
    4277              :                  (code_label 78 11 77 3 14 (nil) [1 uses])
    4278              :                  (note 77 78 54 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
    4279              : 
    4280              :                  or a basic block with only a debug marker:
    4281              : 
    4282              :                  (note 3 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
    4283              :                  (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
    4284              :                  (debug_insn 5 2 16 2 (debug_marker) "x.c":6:3 -1 (nil))
    4285              : 
    4286              :                  or a basic block with only deleted instructions:
    4287              : 
    4288              :                  (code_label 348 23 349 45 3 (nil) [0 uses])
    4289              :                  (note 349 348 436 45 [bb 45] NOTE_INSN_BASIC_BLOCK)
    4290              :                  (note 436 349 362 45 NOTE_INSN_DELETED)
    4291              : 
    4292              :                  INSN is set to NULL to indicate that BB has no
                      :                  nondebug instruction.  */
    4293            4 :               gcc_assert (DEBUG_INSN_P (insn)
    4294              :                           || (NOTE_P (insn)
    4295              :                               && ((NOTE_KIND (insn)
    4296              :                                    == NOTE_INSN_FUNCTION_BEG)
    4297              :                                   || (NOTE_KIND (insn)
    4298              :                                       == NOTE_INSN_DELETED)
    4299              :                                   || (NOTE_KIND (insn)
    4300              :                                       == NOTE_INSN_BASIC_BLOCK))));
    4301              :               insn = NULL;
    4302              :               break;
    4303              :             }
    4304          982 :           insn = NEXT_INSN (insn);
    4305              :         }
    4306              : 
    4307              :       /* TLS_GD and TLS_LD_BASE instructions are normal functions which
    4308              :          clobber caller-saved registers.  TLSDESC instructions only
    4309              :          clobber FLAGS.  If any registers clobbered by TLS instructions
    4310              :          are live in this basic block, we must insert TLS instructions
    4311              :          after all live registers clobbered are dead.  */
    4312              : 
    4313          315 :       auto_bitmap live_caller_saved_regs;
    4314          630 :       bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb);
    4315              : 
    4316          315 :       if (bitmap_bit_p (in, FLAGS_REG))
    4317            4 :         bitmap_set_bit (live_caller_saved_regs, FLAGS_REG);
    4318              : 
    4319          315 :       unsigned int i;
    4320              : 
    4321              :       /* Get all live caller-saved registers for TLS_GD and TLS_LD_BASE
    4322              :          instructions, i.e. the non-fixed call-used hard registers
                      :          which are live on entry to BB.  */
    4323          315 :       if (kind != X86_CSE_TLSDESC)
    4324        27249 :         for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    4325        26956 :           if (call_used_regs[i]
    4326        25198 :               && !fixed_regs[i]
    4327        38993 :               && bitmap_bit_p (in, i))
    4328          344 :             bitmap_set_bit (live_caller_saved_regs, i);
    4329              : 
    4330          315 :       if (bitmap_empty_p (live_caller_saved_regs))  /* Nothing live: emit at top.  */
    4331              :         {
    4332           82 :           if (insn == BB_HEAD (bb))
    4333              :             {
    4334            0 :               *before_p = insn;
    4335            0 :               tls_insn = emit_insn_before (tls_set, insn);
    4336              :             }
    4337              :           else
    4338              :             {
    4339              :               /* Emit the TLS call after NOTE_INSN_FUNCTION_BEG in the
    4340              :                  beginning basic block:
    4341              : 
    4342              :                  (note 4 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
    4343              :                  (note 2 4 26 2 NOTE_INSN_FUNCTION_BEG)
    4344              : 
    4345              :                  or after NOTE_INSN_BASIC_BLOCK in a basic block with
    4346              :                  only a label:
    4347              : 
    4348              :                  (code_label 78 11 77 3 14 (nil) [1 uses])
    4349              :                  (note 77 78 54 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
    4350              : 
    4351              :                  or after debug marker in a basic block with only a
    4352              :                  debug marker:
    4353              : 
    4354              :                  (note 3 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
    4355              :                  (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
    4356              :                  (debug_insn 5 2 16 2 (debug_marker) "x.c":6:3 -1 (nil))
    4357              : 
    4358              :                  Otherwise emit it after the instruction which precedes
                      :                  the first nondebug instruction of BB.  */
    4359           82 :               insn = insn ? PREV_INSN (insn) : BB_END (bb);
    4360           82 :               *after_p = insn;
    4361           82 :               tls_insn = emit_insn_after (tls_set, insn);
    4362              :             }
    4363           82 :           return tls_insn;
    4364              :         }
    4365              : 
    4366          233 :       bool repeat = false;
    4367              : 
    4368              :       /* Search for REG_DEAD notes in this basic block and insert the
                      :          TLS instruction after the instruction at which the last
                      :          tracked live register dies.  */
    4369          661 :       FOR_BB_INSNS (bb, insn)
    4370              :         {
    4371          661 :           if (!NONDEBUG_INSN_P (insn))
    4372          283 :             continue;
    4373              : 
    4374              :           /* NB: Conditional jump is the only instruction which reads
    4375              :              flags register and changes control flow.  We can never
    4376              :              place the TLS call after unconditional jump.  */
    4377          378 :           if (JUMP_P (insn))
    4378              :             {
    4379              :               /* This must be a conditional jump.  */
    4380            2 :               rtx label = JUMP_LABEL (insn);
    4381            2 :               if (label == nullptr
    4382            2 :                   || ANY_RETURN_P (label)
    4383            2 :                   || !(LABEL_P (label) || SYMBOL_REF_P (label)))
    4384            0 :                 gcc_unreachable ();
    4385              : 
    4386              :               /* Place the call before all FLAGS_REG setting BBs since
    4387              :                  we can't place a call before nor after a conditional
    4388              :                  jump.  */
    4389            2 :               bb = ix86_get_dominator_for_reg (FLAGS_REG, bb);
    4390              : 
    4391              :               /* Start over again with the new BB.  */
    4392            2 :               repeat = true;
    4393            2 :               break;
    4394              :             }
    4395              : 
    4396          376 :           if (bitmap_bit_p (updated_gnu_tls_insns, INSN_UID (insn)))
    4397              :             {
    4398              :               /* Insert the __tls_get_addr call before INSN which
    4399              :                  replaces a __tls_get_addr call.  */
    4400            1 :               *before_p = insn;
    4401            1 :               tls_insn = emit_insn_before (tls_set, insn);
    4402            1 :               return tls_insn;
    4403              :             }
    4404              : 
    4405          375 :           if (bitmap_bit_p (updated_gnu2_tls_insns, INSN_UID (insn)))
    4406              :             {
    4407              :               /* Mark FLAGS register as dead since FLAGS register
    4408              :                  would be clobbered by the GNU2 TLS instruction.  */
    4409            1 :               bitmap_clear_bit (live_caller_saved_regs, FLAGS_REG);
    4410            1 :               continue;
    4411              :             }
    4412              : 
    4413              :           /* Mark FLAGS register as live if INSN sets, not just clobbers, it.  */
    4414          374 :           note_stores (insn, ix86_check_flags_reg,
    4415              :                        &live_caller_saved_regs);
    4416              : 
    4417          374 :           rtx link;
    4418          515 :           for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
    4419          371 :             if ((REG_NOTE_KIND (link) == REG_DEAD
    4420            9 :                  || (REG_NOTE_KIND (link) == REG_UNUSED
    4421            7 :                      && REGNO (XEXP (link, 0)) == FLAGS_REG))
    4422          378 :                 && REG_P (XEXP (link, 0)))
    4423              :               {
    4424              :                 /* Mark the live caller-saved register as dead.
                      :                    NOTE(review): the condition above evaluates REGNO
                      :                    for REG_UNUSED notes before REG_P is checked;
                      :                    consider testing REG_P first.  */
    4425          743 :                 for (i = REGNO (XEXP (link, 0));
    4426          743 :                      i < END_REGNO (XEXP (link, 0));
    4427              :                      i++)
    4428          374 :                   if (i < FIRST_PSEUDO_REGISTER)
    4429          351 :                     bitmap_clear_bit (live_caller_saved_regs, i);
    4430              : 
    4431          369 :                 if (bitmap_empty_p (live_caller_saved_regs))
    4432              :                   {
    4433          230 :                     *after_p = insn;
    4434          230 :                     tls_insn = emit_insn_after (tls_set, insn);
    4435          230 :                     return tls_insn;
    4436              :                   }
    4437              :               }
    4438              :         }
    4439              : 
    4440              :       /* NB: Start over again for conditional jump.  */
    4441            2 :       if (repeat)
    4442            2 :         continue;
    4443              : 
    4444            0 :       gcc_assert (!bitmap_empty_p (live_caller_saved_regs));
    4445              : 
    4446              :       /* If any live caller-saved registers aren't dead at the end of
    4447              :          this basic block, get the basic block which dominates all
    4448              :          basic blocks which set the remaining live registers and try
                      :          again there.  */
    4449            0 :       auto_bitmap set_bbs;
    4450            0 :       bitmap_iterator bi;
    4451            0 :       unsigned int id;
    4452            0 :       EXECUTE_IF_SET_IN_BITMAP (live_caller_saved_regs, 0, id, bi)
    4453              :         {
    4454            0 :           basic_block set_bb = ix86_get_dominator_for_reg (id, bb);
    4455            0 :           bitmap_set_bit (set_bbs, set_bb->index);
    4456              :         }
    4457            0 :       bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
    4458            2 :     }
    4459              :   while (true);
    4460              : }
    4461              : 
    4462              : /* Generate a TLS call of KIND with VAL and copy the call result to DEST,
    4463              :    at entry of the nearest dominator for basic block map BBS, which is in
    4464              :    the fake loop that contains the whole function, so that there is only
    4465              :    a single TLS CALL of KIND with VAL in the whole function.
    4466              :    UPDATED_GNU_TLS_INSNS contains instructions which replace the GNU TLS
    4467              :    instructions.  UPDATED_GNU2_TLS_INSNS contains instructions which
    4468              :    replace the GNU2 TLS instructions.  If TLSDESC_SET isn't nullptr,
    4469              :    a copy of it is inserted before the TLS call.
                      :
                      :    For X86_CSE_TLS_GD and X86_CSE_TLS_LD_BASE the call result is in
                      :    the AX register and is copied to DEST by a separate SET carrying a
                      :    REG_EQUAL note.  For X86_CSE_TLSDESC the TLS instruction itself
                      :    sets DEST from VAL and clobbers FLAGS.  KIND must be one of these
                      :    three.  */
    4470              : 
    4471              : static void
    4472          313 : ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind,
    4473              :                             auto_bitmap &bbs,
    4474              :                             auto_bitmap &updated_gnu_tls_insns,
    4475              :                             auto_bitmap &updated_gnu2_tls_insns,
    4476              :                             rtx tlsdesc_set = nullptr)
    4477              : {
    4478          313 :   basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
    4479          313 :   while (bb->loop_father->latch  /* Move BB out of any enclosing loop.  */
    4480          322 :          != EXIT_BLOCK_PTR_FOR_FN (cfun))
    4481            9 :     bb = get_immediate_dominator (CDI_DOMINATORS,
    4482              :                                   bb->loop_father->header);
    4483              : 
    4484          313 :   rtx rax = nullptr, rdi;
    4485          313 :   rtx eqv = nullptr;
    4486          313 :   rtx caddr;
    4487          313 :   rtx set;
    4488          313 :   rtx clob;
    4489          313 :   rtx symbol;
    4490          313 :   rtx tls;
    4491              : 
    4492          313 :   switch (kind)  /* Build the TLS pattern in TLS.  */
    4493              :     {
    4494          262 :     case X86_CSE_TLS_GD:
    4495          262 :       rax = gen_rtx_REG (Pmode, AX_REG);
    4496          262 :       rdi = gen_rtx_REG (Pmode, DI_REG);
    4497          262 :       caddr = ix86_tls_get_addr ();
    4498              : 
    4499          262 :       symbol = XVECEXP (val, 0, 0);
    4500          262 :       tls = gen_tls_global_dynamic_64 (Pmode, rax, symbol, caddr, rdi);
    4501              : 
    4502          262 :       if (GET_MODE (symbol) != Pmode)  /* Only affects the REG_EQUAL value.  */
    4503            0 :         symbol = gen_rtx_ZERO_EXTEND (Pmode, symbol);
    4504              :       eqv = symbol;
    4505              :       break;
    4506              : 
    4507           30 :     case X86_CSE_TLS_LD_BASE:
    4508           30 :       rax = gen_rtx_REG (Pmode, AX_REG);
    4509           30 :       rdi = gen_rtx_REG (Pmode, DI_REG);
    4510           30 :       caddr = ix86_tls_get_addr ();
    4511              : 
    4512           30 :       tls = gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi);
    4513              : 
    4514              :       /* Attach a unique REG_EQUAL to DEST, to allow the RTL optimizers
    4515              :          to share the LD_BASE result with other LD model accesses.  */
    4516           30 :       eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
    4517              :                             UNSPEC_TLS_LD_BASE);
    4518              : 
    4519           30 :       break;
    4520              : 
    4521           21 :     case X86_CSE_TLSDESC:  /* DEST is set directly; RAX and EQV stay nullptr.  */
    4522           21 :       set = gen_rtx_SET (dest, val);
    4523           21 :       clob = gen_rtx_CLOBBER (VOIDmode,
    4524              :                               gen_rtx_REG (CCmode, FLAGS_REG));
    4525           21 :       tls = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clob));
    4526           21 :       break;
    4527              : 
    4528            0 :     default:
    4529            0 :       gcc_unreachable ();
    4530              :     }
    4531              : 
    4532              :   /* Emit the TLS CALL insn.  Exactly one of BEFORE and AFTER is set
                      :      to the insertion point by ix86_emit_tls_call.  */
    4533          313 :   rtx_insn *before = nullptr;
    4534          313 :   rtx_insn *after = nullptr;
    4535          313 :   rtx_insn *tls_insn = ix86_emit_tls_call (tls, kind, bb, &before,
    4536              :                                            &after,
    4537              :                                            updated_gnu_tls_insns,
    4538              :                                            updated_gnu2_tls_insns);
    4539              : 
    4540          313 :   rtx_insn *tlsdesc_insn = nullptr;
    4541          313 :   if (tlsdesc_set)
    4542              :     {
    4543           16 :       rtx dest = copy_rtx (SET_DEST (tlsdesc_set));  /* NB: shadows parameter DEST.  */
    4544           16 :       rtx src = copy_rtx (SET_SRC (tlsdesc_set));
    4545           16 :       tlsdesc_set = gen_rtx_SET (dest, src);
    4546           16 :       tlsdesc_insn = emit_insn_before (tlsdesc_set, tls_insn);
    4547              :     }
    4548              : 
    4549          313 :   if (kind != X86_CSE_TLSDESC)
    4550              :     {
    4551          292 :       RTL_CONST_CALL_P (tls_insn) = 1;
    4552              : 
    4553              :       /* Indicate that this function can't jump to non-local gotos.  */
    4554          292 :       make_reg_eh_region_note_nothrow_nononlocal (tls_insn);
    4555              :     }
    4556              : 
    4557          313 :   if (recog_memoized (tls_insn) < 0)  /* The TLS insn must be recognized.  */
    4558            0 :     gcc_unreachable ();
    4559              : 
    4560          313 :   if (dump_file)
    4561              :     {
    4562            0 :       if (after)
    4563              :         {
    4564            0 :           fprintf (dump_file, "\nPlace:\n\n");
    4565            0 :           if (tlsdesc_insn)
    4566            0 :             print_rtl_single (dump_file, tlsdesc_insn);
    4567            0 :           print_rtl_single (dump_file, tls_insn);
    4568            0 :           fprintf (dump_file, "\nafter:\n\n");
    4569            0 :           print_rtl_single (dump_file, after);
    4570            0 :           fprintf (dump_file, "\n");
    4571              :         }
    4572              :       else
    4573              :         {
    4574            0 :           fprintf (dump_file, "\nPlace:\n\n");
    4575            0 :           if (tlsdesc_insn)
    4576            0 :             print_rtl_single (dump_file, tlsdesc_insn);
    4577            0 :           print_rtl_single (dump_file, tls_insn);
    4578            0 :           fprintf (dump_file, "\nbefore:\n\n");
    4579            0 :           print_rtl_single (dump_file, before);
    4580            0 :           fprintf (dump_file, "\n");
    4581              :         }
    4582              :     }
    4583              : 
    4584          313 :   if (kind != X86_CSE_TLSDESC)
    4585              :     {
    4586              :       /* Copy RAX to DEST and attach EQV as a REG_EQUAL note.  */
    4587          292 :       set = gen_rtx_SET (dest, rax);
    4588          292 :       rtx_insn *set_insn = emit_insn_after (set, tls_insn);
    4589          292 :       set_dst_reg_note (set_insn, REG_EQUAL, copy_rtx (eqv), dest);
    4590          292 :       if (dump_file)
    4591              :         {
    4592            0 :           fprintf (dump_file, "\nPlace:\n\n");
    4593            0 :           print_rtl_single (dump_file, set_insn);
    4594            0 :           fprintf (dump_file, "\nafter:\n\n");
    4595            0 :           print_rtl_single (dump_file, tls_insn);
    4596            0 :           fprintf (dump_file, "\n");
    4597              :         }
    4598              :     }
    4599          313 : }
    4600              : 
    4601              : namespace {
    4602              : 
    4603              : const pass_data pass_data_x86_cse =  /* Descriptor of the x86_cse RTL pass.  */
    4604              : {
    4605              :   RTL_PASS, /* type */
    4606              :   "x86_cse", /* name */
    4607              :   OPTGROUP_NONE, /* optinfo_flags */
    4608              :   TV_MACH_DEP, /* tv_id */
    4609              :   0, /* properties_required */
    4610              :   0, /* properties_provided */
    4611              :   0, /* properties_destroyed */
    4612              :   0, /* todo_flags_start */
    4613              :   0, /* todo_flags_finish */
    4614              : };
    4615              : 
    4616              : class pass_x86_cse : public rtl_opt_pass  /* RTL pass described by pass_data_x86_cse.  */
    4617              : {
    4618              : public:
    4619       298828 :   pass_x86_cse (gcc::context *ctxt)
    4620       597656 :     : rtl_opt_pass (pass_data_x86_cse, ctxt)
    4621              :   {}
    4622              : 
    4623              :   /* opt_pass methods: */
    4624      1488378 :   bool gate (function *fun) final override
    4625              :     {
    4626      1488378 :       return optimize && optimize_function_for_speed_p (fun);  /* Only when optimizing FUN for speed.  */
    4627              :     }
    4628              : 
    4629       976653 :   unsigned int execute (function *) final override
    4630              :     {
    4631       976653 :       return x86_cse ();  /* All the work is done by x86_cse.  */
    4632              :     }
    4633              : 
    4634              : private:
    4635              :   /* The redundant source value.  */
    4636              :   rtx val;
    4637              :   /* The actual redundant source value for UNSPEC_TLSDESC.  */
    4638              :   rtx tlsdesc_val;
    4639              :   /* The instruction which defines the redundant value.  */
    4640              :   rtx_insn *def_insn;
    4641              :   /* Mode of the destination of the candidate redundant instruction.  */
    4642              :   machine_mode mode;
    4643              :   /* Mode of the source of the candidate redundant instruction.  */
    4644              :   machine_mode scalar_mode;
    4645              :   /* The classification of the candidate redundant instruction.  */
    4646              :   x86_cse_kind kind;
    4647              : 
    4648              :   unsigned int x86_cse (void);  /* Pass body, called from execute.  */
    4649              :   bool candidate_gnu_tls_p (rtx_insn *, attr_tls64);
    4650              :   bool candidate_gnu2_tls_p (rtx, attr_tls64);
    4651              :   bool candidate_vector_p (rtx, rtx_insn *);
    4652              :   rtx_insn *tls_set_insn_from_symbol (const_rtx, const_rtx);
    4653              : }; // class pass_x86_cse
    4654              : 
    4655              : /* Return the instruction which sets REG from TLS_SYMBOL.
                      : 
                      :    Every definition of REG in its DF def chain is checked.  Return
                      :    nullptr if any definition is artificial, is not a TLS64_LEA
                      :    instruction, or has a first source operand that is not equal to
                      :    TLS_SYMBOL.  Otherwise return the instruction of the last
                      :    definition visited, or nullptr if REG has no definition.  */
    4656              : 
    4657              : rtx_insn *
    4658           42 : pass_x86_cse::tls_set_insn_from_symbol (const_rtx reg,
    4659              :                                         const_rtx tls_symbol)
    4660              : {
    4661           42 :   rtx_insn *set_insn = nullptr;
    4662           42 :   for (df_ref ref = DF_REG_DEF_CHAIN (REGNO (reg));
    4663          111 :        ref;
    4664           69 :        ref = DF_REF_NEXT_REG (ref))
    4665              :     {
    4666           69 :       if (DF_REF_IS_ARTIFICIAL (ref))
    4667              :         return nullptr;
    4668              : 
    4669           69 :       set_insn = DF_REF_INSN (ref);
    4670           69 :       if (get_attr_tls64 (set_insn) != TLS64_LEA)
    4671              :         return nullptr;
    4672              :       /* The pattern of a TLS64_LEA instruction is used directly as a
                      :          SET; the first element of its source vector is compared
                      :          with TLS_SYMBOL.  */
    4673           69 :       rtx tls_set = PATTERN (set_insn);
    4674           69 :       rtx tls_src = XVECEXP (SET_SRC (tls_set), 0, 0);
    4675           69 :       if (!rtx_equal_p (tls_symbol, tls_src))
    4676              :         return nullptr;
    4677              :     }
    4678              : 
    4679              :   return set_insn;
    4680              : }
    4681              : 
    4682              : /* Return true and output def_insn, val, mode, scalar_mode and kind if
    4683              :    INSN is UNSPEC_TLS_GD or UNSPEC_TLS_LD_BASE.  TLS64 is the tls64
                      :    attribute of INSN.
                      : 
                      :    Return false, leaving those members untouched, unless this is a
                      :    64-bit target and the current function has
                      :    tls_descriptor_call_multiple_p set.  On success, mode and
                      :    scalar_mode are the mode of the destination of the first element
                      :    of the PARALLEL, val is its second element (asserted to be an
                      :    UNSPEC), kind is X86_CSE_TLS_GD when TLS64 is TLS64_GD and
                      :    X86_CSE_TLS_LD_BASE otherwise, and def_insn is nullptr.  */
    4684              : 
    4685              : bool
    4686         2190 : pass_x86_cse::candidate_gnu_tls_p (rtx_insn *insn, attr_tls64 tls64)
    4687              : {
    4688         2190 :   if (!TARGET_64BIT || !cfun->machine->tls_descriptor_call_multiple_p)
    4689              :     return false;
    4690              : 
    4691              :   /* Record the redundant TLS CALLs for 64-bit:
    4692              : 
    4693              :      (parallel [
    4694              :         (set (reg:DI 0 ax)
    4695              :              (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
    4696              :                       (const_int 0 [0])))
    4697              :         (unspec:DI [(symbol_ref:DI ("foo") [flags 0x50])
    4698              :                     (reg/f:DI 7 sp)] UNSPEC_TLS_GD)
    4699              :         (clobber (reg:DI 5 di))])
    4700              : 
    4701              : 
    4702              :      and
    4703              : 
    4704              :      (parallel [
    4705              :         (set (reg:DI 0 ax)
    4706              :              (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
    4707              :                       (const_int 0 [0])))
    4708              :         (unspec:DI [(reg/f:DI 7 sp)] UNSPEC_TLS_LD_BASE)])
    4709              : 
    4710              :    */
    4711              : 
    4712         2022 :   rtx pat = PATTERN (insn);
    4713         2022 :   rtx set = XVECEXP (pat, 0, 0);
    4714         2022 :   gcc_assert (GET_CODE (set) == SET);
    4715         2022 :   rtx dest = SET_DEST (set);
    4716         2022 :   scalar_mode = mode = GET_MODE (dest);
    4717         2022 :   val = XVECEXP (pat, 0, 1);
    4718         2022 :   gcc_assert (GET_CODE (val) == UNSPEC);
    4719              : 
    4720         2022 :   if (tls64 == TLS64_GD)
    4721         1921 :     kind = X86_CSE_TLS_GD;
    4722              :   else
    4723          101 :     kind = X86_CSE_TLS_LD_BASE;
    4724              : 
    4725         2022 :   def_insn = nullptr;
    4726         2022 :   return true;
    4727              : }
    4728              : 
    4729              : /* Return true and output def_insn, val, tlsdesc_val, mode, scalar_mode
    4730              :    and kind if SET is UNSPEC_TLSDESC.  TLS64 is the tls64 attribute of
                      :    the instruction containing SET.
                      : 
                      :    Return false, leaving those members untouched, unless this is a
                      :    64-bit target and the current function has
                      :    tls_descriptor_call_multiple_p set.  Past that check, tlsdesc_val
                      :    is always the whole source of SET and kind is X86_CSE_TLSDESC.
                      : 
                      :    For TLS64_COMBINE, val is the second operand of the PLUS and
                      :    def_insn is nullptr.  Otherwise val is the TLS symbol and def_insn
                      :    is the instruction found by tls_set_insn_from_symbol; if that
                      :    lookup fails, false is returned with val, tlsdesc_val, kind, mode
                      :    and scalar_mode already overwritten.  */
    4731              : 
    4732              : bool
    4733           56 : pass_x86_cse::candidate_gnu2_tls_p (rtx set, attr_tls64 tls64)
    4734              : {
    4735           56 :   if (!TARGET_64BIT || !cfun->machine->tls_descriptor_call_multiple_p)
    4736              :     return false;
    4737              : 
    4738           54 :   rtx tls_symbol;
    4739           54 :   rtx_insn *set_insn;
    4740           54 :   rtx src = SET_SRC (set);
    4741           54 :   val = src;
    4742           54 :   tlsdesc_val = src;
    4743           54 :   kind = X86_CSE_TLSDESC;
    4744              : 
    4745           54 :   if (tls64 == TLS64_COMBINE)
    4746              :     {
    4747              :       /* Record 64-bit TLS64_COMBINE:
    4748              : 
    4749              :          (set (reg/f:DI 104)
    4750              :               (plus:DI (unspec:DI [
    4751              :                           (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
    4752              :                           (reg:DI 114)
    4753              :                           (reg/f:DI 7 sp)] UNSPEC_TLSDESC)
    4754              :                        (const:DI (unspec:DI [
    4755              :                                     (symbol_ref:DI ("e") [flags 0x1a])
    4756              :                                   ] UNSPEC_DTPOFF))))
    4757              : 
    4758              :          (set (reg/f:DI 104)
    4759              :               (plus:DI (unspec:DI [
    4760              :                           (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
    4761              :                           (unspec:DI [
    4762              :                              (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
    4763              :                           ] UNSPEC_TLSDESC)
    4764              :                           (reg/f:DI 7 sp)] UNSPEC_TLSDESC)
    4765              :                        (const:DI (unspec:DI [
    4766              :                                     (symbol_ref:DI ("e") [flags 0x1a])
    4767              :                                  ] UNSPEC_DTPOFF))))
    4768              :      */
    4769              : 
    4770           12 :       scalar_mode = mode = GET_MODE (src);
    4771              : 
    4772              :       /* Since the first operand of PLUS in the source TLS_COMBINE
    4773              :          pattern is unused, use the second operand of PLUS:
    4774              : 
    4775              :          (const:DI (unspec:DI [
    4776              :                       (symbol_ref:DI ("e") [flags 0x1a])
    4777              :                    ] UNSPEC_DTPOFF))
    4778              : 
    4779              :          as VAL to check if 2 TLS_COMBINE patterns have the same
    4780              :          source.  */
    4781           12 :       val = XEXP (src, 1);
    4782           12 :       gcc_assert (GET_CODE (val) == CONST
    4783              :                   && GET_CODE (XEXP (val, 0)) == UNSPEC
    4784              :                       && XINT (XEXP (val, 0), 1) == UNSPEC_DTPOFF
    4785              :                       && SYMBOL_REF_P (XVECEXP (XEXP (val, 0), 0, 0)));
    4786           12 :       def_insn = nullptr;
    4787           12 :       return true;
    4788              :     }
    4789              : 
    4790              :   /* Record 64-bit TLS_CALL:
    4791              : 
    4792              :      (set (reg:DI 101)
    4793              :           (unspec:DI [(symbol_ref:DI ("foo") [flags 0x50])
    4794              :                       (reg:DI 112)
    4795              :                       (reg/f:DI 7 sp)] UNSPEC_TLSDESC))
    4796              : 
    4797              :    */
    4798              : 
    4799           42 :   gcc_assert (GET_CODE (src) == UNSPEC);
    4800           42 :   tls_symbol = XVECEXP (src, 0, 0);
    4801           42 :   src = XVECEXP (src, 0, 1);
    4802           42 :   scalar_mode = mode = GET_MODE (src);
    4803           42 :   gcc_assert (REG_P (src));
    4804              : 
    4805              :   /* All definitions of reg:DI 129 in
    4806              : 
    4807              :      (set (reg:DI 110)
    4808              :           (unspec:DI [(symbol_ref:DI ("foo"))
    4809              :                       (reg:DI 129)
    4810              :                       (reg/f:DI 7 sp)] UNSPEC_TLSDESC))
    4811              : 
    4812              :      should have the same source as in
    4813              : 
    4814              :      (set (reg:DI 129)
    4815              :           (unspec:DI [(symbol_ref:DI ("foo"))] UNSPEC_TLSDESC))
    4816              : 
    4817              :    */
    4818              : 
    4819           42 :   set_insn = tls_set_insn_from_symbol (src, tls_symbol);
    4820           42 :   if (!set_insn)
    4821              :     return false;
    4822              : 
    4823              :   /* Use TLS_SYMBOL as VAL to check if 2 patterns have the same source.  */
    4824           42 :   val = tls_symbol;
    4825           42 :   def_insn = set_insn;
    4826           42 :   return true;
    4827              : }
    4828              : 
    4829              : /* Return true and output def_insn, val, mode, scalar_mode and kind if
    4830              :    INSN, whose single set is SET, is a vector broadcast instruction.
                      : 
                      :    mode is always set to the mode of the destination of SET, even
                      :    when false is returned.  val, scalar_mode and kind come from
                      :    ix86_broadcast_inner, which is also passed def_insn preset to
                      :    INSN; the result is true exactly when it returns a nonnull
                      :    value.  */
    4831              : 
    4832              : bool
    4833     49619974 : pass_x86_cse::candidate_vector_p (rtx set, rtx_insn *insn)
    4834              : {
    4835     49619974 :   rtx src = SET_SRC (set);
    4836     49619974 :   rtx dest = SET_DEST (set);
    4837     49619974 :   mode = GET_MODE (dest);
    4838              :   /* Skip instructions whose destination mode is not a vector mode.  */
    4839     49619974 :   if (!VECTOR_MODE_P (mode))
    4840              :     return false;
    4841              : 
    4842              :   /* Skip instructions whose destination is neither a REG nor a SUBREG.  */
    4843      3697385 :   if (!REG_P (dest) && !SUBREG_P (dest))
    4844              :     return false;
    4845              : 
    4846      2196863 :   def_insn = insn;
    4847      2196863 :   val = ix86_broadcast_inner (src, mode, &scalar_mode, &kind,
    4848              :                               &def_insn);
    4849      2196863 :   return val ? true : false;
    4850              : }
    4851              : 
    4852              : /* At entry of the nearest common dominator for basic blocks with
    4853              : 
    4854              :    1. Vector CONST0_RTX patterns.
    4855              :    2. Vector CONSTM1_RTX patterns.
    4856              :    3. Vector broadcast patterns.
    4857              :    4. UNSPEC_TLS_GD patterns.
    4858              :    5. UNSPEC_TLS_LD_BASE patterns.
    4859              :    6. UNSPEC_TLSDESC patterns.
    4860              : 
    4861              :    generate a single pattern whose destination is used to replace the
    4862              :    source in all identical patterns.
    4863              : 
    4864              :    NB: We want to generate a pattern, which is executed only once, to
    4865              :    cover the whole function.  The LCM algorithm isn't appropriate here
    4866              :    since it may place a pattern inside the loop.  */
    4867              : 
    4868              : unsigned int
    4869       976653 : pass_x86_cse::x86_cse (void)
    4870              : {
    4871       976653 :   timevar_push (TV_MACH_DEP);
    4872              : 
    4873       976653 :   auto_vec<redundant_pattern *> loads;
    4874       976653 :   redundant_pattern *load;
    4875       976653 :   basic_block bb;
    4876       976653 :   rtx_insn *insn;
    4877       976653 :   unsigned int i;
    4878       976653 :   auto_bitmap updated_gnu_tls_insns;
    4879       976653 :   auto_bitmap updated_gnu2_tls_insns;
    4880       976653 :   auto_bitmap call_bbs;
    4881              : 
    4882       976653 :   df_set_flags (DF_DEFER_INSN_RESCAN);
    4883              : 
    4884       976653 :   bool recursive_call_p = cfun->machine->recursive_function;
    4885              : 
    4886     10831686 :   FOR_EACH_BB_FN (bb, cfun)
    4887              :     {
    4888    129855939 :       FOR_BB_INSNS (bb, insn)
    4889              :         {
    4890    120000906 :           if (!NONDEBUG_INSN_P (insn))
    4891     66727867 :             continue;
    4892              : 
    4893     53273039 :           bool matched = false;
    4894              :           /* Remove redundant patterns if there are at least 2 of
    4895              :              them.  */
    4896     53273039 :           unsigned int threshold = 2;
    4897              : 
    4898     53273039 :           bool call_p = CALL_P (insn);
    4899     53273039 :           rtx set = single_set (insn);
    4900     53273039 :           if (!set && !call_p)
    4901      1104511 :             continue;
    4902              : 
    4903     52168528 :           tlsdesc_val = nullptr;
    4904              : 
    4905     52168528 :           attr_tls64 tls64 = get_attr_tls64 (insn);
    4906              : 
    4907              :           /* NB: TLS calls preserve all registers.  */
    4908     52168528 :           if (call_p && tls64 == TLS64_NONE)
    4909      4399353 :             bitmap_set_bit (call_bbs, BLOCK_FOR_INSN (insn)->index);
    4910              : 
    4911     52168528 :           switch (tls64)
    4912              :             {
    4913         2190 :             case TLS64_GD:
    4914         2190 :             case TLS64_LD_BASE:
    4915              :               /* Verify UNSPEC_TLS_GD and UNSPEC_TLS_LD_BASE.  */
    4916         2190 :               if (candidate_gnu_tls_p (insn, tls64))
    4917              :                 break;
    4918          168 :               continue;
    4919              : 
    4920           56 :             case TLS64_CALL:
    4921           56 :             case TLS64_COMBINE:
    4922              :               /* Verify UNSPEC_TLSDESC.  */
    4923           56 :               if (candidate_gnu2_tls_p (set, tls64))
    4924              :                 break;
    4925            2 :               continue;
    4926              : 
    4927           38 :             case TLS64_LEA:
    4928              :               /* Skip TLS64_LEA.  */
    4929           38 :               continue;
    4930              : 
    4931     52166244 :             case TLS64_NONE:
    4932     52166244 :               if (!set)
    4933      2546270 :                 continue;
    4934              : 
    4935              :               /* Check for vector broadcast.  */
    4936     49619974 :               if (candidate_vector_p (set, insn))
    4937              :                 break;
    4938     49370789 :               continue;
    4939              :             }
    4940              : 
    4941              :           /* Check if there is a matching redundant load.   */
    4942       595578 :           FOR_EACH_VEC_ELT (loads, i, load)
    4943       440818 :             if (load->val
    4944       440818 :                 && load->kind == kind
    4945       294295 :                 && load->mode == scalar_mode
    4946       258645 :                 && (load->bb == bb
    4947       197213 :                     || (kind != X86_CSE_VEC_DUP
    4948       197213 :                         && kind != X86_CSE_CONST_VECTOR)
    4949              :                     /* Non all 0s/1s vector load must be in the same
    4950              :                        basic block if it is in a recursive call.  */
    4951       137425 :                     || !recursive_call_p)
    4952       697336 :                 && rtx_equal_p (load->val, val))
    4953              :               {
    4954              :                 /* Record instruction.  */
    4955        96501 :                 bitmap_set_bit (load->insns, INSN_UID (insn));
    4956              : 
    4957              :                 /* Record the maximum vector size.  */
    4958        96501 :                 if (kind <= X86_CSE_VEC_DUP
    4959       191889 :                     && load->size < GET_MODE_SIZE (mode))
    4960         1014 :                   load->size = GET_MODE_SIZE (mode);
    4961              : 
    4962              :                 /* Record the basic block.  */
    4963        96501 :                 bitmap_set_bit (load->bbs, bb->index);
    4964              : 
    4965              :                 /* Increment the count.  */
    4966        96501 :                 load->count++;
    4967              : 
    4968        96501 :                 matched = true;
    4969        96501 :                 break;
    4970              :               }
    4971              : 
    4972       251261 :           if (matched)
    4973        96501 :             continue;
    4974              : 
    4975              :           /* We see this instruction the first time.  Record the
    4976              :              redundant source value, its mode, the destination size,
    4977              :              instruction which defines the redundant source value,
    4978              :              instruction basic block and the instruction kind.  */
    4979       154760 :           load = new redundant_pattern;
    4980              : 
    4981              :           /* Convert CONST_VECTOR load no larger than integer register
    4982              :              to constant integer load even if there is no redundant
    4983              :              CONST_VECTOR load.  */
    4984       154760 :           if (CONST_VECTOR_P (val))
    4985        30748 :             threshold = 1;
    4986              : 
    4987       154760 :           load->val = copy_rtx (val);
    4988       154760 :           if (tlsdesc_val)
    4989           28 :             load->tlsdesc_val = copy_rtx (tlsdesc_val);
    4990              :           else
    4991       154732 :             load->tlsdesc_val = nullptr;
    4992       154760 :           load->mode = scalar_mode;
    4993       154760 :           load->dest_mode = mode;
    4994       154760 :           load->size = GET_MODE_SIZE (mode);
    4995       154760 :           load->def_insn = def_insn;
    4996       154760 :           load->count = 1;
    4997       154760 :           load->threshold = threshold;
    4998       154760 :           load->bb = BLOCK_FOR_INSN (insn);
    4999       154760 :           load->kind = kind;
    5000              : 
    5001       154760 :           bitmap_set_bit (load->insns, INSN_UID (insn));
    5002       154760 :           bitmap_set_bit (load->bbs, bb->index);
    5003              : 
    5004       154760 :           loads.safe_push (load);
    5005              :         }
    5006              :     }
    5007              : 
    5008              :   bool replaced = false;
    5009      1131413 :   FOR_EACH_VEC_ELT (loads, i, load)
    5010       154760 :     if (load->count >= load->threshold)
    5011              :       {
    5012        63639 :         machine_mode mode;
    5013        63639 :         rtx reg, broadcast_reg;
    5014        63639 :         rtx broadcast_source = nullptr;
    5015        63639 :         replaced = true;
    5016        63639 :         switch (load->kind)
    5017              :           {
    5018          313 :           case X86_CSE_TLS_GD:
    5019          313 :           case X86_CSE_TLS_LD_BASE:
    5020          313 :           case X86_CSE_TLSDESC:
    5021          313 :             broadcast_reg = gen_reg_rtx (load->mode);
    5022          313 :             replace_tls_call (broadcast_reg, load->insns,
    5023          313 :                               (load->kind == X86_CSE_TLSDESC
    5024              :                                ? updated_gnu2_tls_insns
    5025              :                                : updated_gnu_tls_insns));
    5026          313 :             load->broadcast_reg = broadcast_reg;
    5027          313 :             break;
    5028              : 
    5029        11171 :           case X86_CSE_VEC_DUP:
    5030        11171 :             if (CONST_INT_P (load->val)
    5031        10048 :                 && (load->val == CONST0_RTX (load->mode)
    5032        10072 :                     || load->size <= UNITS_PER_WORD))
    5033              :               {
    5034              :                 /* Generate CONST_VECTOR load.  */
    5035        30749 :               case X86_CSE_CONST_VECTOR:
    5036        30749 :                 mode = ix86_get_vector_cse_mode (load->size,
    5037              :                                                  load->mode);
    5038              : 
    5039        30749 :                 if (CONST_VECTOR_P (load->val))
    5040              :                   broadcast_source = load->val;
    5041            1 :                 else if (load->val == CONST0_RTX (load->mode))
    5042            0 :                   broadcast_source = CONST0_RTX (mode);
    5043            1 :                 else if (load->val == CONSTM1_RTX (load->mode))
    5044            0 :                   broadcast_source = CONSTM1_RTX (mode);
    5045              :                 else
    5046              :                   {
    5047            1 :                     int nunits = GET_MODE_NUNITS (mode);
    5048            1 :                     rtvec v = rtvec_alloc (nunits);
    5049            3 :                     for (int j = 0; j < nunits ; j++)
    5050            2 :                       RTVEC_ELT (v, j) = load->val;
    5051            1 :                     broadcast_source = gen_rtx_CONST_VECTOR (mode, v);
    5052              :                   }
    5053              : 
    5054              :                 /* NB: Zero CONST_VECTOR load works for MMX and XMM
    5055              :                    registers.  */
    5056        32160 :                 if (load->size <= UNITS_PER_WORD)
    5057              :                   {
    5058              :                     /* Convert CONST_VECTOR load no larger than integer
    5059              :                        register:
    5060              : 
    5061              :                        (set (reg:V2SI 106)
    5062              :                             (const_vector:V2SI [(const_int 1 [1]) repeated x2]))
    5063              : 
    5064              :                        to constant integer load:
    5065              : 
    5066              :                        (set (subreg:DI (reg:V2SI 106 [ _20 ]) 0)
    5067              :                             (const_int 4294967297 [0x100000001]))
    5068              :                        */
    5069        30749 :                     machine_mode int_mode
    5070        30749 :                       = int_mode_for_mode (mode).require ();
    5071        30749 :                     load->dest_mode = int_mode;
    5072        30749 :                     broadcast_source = simplify_subreg (int_mode,
    5073              :                                                         broadcast_source,
    5074              :                                                         mode, 0);
    5075        30749 :                     gcc_assert (broadcast_source != nullptr);
    5076              : 
    5077        30749 :                     bool keep_const_int_load = false;
    5078        30749 :                     if (!bitmap_empty_p (call_bbs))
    5079              :                       {
    5080        27498 :                         bitmap_iterator bi;
    5081        27498 :                         unsigned int id;
    5082        36029 :                         EXECUTE_IF_SET_IN_BITMAP (load->bbs, 0, id, bi)
    5083        28636 :                           if (bitmap_bit_p (call_bbs, id))
    5084              :                             {
    5085              :                               /* NB: Constant integer load is faster
    5086              :                                  than save and restore an integer
    5087              :                                  register when crossing a function call.
    5088              :                                */
    5089              :                               keep_const_int_load = true;
    5090              :                               break;
    5091              :                             }
    5092              :                       }
    5093              : 
    5094        27498 :                     if (keep_const_int_load)
    5095              :                       {
    5096              :                         /* Keep constant integer load.  */
    5097        20105 :                         replace_vector_const (mode, broadcast_source,
    5098        20105 :                                               load->insns, int_mode);
    5099        20105 :                         load->broadcast_source = nullptr;
    5100        20105 :                         load->broadcast_reg = nullptr;
    5101              :                       }
    5102              :                     else
    5103              :                       {
    5104        10644 :                         broadcast_reg = gen_reg_rtx (mode);
    5105        10644 :                         reg = gen_reg_rtx (load->mode);
    5106        10644 :                         replace_vector_const (mode, broadcast_reg,
    5107        10644 :                                               load->insns, load->mode);
    5108        10644 :                         load->broadcast_source = broadcast_source;
    5109        10644 :                         load->broadcast_reg = broadcast_reg;
    5110              :                       }
    5111              :                     break;
    5112              :                   }
    5113              :               }
    5114              :             /* FALLTHRU */
    5115              : 
    5116        32577 :           case X86_CSE_CONST0_VECTOR:
    5117        32577 :           case X86_CSE_CONSTM1_VECTOR:
    5118        32577 :             mode = ix86_get_vector_cse_mode (load->size, load->mode);
    5119        32577 :             broadcast_reg = gen_reg_rtx (mode);
    5120        32577 :             if (load->def_insn)
    5121              :               {
    5122              :                 /* Replace redundant vector loads with a single vector
    5123              :                    load in the same basic block.  */
    5124          841 :                 reg = load->val;
    5125          841 :                 if (load->mode != GET_MODE (reg))
    5126            0 :                   reg = gen_rtx_SUBREG (load->mode, reg, 0);
    5127          841 :                 broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg);
    5128              :               }
    5129              :             else
    5130              :               /* This is a constant integer/double vector.  If the
    5131              :                  inner scalar is 0 or -1, set vector to CONST0_RTX
    5132              :                  or CONSTM1_RTX directly.  */
    5133        31736 :               switch (load->kind)
    5134              :                 {
    5135        19828 :                 case X86_CSE_CONST0_VECTOR:
    5136        19828 :                   broadcast_source = CONST0_RTX (mode);
    5137        19828 :                   break;
    5138         1578 :                 case X86_CSE_CONSTM1_VECTOR:
    5139         1578 :                   broadcast_source = CONSTM1_RTX (mode);
    5140         1578 :                   break;
    5141        10330 :                 case X86_CSE_CONST_VECTOR:
    5142        10330 :                 case X86_CSE_VEC_DUP:
    5143        10330 :                   if (!broadcast_source)
    5144              :                     {
    5145        10330 :                       reg = gen_reg_rtx (load->mode);
    5146        10330 :                       broadcast_source = gen_rtx_VEC_DUPLICATE (mode,
    5147              :                                                                 reg);
    5148              :                     }
    5149              :                   break;
    5150            0 :                 default:
    5151            0 :                   gcc_unreachable ();
    5152              :                 }
    5153        32577 :             replace_vector_const (mode, broadcast_reg, load->insns,
    5154              :                                   load->mode);
    5155        32577 :             load->broadcast_source = broadcast_source;
    5156        32577 :             load->broadcast_reg = broadcast_reg;
    5157        32577 :             break;
    5158              :           }
    5159              :       }
    5160              : 
    5161       976653 :   if (replaced)
    5162              :     {
    5163        41343 :       auto_vec<rtx_insn *> control_flow_insns;
    5164              : 
    5165              :       /* (Re-)discover loops so that bb->loop_father can be used in the
    5166              :          analysis below.  */
    5167        41343 :       calculate_dominance_info (CDI_DOMINATORS);
    5168        41343 :       loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
    5169              : 
    5170       125650 :       FOR_EACH_VEC_ELT (loads, i, load)
    5171        84307 :         if (load->count >= load->threshold)
    5172              :           {
    5173        63639 :             rtx set;
    5174        63639 :             if (load->def_insn)
    5175          857 :               switch (load->kind)
    5176              :                 {
    5177           16 :                 case X86_CSE_TLSDESC:
    5178           16 :                   ix86_place_single_tls_call (load->broadcast_reg,
    5179              :                                               load->tlsdesc_val,
    5180              :                                               load->kind,
    5181           16 :                                               load->bbs,
    5182              :                                               updated_gnu_tls_insns,
    5183              :                                               updated_gnu2_tls_insns,
    5184           16 :                                               PATTERN (load->def_insn));
    5185           16 :                   break;
    5186          841 :                 case X86_CSE_VEC_DUP:
    5187              :                   /* Insert a broadcast after the original scalar
    5188              :                      definition.  */
    5189          841 :                   set = gen_rtx_SET (load->broadcast_reg,
    5190              :                                      load->broadcast_source);
    5191          841 :                   insn = emit_insn_after (set, load->def_insn);
    5192              : 
    5193          841 :                   if (cfun->can_throw_non_call_exceptions)
    5194              :                     {
    5195              :                       /* Handle REG_EH_REGION note in DEF_INSN.  */
    5196            4 :                       rtx note = find_reg_note (load->def_insn,
    5197              :                                                 REG_EH_REGION, nullptr);
    5198            4 :                       if (note)
    5199              :                         {
    5200            1 :                           control_flow_insns.safe_push (load->def_insn);
    5201            1 :                           add_reg_note (insn, REG_EH_REGION,
    5202              :                                         XEXP (note, 0));
    5203              :                         }
    5204              :                     }
    5205              : 
    5206          841 :                   if (dump_file)
    5207              :                     {
    5208            0 :                       fprintf (dump_file, "\nAdd:\n\n");
    5209            0 :                       print_rtl_single (dump_file, insn);
    5210            0 :                       fprintf (dump_file, "\nafter:\n\n");
    5211            0 :                       print_rtl_single (dump_file, load->def_insn);
    5212            0 :                       fprintf (dump_file, "\n");
    5213              :                     }
    5214              :                   break;
    5215            0 :                 default:
    5216            0 :                   gcc_unreachable ();
    5217              :                 }
    5218              :             else
    5219        62782 :               switch (load->kind)
    5220              :                 {
    5221          297 :                 case X86_CSE_TLS_GD:
    5222          297 :                 case X86_CSE_TLS_LD_BASE:
    5223          297 :                 case X86_CSE_TLSDESC:
    5224          297 :                   ix86_place_single_tls_call (load->broadcast_reg,
    5225              :                                               (load->kind == X86_CSE_TLSDESC
    5226              :                                                ? load->tlsdesc_val
    5227              :                                                : load->val),
    5228              :                                               load->kind,
    5229          297 :                                               load->bbs,
    5230              :                                               updated_gnu_tls_insns,
    5231              :                                               updated_gnu2_tls_insns);
    5232          297 :                   break;
    5233        41079 :                 case X86_CSE_CONST_VECTOR:
    5234        41079 :                 case X86_CSE_VEC_DUP:
    5235              :                   /* Keep redundant constant integer load.  */
    5236        41079 :                   if (!load->broadcast_reg)
    5237              :                     break;
    5238              :                   /* FALLTHRU */
    5239        42380 :                 case X86_CSE_CONST0_VECTOR:
    5240        42380 :                 case X86_CSE_CONSTM1_VECTOR:
    5241        42380 :                   ix86_place_single_vector_set (load->broadcast_reg,
    5242              :                                                 load->broadcast_source,
    5243              :                                                 load->bbs,
    5244              :                                                 load);
    5245        42380 :                   break;
    5246              :                 }
    5247              :           }
    5248              : 
    5249        41343 :       loop_optimizer_finalize ();
    5250              : 
    5251        41343 :       if (!control_flow_insns.is_empty ())
    5252              :         {
    5253            1 :           free_dominance_info (CDI_DOMINATORS);
    5254              : 
    5255            3 :           FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
    5256            1 :             if (control_flow_insn_p (insn))
    5257              :               {
    5258              :                 /* Split the block after insn.  There will be a fallthru
    5259              :                    edge, which is OK so we keep it.  We have to create
    5260              :                    the exception edges ourselves.  */
    5261            1 :                 bb = BLOCK_FOR_INSN (insn);
    5262            1 :                 split_block (bb, insn);
    5263            1 :                 rtl_make_eh_edge (NULL, bb, BB_END (bb));
    5264              :               }
    5265              :         }
    5266              : 
    5267        41343 :       df_process_deferred_rescans ();
    5268        41343 :     }
    5269              : 
    5270      1131413 :   FOR_EACH_VEC_ELT (loads, i, load)
    5271       309520 :     delete load;
    5272              : 
    5273       976653 :   df_clear_flags (DF_DEFER_INSN_RESCAN);
    5274              : 
    5275       976653 :   timevar_pop (TV_MACH_DEP);
    5276       976653 :   return 0;
    5277       976653 : }
    5278              : 
    5279              : } // anon namespace
    5280              : /* Create a pass_x86_cse instance for CTXT and return the newly allocated object.  */
    5281              : rtl_opt_pass *
    5282       298828 : make_pass_x86_cse (gcc::context *ctxt)
    5283              : {
    5284       298828 :   return new pass_x86_cse (ctxt);
    5285              : }
    5286              : 
    5287              : /* Convert legacy instructions that clobber EFLAGS to APX_NF
    5288              :    instructions when there is no flag set between a flag
    5289              :    producer and user.  Candidate insns are collected per basic block and then rewritten with the FLAGS_REG clobber removed from their PARALLEL pattern.  Always returns 0.  */
    5290              : 
    5291              : static unsigned int
    5292          371 : ix86_apx_nf_convert (void)
    5293              : {
    5294          371 :   timevar_push (TV_MACH_DEP);
    5295              : 
    5296          371 :   basic_block bb;
    5297          371 :   rtx_insn *insn;
    5298          371 :   hash_map <rtx_insn *, rtx> converting_map;  /* Candidate insn -> the pattern it had when recorded.  */
    5299          371 :   auto_vec <rtx_insn *> current_convert_list;  /* Candidates recorded in the current sequence.  */
    5300              : 
    5301          371 :   bool converting_seq = false;  /* True while scanning inside a convertible sequence.  */
    5302          371 :   rtx cc = gen_rtx_REG (CCmode, FLAGS_REG);  /* FLAGS_REG rtx used for the set/overlap queries below.  */
    5303              : 
    5304          794 :   FOR_EACH_BB_FN (bb, cfun)
    5305              :     {
    5306              :       /* Reset conversion for each bb, so a sequence never spans two blocks.  */
    5307          423 :       converting_seq = false;
    5308         5079 :       FOR_BB_INSNS (bb, insn)
    5309              :         {
    5310         4656 :           if (!NONDEBUG_INSN_P (insn))
    5311         4995 :             continue;
    5312              : 
    5313         3712 :           if (recog_memoized (insn) < 0)
    5314          337 :             continue;
    5315              : 
    5316              :           /* Convert candidate insns after cstore, which should
    5317              :              satisfy the two conditions:
    5318              :              1. Is not a flag user or producer, only clobbers
    5319              :              FLAGS_REG.
    5320              :              2. Has a corresponding nf pattern.  */
    5321              : 
    5322         3375 :           rtx pat = PATTERN (insn);
    5323              : 
    5324              :           /* Start conversion at the first cstorecc: a single set whose source is a comparison mentioning FLAGS_REG and whose destination does not overlap FLAGS_REG.  */
    5325         3375 :           rtx set = NULL_RTX;
    5326         3375 :           if (!converting_seq
    5327         2793 :               && (set = single_set (insn))
    5328         2717 :               && ix86_comparison_operator (SET_SRC (set), VOIDmode)
    5329          127 :               && reg_overlap_mentioned_p (cc, SET_SRC (set))
    5330         3499 :               && !reg_overlap_mentioned_p (cc, SET_DEST (set)))
    5331              :             {
    5332          124 :               converting_seq = true;
    5333          124 :               current_convert_list.truncate (0);
    5334              :             }
    5335              :           /* Terminate at the next explicit flag set, i.e. a set of FLAGS_REG that is not a CLOBBER.  */
    5336         3251 :           else if (reg_set_p (cc, pat)
    5337         3251 :                    && GET_CODE (set_of (cc, pat)) != CLOBBER)
    5338              :             converting_seq = false;
    5339              : 
    5340         3154 :           if (!converting_seq)
    5341         2770 :             continue;
    5342              : 
    5343          605 :           if (get_attr_has_nf (insn)
    5344          605 :               && GET_CODE (pat) == PARALLEL)
    5345              :             {
    5346              :               /* Record the insn in the candidate map together with its current pattern.  */
    5347           72 :               current_convert_list.safe_push (insn);
    5348           72 :               converting_map.put (insn, pat);
    5349              :             }
    5350              :           /* If the insn clobbers flags but has no nf_attr,
    5351              :              revoke all candidates of the current sequence and stop converting.  */
    5352          533 :           else if (!get_attr_has_nf (insn)
    5353          532 :                    && reg_set_p (cc, pat)
    5354          536 :                    && GET_CODE (set_of (cc, pat)) == CLOBBER)
    5355              :             {
    5356            3 :               for (auto item : current_convert_list)
    5357            0 :                 converting_map.remove (item);
    5358            3 :               converting_seq = false;
    5359              :             }
    5360              :         }
    5361              :     }
    5362              : 
    5363          371 :   if (!converting_map.is_empty ())
    5364              :     {
    5365           85 :       for (auto iter = converting_map.begin ();
    5366          170 :            iter != converting_map.end (); ++iter)
    5367              :         {
    5368           72 :           rtx_insn *replace = (*iter).first;
    5369           72 :           rtx pat = (*iter).second;
    5370           72 :           int i, n = 0, len = XVECLEN (pat, 0);
    5371           72 :           rtx *new_elems = XALLOCAVEC (rtx, len);  /* Room for every element of the PARALLEL.  */
    5372           72 :           rtx new_pat;
    5373          216 :           for (i = 0; i < len; i++)
    5374              :             {
    5375          144 :               rtx temp = XVECEXP (pat, 0, i);  /* Kept unless it is a CLOBBER overlapping FLAGS_REG.  */
    5376          216 :               if (! (GET_CODE (temp) == CLOBBER
    5377           72 :                      && reg_overlap_mentioned_p (cc,
    5378           72 :                                                  XEXP (temp, 0))))
    5379              :                 {
    5380           72 :                   new_elems[n] = temp;
    5381           72 :                   n++;
    5382              :                 }
    5383              :             }
    5384              : 
    5385           72 :           if (n == 1)  /* A single surviving element becomes the pattern itself.  */
    5386           72 :             new_pat = new_elems[0];
    5387              :           else
    5388            0 :             new_pat =
    5389            0 :               gen_rtx_PARALLEL (VOIDmode,
    5390              :                                 gen_rtvec_v (n,
    5391              :                                              new_elems));
    5392              : 
    5393           72 :           PATTERN (replace) = new_pat;
    5394           72 :           INSN_CODE (replace) = -1;  /* Reset the insn code before calling recog_memoized on the new pattern.  */
    5395           72 :           recog_memoized (replace);
    5396           72 :           df_insn_rescan (replace);
    5397              :         }
    5398              :     }
    5399              : 
    5400          371 :   timevar_pop (TV_MACH_DEP);
    5401          371 :   return 0;
    5402          371 : }
    5403              : 
    5404              : 
    5405              : namespace {
    5406              : /* Descriptor and pass class for the "apx_nfcvt" RTL pass, which runs ix86_apx_nf_convert.  */
    5407              : const pass_data pass_data_apx_nf_convert =
    5408              : {
    5409              :   RTL_PASS, /* type */
    5410              :   "apx_nfcvt", /* name */
    5411              :   OPTGROUP_NONE, /* optinfo_flags */
    5412              :   TV_MACH_DEP, /* tv_id */
    5413              :   0, /* properties_required */
    5414              :   0, /* properties_provided */
    5415              :   0, /* properties_destroyed */
    5416              :   0, /* todo_flags_start */
    5417              :   0, /* todo_flags_finish */
    5418              : };
    5419              : 
    5420              : class pass_apx_nf_convert : public rtl_opt_pass
    5421              : {
    5422              : public:
    5423       298828 :   pass_apx_nf_convert (gcc::context *ctxt)
    5424       597656 :     : rtl_opt_pass (pass_data_apx_nf_convert, ctxt)
    5425              :   {}
    5426              : 
    5427              :   /* opt_pass methods: gate enables the pass only with TARGET_APX_NF, when optimizing, and when the function is optimized for speed; execute runs ix86_apx_nf_convert.  */
    5428      1488378 :   bool gate (function *) final override
    5429              :     {
    5430      1488378 :       return (TARGET_APX_NF
    5431          465 :               && optimize
    5432      1488833 :               && optimize_function_for_speed_p (cfun));
    5433              :     }
    5434              : 
    5435          371 :   unsigned int execute (function *) final override
    5436              :     {
    5437          371 :       return ix86_apx_nf_convert ();
    5438              :     }
    5439              : }; // class pass_apx_nf_convert
    5440              : 
    5441              : } // anon namespace
    5442              : /* Create a pass_apx_nf_convert instance for CTXT and return the newly allocated object.  */
    5443              : rtl_opt_pass *
    5444       298828 : make_pass_apx_nf_convert (gcc::context *ctxt)
    5445              : {
    5446       298828 :   return new pass_apx_nf_convert (ctxt);
    5447              : }
    5448              : 
    5449              : /* When a hot loop fits into one cacheline (ix86_cost->prefetch_block bytes),
    5450              :    force align the loop without considering the max skip.  Does nothing when the cache line size is unknown.  */
    5451              : static void
    5452       976174 : ix86_align_loops ()
    5453              : {
    5454       976174 :   basic_block bb;
    5455              : 
    5456              :   /* Don't do this when we don't know the cache line size.  */
    5457       976174 :   if (ix86_cost->prefetch_block == 0)
    5458            9 :     return;
    5459              : 
    5460       976165 :   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
    5461       976165 :   profile_count count_threshold = cfun->cfg->count_max / param_align_threshold;  /* Threshold derived from the function's maximum count and param_align_threshold.  */
    5462     11299539 :   FOR_EACH_BB_FN (bb, cfun)
    5463              :     {
    5464     10323374 :       rtx_insn *label = BB_HEAD (bb);
    5465     10323374 :       bool has_fallthru = 0;
    5466     10323374 :       edge e;
    5467     10323374 :       edge_iterator ei;
    5468              : 
    5469     10323374 :       if (!LABEL_P (label))  /* Only blocks that start with a label are considered.  */
    5470      5257974 :         continue;
    5471              : 
    5472      5070212 :       profile_count fallthru_count = profile_count::zero ();
    5473      5070212 :       profile_count branch_count = profile_count::zero ();
    5474              : 
    5475     14735754 :       FOR_EACH_EDGE (e, ei, bb->preds)  /* Split incoming counts into fallthru and branch edges.  */
    5476              :         {
    5477      9665542 :           if (e->flags & EDGE_FALLTHRU)
    5478      2463054 :             has_fallthru = 1, fallthru_count += e->count ();
    5479              :           else
    5480      7202488 :             branch_count += e->count ();
    5481              :         }
    5482              : 
    5483      5070212 :       if (!fallthru_count.initialized_p () || !branch_count.initialized_p ())
    5484         4812 :         continue;
    5485              : 
    5486      5065400 :       if (bb->loop_father
    5487      5065400 :           && bb->loop_father->latch != EXIT_BLOCK_PTR_FOR_FN (cfun)
    5488      6407118 :           && (has_fallthru
    5489      1341718 :               ? (!(single_succ_p (bb)
    5490       146632 :                    && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun))
    5491       928658 :                  && optimize_bb_for_speed_p (bb)
    5492       848676 :                  && branch_count + fallthru_count > count_threshold
    5493       725939 :                  && (branch_count > fallthru_count * param_align_loop_iterations))
    5494              :               /* In case there's no fallthru into the loop,
    5495              :                  the inserted nops won't be executed.  */
    5496       413060 :               : (branch_count > count_threshold
    5497       137263 :                  || (bb->count > bb->prev_bb->count * 10
    5498        12583 :                      && (bb->prev_bb->count
    5499      4526995 :                          <= ENTRY_BLOCK_PTR_FOR_FN (cfun)->count / 2)))))
    5500              :         {
    5501       550988 :           rtx_insn* insn, *end_insn;
    5502       550988 :           HOST_WIDE_INT size = 0;  /* Sum of ix86_min_insn_size over the insns walked so far; -1 if unknown.  */
    5503       550988 :           bool padding_p = true;  /* Cleared when the loop must not be aligned.  */
    5504       550988 :           basic_block tbb = bb;
    5505       550988 :           unsigned cond_branch_num = 0;  /* Block-ending jumps that are not plain unconditional/single-successor jumps.  */
    5506       550988 :           bool detect_tight_loop_p = false;  /* Set once an edge back to BB is found.  */
    5507              : 
    5508       869732 :           for (unsigned int i = 0; i != bb->loop_father->num_nodes;
    5509       318744 :                i++, tbb = tbb->next_bb)
    5510              :             {
    5511              :               /* Only handle contiguous cfg layout: every block reached via next_bb must belong to the same loop as BB.  */
    5512       869732 :               if (bb->loop_father != tbb->loop_father)
    5513              :                 {
    5514              :                   padding_p = false;
    5515              :                   break;
    5516              :                 }
    5517              : 
    5518     10322981 :               FOR_BB_INSNS (tbb, insn)
    5519              :                 {
    5520      9653983 :                   if (!NONDEBUG_INSN_P (insn))
    5521      5599918 :                     continue;
    5522      4054065 :                   size += ix86_min_insn_size (insn);
    5523              : 
    5524              :                   /* We don't know the size of inline asm.
    5525              :                      Don't align a loop containing a call.  */
    5526      4054065 :                   if (asm_noperands (PATTERN (insn)) >= 0
    5527      4054065 :                       || CALL_P (insn))
    5528              :                     {
    5529              :                       size = -1;
    5530              :                       break;
    5531              :                     }
    5532              :                 }
    5533              : 
    5534       825253 :               if (size == -1 || size > ix86_cost->prefetch_block)  /* Unknown size, or larger than one cache line.  */
    5535              :                 {
    5536              :                   padding_p = false;
    5537              :                   break;
    5538              :                 }
    5539              : 
    5540      1483268 :               FOR_EACH_EDGE (e, ei, tbb->succs)
    5541              :                 {
    5542              :                   /* An edge back to BB: the blocks walked so far could be part of the loop.  */
    5543      1024079 :                   if (e->dest == bb)
    5544              :                     {
    5545              :                       detect_tight_loop_p = true;
    5546              :                       break;
    5547              :                     }
    5548              :                 }
    5549              : 
    5550       643566 :               if (detect_tight_loop_p)
    5551              :                 break;
    5552              : 
    5553       459189 :               end_insn = BB_END (tbb);
    5554       459189 :               if (JUMP_P (end_insn))
    5555              :                 {
    5556              :                   /* For decoded icache:
    5557              :                      1. Up to two branches are allowed per Way.
    5558              :                      2. A non-conditional branch is the last micro-op in a Way.
    5559              :                   */
    5560       370945 :                   if (onlyjump_p (end_insn)
    5561       370945 :                       && (any_uncondjump_p (end_insn)
    5562       312452 :                           || single_succ_p (tbb)))
    5563              :                     {
    5564              :                       padding_p = false;
    5565              :                       break;
    5566              :                     }
    5567       312452 :                   else if (++cond_branch_num >= 2)
    5568              :                     {
    5569              :                       padding_p = false;
    5570              :                       break;
    5571              :                     }
    5572              :                 }
    5573              : 
    5574              :             }
    5575              : 
    5576       550988 :           if (padding_p && detect_tight_loop_p)
    5577              :             {
    5578       368754 :               emit_insn_before (gen_max_skip_align (GEN_INT (ceil_log2 (size)),  /* Emitted before LABEL with ceil_log2 (size) and 0 as operands.  */
    5579              :                                                     GEN_INT (0)), label);
    5580              :               /* End of function.  */
    5581       184377 :               if (!tbb || tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
    5582              :                 break;
    5583              :               /* Skip the blocks which already fit into one cacheline by continuing the outer walk from TBB.  */
    5584              :               bb = tbb;
    5585              :             }
    5586              :         }
    5587              :     }
    5588              : 
    5589       976165 :   loop_optimizer_finalize ();
    5590       976165 :   free_dominance_info (CDI_DOMINATORS);
    5591              : }
    5592              : 
    5593              : namespace {
    5594              : /* Descriptor and pass class for the "align_tight_loops" RTL pass, which runs ix86_align_loops.  */
    5595              : const pass_data pass_data_align_tight_loops =
    5596              : {
    5597              :   RTL_PASS, /* type */
    5598              :   "align_tight_loops", /* name */
    5599              :   OPTGROUP_NONE, /* optinfo_flags */
    5600              :   TV_MACH_DEP, /* tv_id */
    5601              :   0, /* properties_required */
    5602              :   0, /* properties_provided */
    5603              :   0, /* properties_destroyed */
    5604              :   0, /* todo_flags_start */
    5605              :   0, /* todo_flags_finish */
    5606              : };
    5607              : 
    5608              : class pass_align_tight_loops : public rtl_opt_pass
    5609              : {
    5610              : public:
    5611       298828 :   pass_align_tight_loops (gcc::context *ctxt)
    5612       597656 :     : rtl_opt_pass (pass_data_align_tight_loops, ctxt)
    5613              :   {}
    5614              : 
    5615              :   /* opt_pass methods: gate requires TARGET_ALIGN_TIGHT_LOOPS, optimize, and a function optimized for speed; execute calls ix86_align_loops only when ASM_OUTPUT_MAX_SKIP_ALIGN is defined and always returns 0.  */
    5616      1488378 :   bool gate (function *) final override
    5617              :     {
    5618      1488378 :       return TARGET_ALIGN_TIGHT_LOOPS
    5619      1487892 :              && optimize
    5620      2529666 :              && optimize_function_for_speed_p (cfun);
    5621              :     }
    5622              : 
    5623       976174 :   unsigned int execute (function *) final override
    5624              :     {
    5625       976174 :       timevar_push (TV_MACH_DEP);
    5626              : #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
    5627       976174 :       ix86_align_loops ();
    5628              : #endif
    5629       976174 :       timevar_pop (TV_MACH_DEP);
    5630       976174 :       return 0;
    5631              :     }
    5632              : }; // class pass_align_tight_loops
    5633              : 
    5634              : } // anon namespace
    5635              : /* Create a pass_align_tight_loops instance for CTXT and return the newly allocated object.  */
    5636              : rtl_opt_pass *
    5637       298828 : make_pass_align_tight_loops (gcc::context *ctxt)
    5638              : {
    5639       298828 :   return new pass_align_tight_loops (ctxt);
    5640              : }
    5641              : 
    5642              : /* This compares the priority of target features in function DECL1
    5643              :    and DECL2.  It returns a positive value if DECL1 is higher priority,
    5644              :    a negative value if DECL2 is higher priority and 0 if they are the
    5645              :    same.  The priorities are obtained from get_builtin_code_for_version and the result is their difference as int.  */
    5646              : 
    5647              : int
    5648         5812 : ix86_compare_version_priority (tree decl1, tree decl2)
    5649              : {
    5650         5812 :   unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
    5651         5812 :   unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
    5652              : 
    5653         5812 :   return (int)priority1 - (int)priority2;
    5654              : }
    5655              : 
    5656              : /* V1 and V2 point to function versions with different priorities
    5657              :    based on the target ISA.  This function compares their priorities, returning V2's dispatch_priority minus V1's (presumably a qsort-style comparator, in which case higher priorities sort first -- confirm against the caller).  */
    5658              : 
    5659              : static int
    5660         6858 : feature_compare (const void *v1, const void *v2)
    5661              : {
    5662         6858 :   typedef struct _function_version_info  /* Has the same fields as the struct declared in dispatch_function_versions.  */
    5663              :     {
    5664              :       tree version_decl;
    5665              :       tree predicate_chain;
    5666              :       unsigned int dispatch_priority;
    5667              :     } function_version_info;
    5668              : 
    5669         6858 :   const function_version_info c1 = *(const function_version_info *)v1;
    5670         6858 :   const function_version_info c2 = *(const function_version_info *)v2;
    5671         6858 :   return (c2.dispatch_priority - c1.dispatch_priority);  /* Unsigned difference converted to the int return type.  */
    5672              : }
    5673              : 
    5674              : /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
    5675              :    to return a pointer to VERSION_DECL if the outcome of the expression
    5676              :    formed by PREDICATE_CHAIN is true.  This function will be called during
    5677              :    version dispatch to decide which function version to execute.  It returns
    5678              :    the basic block at the end, to which more conditions can be added.  When PREDICATE_CHAIN is NULL_TREE the return is unconditional and NEW_BB itself is returned.  */
    5679              : 
    5680              : static basic_block
    5681          839 : add_condition_to_bb (tree function_decl, tree version_decl,
    5682              :                      tree predicate_chain, basic_block new_bb)
    5683              : {
    5684          839 :   gimple *return_stmt;
    5685          839 :   tree convert_expr, result_var;
    5686          839 :   gimple *convert_stmt;
    5687          839 :   gimple *call_cond_stmt;
    5688          839 :   gimple *if_else_stmt;
    5689              : 
    5690          839 :   basic_block bb1, bb2, bb3;
    5691          839 :   edge e12, e23;
    5692              : 
    5693          839 :   tree cond_var, and_expr_var = NULL_TREE;
    5694          839 :   gimple_seq gseq;
    5695              : 
    5696          839 :   tree predicate_decl, predicate_arg;
    5697              : 
    5698          839 :   push_cfun (DECL_STRUCT_FUNCTION (function_decl));  /* Every return path below calls pop_cfun.  */
    5699              : 
    5700          839 :   gcc_assert (new_bb != NULL);
    5701          839 :   gseq = bb_seq (new_bb);
    5702              : 
    5703              :   /* Build "result_var = (void *) &version_decl; return result_var;".  */
    5704          839 :   convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
    5705              :                          build_fold_addr_expr (version_decl));
    5706          839 :   result_var = create_tmp_var (ptr_type_node);
    5707          839 :   convert_stmt = gimple_build_assign (result_var, convert_expr);
    5708          839 :   return_stmt = gimple_build_return (result_var);
    5709              : 
    5710          839 :   if (predicate_chain == NULL_TREE)  /* No predicates: append the unconditional return to NEW_BB.  */
    5711              :     {
    5712          201 :       gimple_seq_add_stmt (&gseq, convert_stmt);
    5713          201 :       gimple_seq_add_stmt (&gseq, return_stmt);
    5714          201 :       set_bb_seq (new_bb, gseq);
    5715          201 :       gimple_set_bb (convert_stmt, new_bb);
    5716          201 :       gimple_set_bb (return_stmt, new_bb);
    5717          201 :       pop_cfun ();
    5718          201 :       return new_bb;
    5719              :     }
    5720              : 
    5721         1315 :   while (predicate_chain != NULL)  /* Emit one predicate call per chain element.  */
    5722              :     {
    5723          677 :       cond_var = create_tmp_var (integer_type_node);
    5724          677 :       predicate_decl = TREE_PURPOSE (predicate_chain);
    5725          677 :       predicate_arg = TREE_VALUE (predicate_chain);
    5726          677 :       call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
    5727          677 :       gimple_call_set_lhs (call_cond_stmt, cond_var);
    5728              : 
    5729          677 :       gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
    5730          677 :       gimple_set_bb (call_cond_stmt, new_bb);
    5731          677 :       gimple_seq_add_stmt (&gseq, call_cond_stmt);
    5732              : 
    5733          677 :       predicate_chain = TREE_CHAIN (predicate_chain);
    5734              : 
    5735          677 :       if (and_expr_var == NULL)
    5736              :         and_expr_var = cond_var;
    5737              :       else
    5738              :         {
    5739           39 :           gimple *assign_stmt;
    5740              :           /* Combine the predicate results with MIN_EXPR so that the final "> 0" test fails if any result is not positive.
    5741              :              and_expr_var = min_expr <cond_var, and_expr_var>  */
    5742           39 :           assign_stmt = gimple_build_assign (and_expr_var,
    5743              :                           build2 (MIN_EXPR, integer_type_node,
    5744              :                                   cond_var, and_expr_var));
    5745              : 
    5746           39 :           gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
    5747           39 :           gimple_set_bb (assign_stmt, new_bb);
    5748           39 :           gimple_seq_add_stmt (&gseq, assign_stmt);
    5749              :         }
    5750              :     }
    5751              : 
    5752          638 :   if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
    5753              :                                     integer_zero_node,
    5754              :                                     NULL_TREE, NULL_TREE);
    5755          638 :   gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
    5756          638 :   gimple_set_bb (if_else_stmt, new_bb);
    5757          638 :   gimple_seq_add_stmt (&gseq, if_else_stmt);
    5758              : 
    5759          638 :   gimple_seq_add_stmt (&gseq, convert_stmt);
    5760          638 :   gimple_seq_add_stmt (&gseq, return_stmt);
    5761          638 :   set_bb_seq (new_bb, gseq);
    5762              :   /* Split NEW_BB after the condition (BB1 | BB2) and after the return (BB2 | BB3).  */
    5763          638 :   bb1 = new_bb;
    5764          638 :   e12 = split_block (bb1, if_else_stmt);
    5765          638 :   bb2 = e12->dest;
    5766          638 :   e12->flags &= ~EDGE_FALLTHRU;
    5767          638 :   e12->flags |= EDGE_TRUE_VALUE;  /* BB1 -> BB2 becomes the true edge of the condition.  */
    5768              : 
    5769          638 :   e23 = split_block (bb2, return_stmt);
    5770              : 
    5771          638 :   gimple_set_bb (convert_stmt, bb2);
    5772          638 :   gimple_set_bb (return_stmt, bb2);
    5773              : 
    5774          638 :   bb3 = e23->dest;
    5775          638 :   make_edge (bb1, bb3, EDGE_FALSE_VALUE);  /* The false edge of the condition goes to BB3.  */
    5776              :   /* BB2 ends in the return statement: replace its edge to BB3 with an edge to the exit block.  */
    5777          638 :   remove_edge (e23);
    5778          638 :   make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
    5779              : 
    5780          638 :   pop_cfun ();
    5781              : 
    5782          638 :   return bb3;
    5783              : }
    5784              : 
    5785              : /* Generate the dispatch code for a multi-versioned function.
    5786              :    DISPATCH_DECL is the function which will contain the dispatch logic.
    5787              :    FNDECLS_P points to a vec<tree> of the function choices for dispatch;
    5788              :    its first element is the default version.  *EMPTY_BB is the basic
    5789              :    block in DISPATCH_DECL in which the dispatch code is generated, and is
                      :    updated to the block returned by each add_condition_to_bb call.
                      :    Always returns 0.  */
    5790              : 
    5791              : static int
    5792          201 : dispatch_function_versions (tree dispatch_decl,
    5793              :                             void *fndecls_p,
    5794              :                             basic_block *empty_bb)
    5795              : {
    5796          201 :   tree default_decl;
    5797          201 :   gimple *ifunc_cpu_init_stmt;
    5798          201 :   gimple_seq gseq;
    5799          201 :   int ix;
    5800          201 :   tree ele;
    5801          201 :   vec<tree> *fndecls;
    5802          201 :   unsigned int num_versions = 0;
    5803          201 :   unsigned int actual_versions = 0;
    5804          201 :   unsigned int i;
    5805              : 
    5806          201 :   struct _function_version_info
    5807              :     {
    5808              :       tree version_decl;
    5809              :       tree predicate_chain;
    5810              :       unsigned int dispatch_priority;
    5811              :     }*function_version_info;
    5812              : 
    5813          201 :   gcc_assert (dispatch_decl != NULL
    5814              :               && fndecls_p != NULL
    5815              :               && empty_bb != NULL);
    5816              : 
    5817              :   /* FNDECLS_P is actually a pointer to a vec<tree>.  */
    5818          201 :   fndecls = static_cast<vec<tree> *> (fndecls_p);
    5819              : 
    5820              :   /* At least one more version other than the default.  */
    5821          201 :   num_versions = fndecls->length ();
    5822          201 :   gcc_assert (num_versions >= 2);
    5823              :   /* One slot for each version other than the default.  */
    5824          201 :   function_version_info = (struct _function_version_info *)
    5825          201 :     XNEWVEC (struct _function_version_info, (num_versions - 1));
    5826              : 
    5827              :   /* The first version in the vector is the default decl.  */
    5828          201 :   default_decl = (*fndecls)[0];
    5829              : 
    5830          201 :   push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
    5831              : 
    5832          201 :   gseq = bb_seq (*empty_bb);
    5833              :   /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
    5834              :      constructors, so explicitly call __builtin_cpu_init here.  */
    5835          201 :   ifunc_cpu_init_stmt
    5836          201 :     = gimple_build_call_vec (get_ix86_builtin (IX86_BUILTIN_CPU_INIT), vNULL);
    5837          201 :   gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
    5838          201 :   gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
    5839          201 :   set_bb_seq (*empty_bb, gseq);
    5840              : 
    5841          201 :   pop_cfun ();
    5842              : 
    5843              :   /* Collect the non-default versions together with their predicates.  */
    5844          996 :   for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    5845              :     {
    5846          795 :       tree version_decl = ele;
    5847          795 :       tree predicate_chain = NULL_TREE;
    5848          795 :       unsigned int priority;
    5849              :       /* Get attribute string, parse it and find the right predicate decl.
    5850              :          The predicate function could be a lengthy combination of many
    5851              :          features, like arch-type and various isa-variants.  */
    5852          795 :       priority = get_builtin_code_for_version (version_decl,
    5853              :                                                &predicate_chain);
    5854              :       /* Versions for which no predicate chain was built are skipped.  */
    5855          795 :       if (predicate_chain == NULL_TREE)
    5856          157 :         continue;
    5857              : 
    5858          638 :       function_version_info [actual_versions].version_decl = version_decl;
    5859          638 :       function_version_info [actual_versions].predicate_chain
    5860          638 :          = predicate_chain;
    5861          638 :       function_version_info [actual_versions].dispatch_priority = priority;
    5862          638 :       actual_versions++;
    5863              :     }
    5864              : 
    5865              :   /* Sort the versions according to descending order of dispatch priority.  The
    5866              :      priority is based on the ISA.  This is not a perfect solution.  There
    5867              :      could still be ambiguity.  If more than one function version is suitable
    5868              :      to execute, which one should be dispatched?  In future, allow the user
    5869              :      to specify a dispatch priority next to the version.  */
    5870          201 :   qsort (function_version_info, actual_versions,
    5871              :          sizeof (struct _function_version_info), feature_compare);
    5872              : 
    5873         1040 :   for  (i = 0; i < actual_versions; ++i)
    5874          638 :     *empty_bb = add_condition_to_bb (dispatch_decl,
    5875              :                                      function_version_info[i].version_decl,
    5876          638 :                                      function_version_info[i].predicate_chain,
    5877              :                                      *empty_bb);
    5878              : 
    5879              :   /* Dispatch the default version at the end, with no predicate chain.  */
    5880          201 :   *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
    5881              :                                    NULL, *empty_bb);
    5882              : 
    5883          201 :   free (function_version_info);
    5884          201 :   return 0;
    5885              : }
    5886              : 
    5887              : /* Return the assembler name for the function version DECL: the identifier
    5888              :    ID combined, via clone_identifier, with the string sorted_attr_string
    5889              :    builds from DECL's "target" attribute, which must be present.  */
    5890              : 
    5891              : static tree
    5892         1118 : ix86_mangle_function_version_assembler_name (tree decl, tree id)
    5893              : {
    5894         1118 :   tree version_attr;
    5895         1118 :   char *attr_str;
    5896              :   /* Diagnose versions that are declared inline with "gnu_inline".  */
    5897         1118 :   if (DECL_DECLARED_INLINE_P (decl)
    5898         1167 :       && lookup_attribute ("gnu_inline",
    5899           49 :                            DECL_ATTRIBUTES (decl)))
    5900            0 :     error_at (DECL_SOURCE_LOCATION (decl),
    5901              :               "function versions cannot be marked as %<gnu_inline%>,"
    5902              :               " bodies have to be generated");
    5903              :   /* Virtual functions, or ones with a vtable index, are not supported.  */
    5904         1118 :   if (DECL_VIRTUAL_P (decl)
    5905         2236 :       || DECL_VINDEX (decl))
    5906            0 :     sorry ("virtual function multiversioning not supported");
    5907              : 
    5908         1118 :   version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
    5909              : 
    5910              :   /* target attribute string cannot be NULL.  */
    5911         1118 :   gcc_assert (version_attr != NULL_TREE);
    5912              : 
    5913         1118 :   attr_str = sorted_attr_string (TREE_VALUE (version_attr));
    5914              : 
    5915              :   /* Allow assembler name to be modified if already set, by clearing
                      :      DECL's RTL.  */
    5916         1118 :   if (DECL_ASSEMBLER_NAME_SET_P (decl))
    5917         1103 :     SET_DECL_RTL (decl, NULL);
    5918              : 
    5919         1118 :   tree ret = clone_identifier (id, attr_str, true);
    5920              :   /* ATTR_STR is not needed once the identifier has been built.  */
    5921         1118 :   XDELETEVEC (attr_str);
    5922              : 
    5923         1118 :   return ret;
    5924              : }
    5925              : /* Return the assembler name to use for DECL, starting from ID.  Function
                      :    versions, dispatchers and resolvers get a modified identifier;
                      :    SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME, when defined, is applied last.  */
    5926              : tree
    5927    493879603 : ix86_mangle_decl_assembler_name (tree decl, tree id)
    5928              : {
    5929              :   /* For function version, add the target suffix to the assembler name.  */
    5930    493879603 :   if (TREE_CODE (decl) == FUNCTION_DECL)
    5931              :     {
    5932    459472001 :       cgraph_node *node = cgraph_node::get (decl);
    5933              :       /* Mangle all versions when annotated with target_clones, but only
    5934              :          non-default versions when annotated with target attributes.
                      :          NOTE(review): NODE is dereferenced here without a null check,
                      :          unlike the branches below -- confirm that every versioned decl
                      :          has a cgraph node.  */
    5935    459472001 :       if (DECL_FUNCTION_VERSIONED (decl)
    5936    459472001 :           && (node->is_target_clone
    5937         1089 :               || !is_function_default_version (node->decl)))
    5938         1118 :         id = ix86_mangle_function_version_assembler_name (decl, id);
    5939              :       /* Mangle the dispatcher symbol, but only when it is not a target clone.  */
    5940    459470883 :       else if (node && node->dispatcher_function && !node->is_target_clone)
    5941          117 :         id = clone_identifier (id, "ifunc");
    5942     64165655 :       else if (node && node->dispatcher_resolver_function)
    5943          201 :         id = clone_identifier (id, "resolver");
    5944              :     }
    5945              : #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
    5946              :   id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
    5947              : #endif
    5948              : 
    5949    493879603 :   return id;
    5950              : }
    5951              : 
    5952              : /* Make a dispatcher declaration for the multi-versioned function DECL.
    5953              :    Calls to DECL function will be replaced with calls to the dispatcher
    5954              :    by the front-end.  Returns the decl of the dispatcher function, or NULL
                      :    if the version chain has no default version or no dispatcher could
                      :    be made because ifunc is not supported.  */
    5955              : 
    5956              : tree
    5957          327 : ix86_get_function_versions_dispatcher (void *decl)
    5958              : {
    5959          327 :   tree fn = (tree) decl;
    5960          327 :   struct cgraph_node *node = NULL;
    5961          327 :   struct cgraph_node *default_node = NULL;
    5962          327 :   struct cgraph_function_version_info *node_v = NULL;
    5963              : 
    5964          327 :   tree dispatch_decl = NULL;
    5965              : 
    5966          327 :   struct cgraph_function_version_info *default_version_info = NULL;
    5967              : 
    5968          654 :   gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
    5969              : 
    5970          327 :   node = cgraph_node::get (fn);
    5971          327 :   gcc_assert (node != NULL);
    5972              : 
    5973          327 :   node_v = node->function_version ();
    5974          327 :   gcc_assert (node_v != NULL);
    5975              :   /* Reuse the dispatcher if one was already recorded for this version.  */
    5976          327 :   if (node_v->dispatcher_resolver != NULL)
    5977              :     return node_v->dispatcher_resolver;
    5978              : 
    5979              :   /* The default node is always the beginning of the chain.  */
    5980              :   default_version_info = node_v;
    5981          675 :   while (default_version_info->prev != NULL)
    5982              :     default_version_info = default_version_info->prev;
    5983          213 :   default_node = default_version_info->this_node;
    5984              : 
    5985              :   /* If there is no default node, just return NULL.  */
    5986          213 :   if (!is_function_default_version (default_node->decl))
    5987              :     return NULL;
    5988              :   /* Without ASM_OUTPUT_TYPE_DIRECTIVE only the error block below is compiled.  */
    5989              : #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
    5990          204 :   if (targetm.has_ifunc_p ())
    5991              :     {
    5992          204 :       struct cgraph_function_version_info *it_v = NULL;
    5993              : 
    5994              :       /* Right now, the dispatching is done via ifunc.  */
    5995          204 :       dispatch_decl = make_dispatcher_decl (default_node->decl);
    5996              : 
    5997              :       /* Set the dispatcher for all the versions.  */
    5998          204 :       it_v = default_version_info;
    5999         1410 :       while (it_v != NULL)
    6000              :         {
    6001         1002 :           it_v->dispatcher_resolver = dispatch_decl;
    6002         1002 :           it_v = it_v->next;
    6003              :         }
    6004              :     }
    6005              :   else
    6006              : #endif
    6007              :     {
    6008            0 :       error_at (DECL_SOURCE_LOCATION (default_node->decl),
    6009              :                 "multiversioning needs %<ifunc%> which is not supported "
    6010              :                 "on this target");
    6011              :     }
    6012              :   /* DISPATCH_DECL is still NULL here if the error above was issued.  */
    6013              :   return dispatch_decl;
    6014              : }
    6015              : 
    6016              : /* Make the resolver function decl to dispatch the versions of
    6017              :    a multi-versioned function, DEFAULT_DECL.  IFUNC_ALIAS_DECL is the
    6018              :    ifunc alias that will point to the created resolver.  Create an
    6019              :    empty basic block in the resolver and store the pointer in
    6020              :    EMPTY_BB.  Return the decl of the resolver function.  */
    6021              : 
    6022              : static tree
    6023          201 : make_resolver_func (const tree default_decl,
    6024              :                     const tree ifunc_alias_decl,
    6025              :                     basic_block *empty_bb)
    6026              : {
    6027          201 :   tree decl, type, t;
    6028              : 
    6029              :   /* The resolver function should return a (void *). */
    6030          201 :   type = build_function_type_list (ptr_type_node, NULL_TREE);
    6031              : 
    6032          201 :   cgraph_node *node = cgraph_node::get (default_decl);
    6033          201 :   gcc_assert (node && node->function_version ());
    6034              : 
    6035          201 :   decl = build_fn_decl (IDENTIFIER_POINTER (DECL_NAME (default_decl)), type);
    6036              : 
    6037              :   /* Set the assembler name to prevent cgraph_node attempting to mangle.  */
    6038          201 :   SET_DECL_ASSEMBLER_NAME (decl, DECL_ASSEMBLER_NAME (default_decl));
    6039              : 
    6040          201 :   cgraph_node *resolver_node = cgraph_node::get_create (decl);
    6041          201 :   resolver_node->dispatcher_resolver_function = true;
    6042              : 
    6043          201 :   if (node->is_target_clone)
    6044           87 :     resolver_node->is_target_clone = true;
    6045              :   /* Derive the resolver's assembler name from the recorded version name.  */
    6046          201 :   tree id = ix86_mangle_decl_assembler_name
    6047          201 :     (decl, node->function_version ()->assembler_name);
    6048          201 :   symtab->change_decl_assembler_name (decl, id);
    6049              : 
    6050          201 :   DECL_NAME (decl) = DECL_NAME (default_decl);
    6051          201 :   TREE_USED (decl) = 1;
    6052          201 :   DECL_ARTIFICIAL (decl) = 1;
    6053          201 :   DECL_IGNORED_P (decl) = 1;
    6054          201 :   TREE_PUBLIC (decl) = 0;
    6055          201 :   DECL_UNINLINABLE (decl) = 1;
    6056              : 
    6057              :   /* Resolver is not external, body is generated.  */
    6058          201 :   DECL_EXTERNAL (decl) = 0;
    6059          201 :   DECL_EXTERNAL (ifunc_alias_decl) = 0;
    6060              : 
    6061          201 :   DECL_CONTEXT (decl) = NULL_TREE;
    6062          201 :   DECL_INITIAL (decl) = make_node (BLOCK);
    6063          201 :   DECL_STATIC_CONSTRUCTOR (decl) = 0;
    6064              : 
    6065          201 :   if (DECL_COMDAT_GROUP (default_decl)
    6066          201 :       || TREE_PUBLIC (default_decl))
    6067              :     {
    6068              :       /* In this case, each translation unit with a call to this
    6069              :          versioned function will put out a resolver.  Ensure it
    6070              :          is comdat to keep just one copy.  */
    6071          177 :       DECL_COMDAT (decl) = 1;
    6072          177 :       make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    6073              :     }
    6074              :   else
    6075           24 :     TREE_PUBLIC (ifunc_alias_decl) = 0;
    6076              : 
    6077              :   /* Build the result decl and attach it to the resolver decl.  */
    6078          201 :   t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
    6079          201 :   DECL_CONTEXT (t) = decl;
    6080          201 :   DECL_ARTIFICIAL (t) = 1;
    6081          201 :   DECL_IGNORED_P (t) = 1;
    6082          201 :   DECL_RESULT (decl) = t;
    6083              : 
    6084          201 :   gimplify_function_tree (decl);
    6085          201 :   push_cfun (DECL_STRUCT_FUNCTION (decl));
    6086          201 :   *empty_bb = init_lowered_empty_function (decl, false,
    6087              :                                            profile_count::uninitialized ());
    6088              : 
    6089          201 :   cgraph_node::add_new_function (decl, true);
    6090          201 :   symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
    6091              : 
    6092          201 :   pop_cfun ();
    6093              :   /* NOTE(review): IFUNC_ALIAS_DECL has already been dereferenced above, so
                      :      this assertion comes too late to guard those uses.  */
    6094          201 :   gcc_assert (ifunc_alias_decl != NULL);
    6095              :   /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name.  */
    6096          201 :   DECL_ATTRIBUTES (ifunc_alias_decl)
    6097          201 :     = make_attribute ("ifunc", IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)),
    6098          201 :                       DECL_ATTRIBUTES (ifunc_alias_decl));
    6099              : 
    6100              :   /* Create the alias for dispatch to resolver here.  */
    6101          201 :   cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
    6102          201 :   return decl;
    6103              : }
    6104              : 
    6105              : /* Generate the dispatching code body to dispatch multi-versioned function
    6106              :    DECL.  The target hook is called to process the "target" attributes and
    6107              :    provide the code to dispatch the right function at run-time.  NODE_P is
    6108              :    the cgraph_node of the dispatcher whose body will be created.  Returns
                      :    the decl of the resolver function.  */
    6109              : 
    6110              : tree
    6111          201 : ix86_generate_version_dispatcher_body (void *node_p)
    6112              : {
    6113          201 :   tree resolver_decl;
    6114          201 :   basic_block empty_bb;
    6115          201 :   tree default_ver_decl;
    6116          201 :   struct cgraph_node *versn;
    6117          201 :   struct cgraph_node *node;
    6118              : 
    6119          201 :   struct cgraph_function_version_info *node_version_info = NULL;
    6120          201 :   struct cgraph_function_version_info *versn_info = NULL;
    6121              : 
    6122          201 :   node = (cgraph_node *)node_p;
    6123              : 
    6124          201 :   node_version_info = node->function_version ();
    6125          201 :   gcc_assert (node->dispatcher_function
    6126              :               && node_version_info != NULL);
    6127              :   /* Return the resolver right away if it was already recorded.  */
    6128          201 :   if (node_version_info->dispatcher_resolver)
    6129              :     return node_version_info->dispatcher_resolver;
    6130              : 
    6131              :   /* The entry following the dispatcher's own in the chain is taken as the
                      :      default version.  */
    6132          201 :   default_ver_decl = node_version_info->next->this_node->decl;
    6133              : 
    6134              :   /* NODE is going to be an alias, so remove the finalized bit.  */
    6135          201 :   node->definition = false;
    6136              : 
    6137          201 :   resolver_decl = make_resolver_func (default_ver_decl,
    6138              :                                       node->decl, &empty_bb);
    6139              : 
    6140          201 :   node_version_info->dispatcher_resolver = resolver_decl;
    6141              : 
    6142          201 :   push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
    6143              : 
    6144          201 :   auto_vec<tree, 2> fn_ver_vec;
    6145              :   /* Gather the decls of all versions, default first, for the dispatch.  */
    6146         1197 :   for (versn_info = node_version_info->next; versn_info;
    6147          996 :        versn_info = versn_info->next)
    6148              :     {
    6149          996 :       versn = versn_info->this_node;
    6150              :       /* Check for virtual functions here again, as by this time it should
    6151              :          have been determined if this function needs a vtable index or
    6152              :          not.  This happens for methods in derived classes that override
    6153              :          virtual methods in base classes but are not explicitly marked as
    6154              :          virtual.  */
    6155          996 :       if (DECL_VIRTUAL_P (versn->decl))
    6156            0 :         sorry ("virtual function multiversioning not supported");
    6157              : 
    6158          996 :       fn_ver_vec.safe_push (versn->decl);
    6159              :     }
    6160              : 
    6161          201 :   dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
    6162          201 :   cgraph_edge::rebuild_edges ();
    6163          201 :   pop_cfun ();
    6164          201 :   return resolver_decl;
    6165          201 : }
    6166              : 
    6167              : 
        

Generated by: LCOV version 2.4-beta

The LCOV profile is generated on an x86_64 machine using the following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. The GCC test suite is run with the built compiler.