LCOV - code coverage report
Current view: top level - gcc/config/i386 - i386-features.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 88.9 % 2643 2349
Test Date: 2026-03-28 14:25:54 Functions: 98.9 % 95 94
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Copyright (C) 1988-2026 Free Software Foundation, Inc.
       2              : 
       3              : This file is part of GCC.
       4              : 
       5              : GCC is free software; you can redistribute it and/or modify
       6              : it under the terms of the GNU General Public License as published by
       7              : the Free Software Foundation; either version 3, or (at your option)
       8              : any later version.
       9              : 
      10              : GCC is distributed in the hope that it will be useful,
      11              : but WITHOUT ANY WARRANTY; without even the implied warranty of
      12              : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13              : GNU General Public License for more details.
      14              : 
      15              : You should have received a copy of the GNU General Public License
      16              : along with GCC; see the file COPYING3.  If not see
      17              : <http://www.gnu.org/licenses/>.  */
      18              : 
      19              : #define IN_TARGET_CODE 1
      20              : 
      21              : #include "config.h"
      22              : #include "system.h"
      23              : #include "coretypes.h"
      24              : #include "backend.h"
      25              : #include "rtl.h"
      26              : #include "tree.h"
      27              : #include "memmodel.h"
      28              : #include "gimple.h"
      29              : #include "cfghooks.h"
      30              : #include "cfgloop.h"
      31              : #include "df.h"
      32              : #include "tm_p.h"
      33              : #include "stringpool.h"
      34              : #include "expmed.h"
      35              : #include "optabs.h"
      36              : #include "regs.h"
      37              : #include "emit-rtl.h"
      38              : #include "recog.h"
      39              : #include "cgraph.h"
      40              : #include "diagnostic.h"
      41              : #include "cfgbuild.h"
      42              : #include "alias.h"
      43              : #include "fold-const.h"
      44              : #include "attribs.h"
      45              : #include "calls.h"
      46              : #include "stor-layout.h"
      47              : #include "varasm.h"
      48              : #include "output.h"
      49              : #include "insn-attr.h"
      50              : #include "flags.h"
      51              : #include "except.h"
      52              : #include "explow.h"
      53              : #include "expr.h"
      54              : #include "cfgrtl.h"
      55              : #include "common/common-target.h"
      56              : #include "langhooks.h"
      57              : #include "reload.h"
      58              : #include "gimplify.h"
      59              : #include "dwarf2.h"
      60              : #include "tm-constrs.h"
      61              : #include "cselib.h"
      62              : #include "sched-int.h"
      63              : #include "opts.h"
      64              : #include "tree-pass.h"
      65              : #include "context.h"
      66              : #include "pass_manager.h"
      67              : #include "target-globals.h"
      68              : #include "gimple-iterator.h"
      69              : #include "shrink-wrap.h"
      70              : #include "builtins.h"
      71              : #include "rtl-iter.h"
      72              : #include "tree-iterator.h"
      73              : #include "dbgcnt.h"
      74              : #include "case-cfn-macros.h"
      75              : #include "dojump.h"
      76              : #include "fold-const-call.h"
      77              : #include "tree-vrp.h"
      78              : #include "tree-ssanames.h"
      79              : #include "selftest.h"
      80              : #include "selftest-rtl.h"
      81              : #include "print-rtl.h"
      82              : #include "intl.h"
      83              : #include "ifcvt.h"
      84              : #include "symbol-summary.h"
      85              : #include "sreal.h"
      86              : #include "ipa-cp.h"
      87              : #include "ipa-prop.h"
      88              : #include "ipa-fnsummary.h"
      89              : #include "wide-int-bitmask.h"
      90              : #include "tree-vector-builder.h"
      91              : #include "debug.h"
      92              : #include "dwarf2out.h"
      93              : #include "i386-builtins.h"
      94              : #include "i386-features.h"
      95              : #include "i386-expand.h"
      96              : 
      97              : const char * const xlogue_layout::STUB_BASE_NAMES[XLOGUE_STUB_COUNT] = {  /* Base names; get_stub_name () expands each to "__{avx,sse}_<base>_<nregs>".  */
      98              :   "savms64",
      99              :   "resms64",
     100              :   "resms64x",
     101              :   "savms64f",
     102              :   "resms64f",
     103              :   "resms64fx"
     104              : };
     105              : 
     106              : const unsigned xlogue_layout::REG_ORDER[xlogue_layout::MAX_REGS] = {
     107              : /* The offset values below show where each register is stored for each layout,
     108              :    relative to the incoming stack pointer.  The value of each m_regs[].offset
     109              :    will be relative to the incoming base pointer (rax or rsi) used by the stub.
     110              : 
     111              :     s_instances:   0            1               2               3
     112              :     Offset:                                     realigned or    aligned + 8
     113              :     Register       aligned      aligned + 8     aligned w/HFP   w/HFP   */
     114              :     XMM15_REG,  /* 0x10         0x18            0x10            0x18    */
     115              :     XMM14_REG,  /* 0x20         0x28            0x20            0x28    */
     116              :     XMM13_REG,  /* 0x30         0x38            0x30            0x38    */
     117              :     XMM12_REG,  /* 0x40         0x48            0x40            0x48    */
     118              :     XMM11_REG,  /* 0x50         0x58            0x50            0x58    */
     119              :     XMM10_REG,  /* 0x60         0x68            0x60            0x68    */
     120              :     XMM9_REG,   /* 0x70         0x78            0x70            0x78    */
     121              :     XMM8_REG,   /* 0x80         0x88            0x80            0x88    */
     122              :     XMM7_REG,   /* 0x90         0x98            0x90            0x98    */
     123              :     XMM6_REG,   /* 0xa0         0xa8            0xa0            0xa8    */
     124              :     SI_REG,     /* 0xa8         0xb0            0xa8            0xb0    */
     125              :     DI_REG,     /* 0xb0         0xb8            0xb0            0xb8    */
     126              :     BX_REG,     /* 0xb8         0xc0            0xb8            0xc0    */
     127              :     BP_REG,     /* 0xc0         0xc8            N/A             N/A     */
     128              :     R12_REG,    /* 0xc8         0xd0            0xc0            0xc8    */
     129              :     R13_REG,    /* 0xd0         0xd8            0xc8            0xd0    */
     130              :     R14_REG,    /* 0xd8         0xe0            0xd0            0xd8    */
     131              :     R15_REG,    /* 0xe0         0xe8            0xd8            0xe0    */
     132              : };
     133              : 
     134              : /* Instantiate static const values: out-of-class definitions of the static const data members (no initializers are given here).  */
     135              : const HOST_WIDE_INT xlogue_layout::STUB_INDEX_OFFSET;
     136              : const unsigned xlogue_layout::MIN_REGS;
     137              : const unsigned xlogue_layout::MAX_REGS;
     138              : const unsigned xlogue_layout::MAX_EXTRA_REGS;
     139              : const unsigned xlogue_layout::VARIANT_COUNT;
     140              : const unsigned xlogue_layout::STUB_NAME_MAX_LEN;
     141              : 
     142              : /* Define xlogue_layout::s_stub_names.  It has no initializer, so it starts zeroed; get_stub_name () treats an empty entry as not yet generated.  */
     143              : char xlogue_layout::s_stub_names[2][XLOGUE_STUB_COUNT][VARIANT_COUNT]
     144              :                                 [STUB_NAME_MAX_LEN];
     145              : 
     146              : /* Instantiates all xlogue_layout instances; get_instance () indexes this array by stub set.  */
     147              : const xlogue_layout xlogue_layout::s_instances[XLOGUE_SET_COUNT] = {
     148              :   xlogue_layout (0, false),  /* stack_align_off_in = 0, hfp = false.  */
     149              :   xlogue_layout (8, false),  /* stack_align_off_in = 8, hfp = false.  */
     150              :   xlogue_layout (0, true),  /* stack_align_off_in = 0, hfp = true.  */
     151              :   xlogue_layout (8, true)  /* stack_align_off_in = 8, hfp = true.  */
     152              : };
     153              : 
     154              : /* Return an appropriate const instance of xlogue_layout based upon values
     155              :    in cfun->machine and crtl.  */
     156              : const class xlogue_layout &
     157        49891 : xlogue_layout::get_instance ()
     158              : {
     159        49891 :   enum xlogue_stub_sets stub_set;
     160        49891 :   bool aligned_plus_8 = cfun->machine->call_ms2sysv_pad_in;  /* Selects the *_PLUS_8 sets below.  */
     161              : 
     162        49891 :   if (stack_realign_fp)  /* aligned_plus_8 is not consulted in this case.  */
     163              :     stub_set = XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
     164        40910 :   else if (frame_pointer_needed)
     165        25246 :     stub_set = aligned_plus_8
     166        31552 :               ? XLOGUE_SET_HFP_ALIGNED_PLUS_8
     167              :               : XLOGUE_SET_HFP_ALIGNED_OR_REALIGN;
     168              :   else
     169         9358 :     stub_set = aligned_plus_8 ? XLOGUE_SET_ALIGNED_PLUS_8 : XLOGUE_SET_ALIGNED;
     170              : 
     171        49891 :   return s_instances[stub_set];
     172              : }
     173              : 
     174              : /* Determine how many clobbered registers can be saved by the stub.
     175              :    Returns the count of registers the stub will save and restore.  */
     176              : unsigned
     177        35225 : xlogue_layout::count_stub_managed_regs ()
     178              : {
     179        35225 :   bool hfp = frame_pointer_needed || stack_realign_fp;
     180        35225 :   unsigned i, count;
     181        35225 :   unsigned regno;
     182              : 
     183        94890 :   for (count = i = MIN_REGS; i < MAX_REGS; ++i)  /* The first MIN_REGS entries are counted unconditionally.  */
     184              :     {
     185        93670 :       regno = REG_ORDER[i];
     186        93670 :       if (regno == BP_REG && hfp)  /* BP_REG is passed over without being counted when HFP.  */
     187        18200 :         continue;
     188        75470 :       if (!ix86_save_reg (regno, false, false))  /* Stop at the first register ix86_save_reg rejects.  */
     189              :         break;
     190        41465 :       ++count;
     191              :     }
     192        35225 :   return count;
     193              : }
     194              : 
     195              : /* Determine if register REGNO is a stub managed register given the
     196              :    total COUNT of stub managed registers.  */
     197              : bool
     198      2650688 : xlogue_layout::is_stub_managed_reg (unsigned regno, unsigned count)
     199              : {
     200      2650688 :   bool hfp = frame_pointer_needed || stack_realign_fp;
     201      2650688 :   unsigned i;
     202              : 
     203     34587805 :   for (i = 0; i < count; ++i)
     204              :     {
     205     32436986 :       gcc_assert (i < MAX_REGS);
     206     32436986 :       if (REG_ORDER[i] == BP_REG && hfp)  /* The BP_REG entry never matches when HFP ...  */
     207       522627 :         ++count;  /* ... and does not count towards COUNT, so scan one entry further.  */
     208     31914359 :       else if (REG_ORDER[i] == regno)
     209              :         return true;
     210              :     }
     211              :   return false;
     212              : }
     213              : 
     214              : /* Constructor for xlogue_layout.  Records in m_regs each register of REG_ORDER (minus BP_REG when HFP) together with its offset, accumulated starting from STACK_ALIGN_OFF_IN.  */
     215      1146964 : xlogue_layout::xlogue_layout (HOST_WIDE_INT stack_align_off_in, bool hfp)
     216      1146964 :   : m_hfp (hfp) , m_nregs (hfp ? 17 : 18),  /* One register fewer with HFP: BP_REG is skipped below.  */
     217      1146964 :     m_stack_align_off_in (stack_align_off_in)
     218              : {
     219      1146964 :   HOST_WIDE_INT offset = stack_align_off_in;
     220      1146964 :   unsigned i, j;
     221              : 
     222     21792316 :   for (i = j = 0; i < MAX_REGS; ++i)  /* I walks REG_ORDER, J walks m_regs.  */
     223              :     {
     224     20645352 :       unsigned regno = REG_ORDER[i];
     225              : 
     226     20645352 :       if (regno == BP_REG && hfp)
     227       573482 :         continue;
     228     20071870 :       if (SSE_REGNO_P (regno))
     229              :         {
     230     11469640 :           offset += 16;  /* Each SSE register advances the offset by 16 bytes.  */
     231              :           /* Verify that SSE regs are always aligned.  */
     232     11469640 :           gcc_assert (!((stack_align_off_in + offset) & 15));
     233              :         }
     234              :       else
     235      8602230 :         offset += 8;  /* Every other register advances the offset by 8 bytes.  */
     236              : 
     237     20071870 :       m_regs[j].regno    = regno;
     238     20071870 :       m_regs[j++].offset = offset - STUB_INDEX_OFFSET;  /* Stored relative to STUB_INDEX_OFFSET.  */
     239              :     }
     240      1146964 :   gcc_assert (j == m_nregs);
     241      1146964 : }
     242              : 
     243              : const char *
     244        14666 : xlogue_layout::get_stub_name (enum xlogue_stub stub,
     245              :                               unsigned n_extra_regs)
     246              : {
     247        14666 :   const int have_avx = TARGET_AVX;
     248        14666 :   char *name = s_stub_names[!!have_avx][stub][n_extra_regs];  /* Names are cached separately for the AVX and non-AVX cases.  */
     249              : 
     250              :   /* Lazy init: an empty entry has not been generated yet, so format it now and keep it in s_stub_names.  */
     251        14666 :   if (!*name)
     252              :     {
     253          362 :       int res = snprintf (name, STUB_NAME_MAX_LEN, "__%s_%s_%u",
     254              :                           (have_avx ? "avx" : "sse"),
     255          181 :                           STUB_BASE_NAMES[stub],
     256              :                           MIN_REGS + n_extra_regs);
     257          181 :       gcc_checking_assert (res < (int)STUB_NAME_MAX_LEN);  /* The name must not have been truncated.  */
     258              :     }
     259              : 
     260        14666 :   return name;
     261              : }
     262              : 
     263              : /* Return a Pmode SYMBOL_REF rtx for the entry point (based upon
     264              :    cfun->machine->call_ms2sysv_extra_regs) of the specified stub.  */
     265              : rtx
     266        14666 : xlogue_layout::get_stub_rtx (enum xlogue_stub stub)
     267              : {
     268        14666 :   const unsigned n_extra_regs = cfun->machine->call_ms2sysv_extra_regs;
     269        14666 :   gcc_checking_assert (n_extra_regs <= MAX_EXTRA_REGS);
     270        14666 :   gcc_assert (stub < XLOGUE_STUB_COUNT);
     271        14666 :   gcc_assert (crtl->stack_realign_finalized);
     272              : 
     273        14666 :   return gen_rtx_SYMBOL_REF (Pmode, get_stub_name (stub, n_extra_regs));  /* The symbol name comes from the lazily filled name table.  */
     274              : }
     275              : 
     276              : unsigned scalar_chain::max_id = 0;  /* Last chain id handed out; the scalar_chain constructor pre-increments it.  */
     277              : 
     278              : namespace {
     279              : 
     280              : /* Initialize new chain with modes SMODE_ and VMODE_.  */
     281              : 
     282      6369565 : scalar_chain::scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
     283              : {
     284      6369565 :   smode = smode_;
     285      6369565 :   vmode = vmode_;
     286              : 
     287      6369565 :   chain_id = ++max_id;  /* Each chain gets the next id.  */
     288              : 
     289      6369565 :    if (dump_file)
     290          136 :     fprintf (dump_file, "Created a new instruction chain #%d\n", chain_id);
     291              : 
     292      6369565 :   bitmap_obstack_initialize (NULL);  /* Paired with bitmap_obstack_release in the destructor.  */
     293      6369565 :   insns = BITMAP_ALLOC (NULL);
     294      6369565 :   defs = BITMAP_ALLOC (NULL);
     295      6369565 :   defs_conv = BITMAP_ALLOC (NULL);
     296      6369565 :   insns_conv = BITMAP_ALLOC (NULL);
     297      6369565 :   queue = NULL;  /* Allocated by build ().  */
     298              : 
     299      6369565 :   cost_sse_integer = 0;
     300      6369565 :   weighted_cost_sse_integer = 0 ;
     301      6369565 :   max_visits = x86_stv_max_visits;  /* Counted down in analyze_register_chain ().  */
     302      6369565 : }
     303              : 
     304              : /* Free chain's data.  The queue bitmap is not freed here; build () frees it before returning.  */
     305              : 
     306      6369565 : scalar_chain::~scalar_chain ()
     307              : {
     308      6369565 :   BITMAP_FREE (insns);
     309      6369565 :   BITMAP_FREE (defs);
     310      6369565 :   BITMAP_FREE (defs_conv);
     311      6369565 :   BITMAP_FREE (insns_conv);
     312      6369565 :   bitmap_obstack_release (NULL);  /* Matches bitmap_obstack_initialize in the constructor.  */
     313      6369565 : }
     314              : 
     315              : /* Add instruction INSN_UID into the chain's queue, reporting it in the dump file if it was not queued already.  */
     316              : 
     317              : void
     318      8246227 : scalar_chain::add_to_queue (unsigned insn_uid)
     319              : {
     320      8246227 :   if (!bitmap_set_bit (queue, insn_uid))  /* Already queued: nothing to report.  */
     321              :     return;
     322              : 
     323      6221467 :   if (dump_file)
     324          141 :     fprintf (dump_file, "  Adding insn %d into chain's #%d queue\n",
     325              :              insn_uid, chain_id);
     326              : }
     327              : 
     328              : /* For DImode conversion, mark register defined by DEF as requiring
     329              :    conversion, and add the cost of the extra move to cost_sse_integer and weighted_cost_sse_integer.  */
     330              : 
     331              : void
     332      9384633 : scalar_chain::mark_dual_mode_def (df_ref def)
     333              : {
     334      9384633 :   gcc_assert (DF_REF_REG_DEF_P (def));
     335              : 
     336              :   /* Record the def/insn pair so we can later efficiently iterate over
     337              :      the defs to convert on insns not in the chain.  */
     338      9384633 :   bool reg_new = bitmap_set_bit (defs_conv, DF_REF_REGNO (def));
     339      9384633 :   basic_block bb = BLOCK_FOR_INSN (DF_REF_INSN (def));
     340      9384633 :   profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
     341      9384633 :   bool speed_p = optimize_bb_for_speed_p (bb);
     342      9384633 :   int cost = 0;
     343              : 
     344      9384633 :   if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def)))  /* DEF's insn is not part of the chain.  */
     345              :     {
     346      2721688 :       if (!bitmap_set_bit (insns_conv, DF_REF_INSN_UID (def))
     347      2721688 :           && !reg_new)
     348      1408345 :         return;  /* Both the insn and the register were already recorded.  */
     349              : 
     350              :       /* Cost integer to sse moves.  */
     351      2472064 :       if (speed_p)
     352      2193867 :         cost = COSTS_N_INSNS (ix86_cost->integer_to_sse) / 2;
     353       278197 :       else if (TARGET_64BIT || smode == SImode)
     354              :         cost = COSTS_N_BYTES (4);
     355              :       /* vmovd (4 bytes) + vpinsrd (6 bytes).  */
     356        18685 :       else if (TARGET_SSE4_1)
     357              :         cost = COSTS_N_BYTES (10);
     358              :       /* movd (4 bytes) + movd (4 bytes) + unpckldq (4 bytes).  */
     359              :       else
     360      7976288 :         cost = COSTS_N_BYTES (12);
     361              :     }
     362              :   else  /* DEF's insn is part of the chain.  */
     363              :     {
     364      6662945 :       if (!reg_new)
     365              :         return;  /* The register was already recorded.  */
     366              : 
     367              :       /* Cost sse to integer moves.  */
     368      5504224 :       if (speed_p)
     369      4944528 :         cost = COSTS_N_INSNS (ix86_cost->sse_to_integer) / 2;
     370       559696 :       else if (TARGET_64BIT || smode == SImode)
     371              :         cost = COSTS_N_BYTES (4);
     372              :       /* vmovd (4 bytes) + vpextrd (6 bytes).  */
     373         3016 :       else if (TARGET_SSE4_1)
     374              :         cost = COSTS_N_BYTES (10);
     375              :       /* movd (4 bytes) + psrlq (5 bytes) + movd (4 bytes).  */
     376              :       else
     377      7976288 :         cost = COSTS_N_BYTES (13);
     378              :     }
     379              : 
     380      7976288 :   if (speed_p)  /* Only speed costs are weighted by BB's count relative to the entry block.  */
     381      7138395 :     weighted_cost_sse_integer += bb->count.to_sreal_scale (entry_count) * cost;
     382              : 
     383      7976288 :   cost_sse_integer += cost;
     384              : 
     385      7976288 :   if (dump_file)
     386          240 :     fprintf (dump_file,
     387              :              "  Mark r%d def in insn %d as requiring both modes in chain #%d\n",
     388          240 :              DF_REF_REGNO (def), DF_REF_INSN_UID (def), chain_id);
     389              : }
     390              : 
     391              : /* Check REF's chain to add new insns into a queue
     392              :    and find registers requiring conversion.  Return true if OK, false
     393              :    if the analysis was aborted.  */
     394              : 
     395              : bool
     396     17766723 : scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref,
     397              :                                       bitmap disallowed)
     398              : {
     399     17766723 :   df_link *chain;
     400     17766723 :   bool mark_def = false;
     401              : 
     402     17766723 :   gcc_checking_assert (bitmap_bit_p (insns, DF_REF_INSN_UID (ref)));
     403              : 
     404     61911976 :   for (chain = DF_REF_CHAIN (ref); chain; chain = chain->next)
     405              :     {
     406     44151982 :       unsigned uid = DF_REF_INSN_UID (chain->ref);
     407              : 
     408     44151982 :       if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))  /* Only nondebug insns are considered.  */
     409      8023977 :         continue;
     410              : 
     411     36128005 :       if (--max_visits == 0)  /* The visit budget set in the constructor is used up: abort.  */
     412              :         return false;
     413              : 
     414     36127419 :       if (!DF_REF_REG_MEM_P (chain->ref))
     415              :         {
     416     30122084 :           if (bitmap_bit_p (insns, uid))  /* Already part of this chain.  */
     417      9511708 :             continue;
     418              : 
     419     20610376 :           if (bitmap_bit_p (candidates, uid))  /* Still a candidate: queue it for this chain.  */
     420              :             {
     421      8246227 :               add_to_queue (uid);
     422      8246227 :               continue;
     423              :             }
     424              : 
     425              :           /* If we run into parts of an aborted chain discovery abort.  */
     426     12364149 :           if (bitmap_bit_p (disallowed, uid))
     427              :             return false;
     428              :         }
     429              : 
     430     18363341 :       if (DF_REF_REG_DEF_P (chain->ref))
     431              :         {
     432      2721688 :           if (dump_file)
     433          125 :             fprintf (dump_file, "  r%d def in insn %d isn't convertible\n",
     434              :                      DF_REF_REGNO (chain->ref), uid);
     435      2721688 :           mark_dual_mode_def (chain->ref);
     436              :         }
     437              :       else
     438              :         {
     439     15641653 :           if (dump_file)
     440          524 :             fprintf (dump_file, "  r%d use in insn %d isn't convertible\n",
     441              :                      DF_REF_REGNO (chain->ref), uid);
     442              :           mark_def = true;  /* REF itself is marked once the whole chain has been walked.  */
     443              :         }
     444              :     }
     445              : 
     446     17759994 :   if (mark_def)
     447      6662945 :     mark_dual_mode_def (ref);
     448              : 
     449              :   return true;
     450              : }
     451              : 
     452              : /* Check whether X is a convertible *concatditi_? variant.  X is known
     453              :    to be any_or_plus:TI, i.e. PLUS:TI, IOR:TI or XOR:TI.  The accepted form combines (ashift (zero_extend reg:DI) 64) with (zero_extend reg:DI), in either operand order.  */
     454              : 
     455              : static bool
     456        27276 : timode_concatdi_p (rtx x)
     457              : {
     458        27276 :   rtx op0 = XEXP (x, 0);
     459        27276 :   rtx op1 = XEXP (x, 1);
     460              : 
     461        27276 :   if (GET_CODE (op1) == ASHIFT)
     462          958 :     std::swap (op0, op1);  /* Put the shifted operand first.  */
     463              : 
     464        27276 :   return GET_CODE (op0) == ASHIFT
     465        18423 :          && GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
     466        18423 :          && GET_MODE (XEXP (XEXP (op0, 0), 0)) == DImode
     467        18423 :          && REG_P (XEXP (XEXP (op0, 0), 0))
     468        18288 :          && CONST_INT_P (XEXP (op0, 1))
     469        18288 :          && INTVAL (XEXP (op0, 1)) == 64
     470        18288 :          && GET_CODE (op1) == ZERO_EXTEND
     471        17330 :          && GET_MODE (XEXP (op1, 0)) == DImode
     472        44606 :          && REG_P (XEXP (op1, 0));
     473              : }
     474              : 
     475              : 
     476              : /* Add instruction INSN_UID into a chain.  Return true if OK, false if the search
     477              :    was aborted.  */
     478              : 
     479              : bool
     480     12582842 : scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid,
     481              :                         bitmap disallowed)
     482              : {
     483     12582842 :   if (!bitmap_set_bit (insns, insn_uid))
     484              :     return true;  /* Already part of the chain.  */
     485              : 
     486     12582842 :   if (dump_file)
     487          277 :     fprintf (dump_file, "  Adding insn %d to chain #%d\n", insn_uid, chain_id);
     488              : 
     489     12582842 :   rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
     490     12582842 :   rtx def_set = single_set (insn);
     491     12582842 :   if (def_set && REG_P (SET_DEST (def_set))
     492     22292027 :       && !HARD_REGISTER_P (SET_DEST (def_set)))
     493      9709173 :     bitmap_set_bit (defs, REGNO (SET_DEST (def_set)));  /* Record the non-hard register set by the insn in DEFS.  */
     494              : 
     495              :   /* ???  The following is quadratic since analyze_register_chain
     496              :      iterates over all refs to look for dual-mode regs.  Instead this
     497              :      should be done separately for all regs mentioned in the chain once.  */
     498     12582842 :   df_ref ref;
     499     25706771 :   for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
     500     13126385 :     if (!HARD_REGISTER_P (DF_REF_REG (ref)))
     501      9709173 :       if (!analyze_register_chain (candidates, ref, disallowed))
     502              :         return false;
     503              : 
     504              :   /* The operand(s) of VEC_SELECT, ZERO_EXTEND and similar ops don't need
     505              :      to be converted/convertible.  */
     506     12580386 :   if (def_set)
     507     12580386 :     switch (GET_CODE (SET_SRC (def_set)))
     508              :       {
     509              :       case VEC_SELECT:
     510              :         return true;
     511          122 :       case ZERO_EXTEND:
     512          122 :         if (GET_MODE (XEXP (SET_SRC (def_set), 0)) == DImode)
     513              :           return true;
     514              :         break;
     515      2371758 :       case PLUS:
     516      2371758 :       case IOR:
     517      2371758 :       case XOR:
     518      2371758 :         if (smode == TImode && timode_concatdi_p (SET_SRC (def_set)))
     519              :           return true;
     520              :         break;
     521              :       default:
     522              :         break;
     523              :       }
     524              : 
     525     27524962 :   for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
     526     14989281 :     if (!DF_REF_REG_MEM_P (ref))  /* Uses inside memory references are not analyzed.  */
     527      8057550 :       if (!analyze_register_chain (candidates, ref, disallowed))
     528              :         return false;
     529              : 
     530              :   return true;
     531              : }
     532              : 
     533              : /* Build new chain starting from insn INSN_UID recursively
     534              :    adding all dependent uses and definitions.  Return true if OK, false
     535              :    if the chain discovery was aborted.  */
     536              : 
     537              : bool
     538      6369565 : scalar_chain::build (bitmap candidates, unsigned insn_uid, bitmap disallowed)
     539              : {
     540      6369565 :   queue = BITMAP_ALLOC (NULL);  /* Work list of insn uids; freed on both exits below.  */
     541      6369565 :   bitmap_set_bit (queue, insn_uid);
     542              : 
     543      6369565 :   if (dump_file)
     544          136 :     fprintf (dump_file, "Building chain #%d...\n", chain_id);
     545              : 
     546     18945678 :   while (!bitmap_empty_p (queue))
     547              :     {
     548     12582842 :       insn_uid = bitmap_first_set_bit (queue);
     549     12582842 :       bitmap_clear_bit (queue, insn_uid);
     550     12582842 :       bitmap_clear_bit (candidates, insn_uid);  /* An insn taken from the queue is no longer a candidate.  */
     551     12582842 :       if (!add_insn (candidates, insn_uid, disallowed))
     552              :         {
     553              :           /* If we aborted the search, put the insns found so far on the set of
     554              :              disallowed insns so that further searches reaching them also
     555              :              abort, and thus we abort the whole, as yet undiscovered, chain.  */
     556         6729 :           bitmap_ior_into (disallowed, insns);
     557         6729 :           if (dump_file)
     558            0 :             fprintf (dump_file, "Aborted chain #%d discovery\n", chain_id);
     559         6729 :           BITMAP_FREE (queue);
     560         6729 :           return false;
     561              :         }
     562              :     }
     563              : 
     564      6362836 :   if (dump_file)
     565              :     {
     566          136 :       fprintf (dump_file, "Collected chain #%d...\n", chain_id);
     567          136 :       fprintf (dump_file, "  insns: ");
     568          136 :       dump_bitmap (dump_file, insns);
     569          136 :       if (!bitmap_empty_p (defs_conv))
     570              :         {
     571          136 :           bitmap_iterator bi;
     572          136 :           unsigned id;
     573          136 :           const char *comma = "";  /* Empty before the first entry, ", " afterwards.  */
     574          136 :           fprintf (dump_file, "  defs to convert: ");
     575          366 :           EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi)
     576              :             {
     577          230 :               fprintf (dump_file, "%sr%d", comma, id);
     578          230 :               comma = ", ";
     579              :             }
     580          136 :           fprintf (dump_file, "\n");
     581              :         }
     582              :     }
     583              : 
     584      6362836 :   BITMAP_FREE (queue);
     585              : 
     586      6362836 :   return true;
     587              : }
     588              : 
     589              : /* Return a cost of building a vector constant
     590              :    instead of using a scalar one.  EXP must be a CONST_INT; BB decides between size and speed costs.  */
     591              : 
     592              : int
     593      2680965 : general_scalar_chain::vector_const_cost (rtx exp, basic_block bb)
     594              : {
     595      2680965 :   gcc_assert (CONST_INT_P (exp));
     596              : 
     597      2680965 :   if (standard_sse_constant_p (exp, vmode))  /* Standard SSE constants are costed as a single SSE op.  */
     598       619793 :     return ix86_cost->sse_op;
     599      2061172 :   if (optimize_bb_for_size_p (bb))
     600              :     return COSTS_N_BYTES (8);
     601              :   /* We have separate costs for SImode and DImode, use SImode costs
     602              :      for smaller modes.  */
     603      2455091 :   return COSTS_N_INSNS (ix86_cost->sse_load[smode == DImode ? 1 : 0]) / 2;
     604              : }
     605              : 
     606              : /* Return true if converting this chain is estimated to be profitable.  A per-insn gain is summed over all chain insns; the frequency-weighted sum is compared with weighted_cost_sse_integer, and the unweighted sum against cost_sse_integer breaks exact ties.  */
     607              : 
     608              : bool
     609      5890727 : general_scalar_chain::compute_convert_gain ()
     610              : {
     611      5890727 :   bitmap_iterator bi;
     612      5890727 :   unsigned insn_uid;
     613      5890727 :   int gain = 0;  /* Unweighted sum of the per-insn gains.  */
     614      5890727 :   sreal weighted_gain = 0;  /* Gains scaled by bb frequency; only speed-optimized insns are added.  */
     615              : 
     616      5890727 :   if (dump_file)
     617          136 :     fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
     618              : 
     619              :   /* SSE costs distinguish between SImode and DImode loads/stores, for
     620              :      int costs factor in the number of GPRs involved.  When supporting
     621              :      smaller modes than SImode the int load/store costs need to be
     622              :      adjusted as well.  */
     623      5890727 :   unsigned sse_cost_idx = smode == DImode ? 1 : 0;  /* Index into ix86_cost->sse_load / sse_store.  */
     624      5890727 :   int m = smode == DImode ? (TARGET_64BIT ? 1 : 2) : 1;  /* Integer-cost multiplier: 2 only for DImode without TARGET_64BIT.  */
     625              : 
     626     17522562 :   EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
     627              :     {
     628     11631835 :       rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
     629     11631835 :       rtx def_set = single_set (insn);
     630     11631835 :       rtx src = SET_SRC (def_set);
     631     11631835 :       rtx dst = SET_DEST (def_set);
     632     11631835 :       basic_block bb = BLOCK_FOR_INSN (insn);
     633     11631835 :       int igain = 0;  /* Gain of this insn; positive favours conversion.  */
     634     11631835 :       profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
     635     11631835 :       bool speed_p = optimize_bb_for_speed_p (bb);
     636     11631835 :       sreal bb_freq = bb->count.to_sreal_scale (entry_count);  /* BB count scaled relative to the function entry count.  */
     637              : 
     638     11631835 :       if (REG_P (src) && REG_P (dst))
     639              :         {
     640       933786 :           if (!speed_p)
     641              :             /* reg-reg move is 2 bytes, while SSE 3.  */
     642       186982 :             igain += COSTS_N_BYTES (2 * m - 3);
     643              :           else
     644              :             /* Move costs are normalized to reg-reg move having cost 2.  */
     645       746804 :             igain += COSTS_N_INSNS (2 * m - ix86_cost->xmm_move) / 2;
     646              :         }
     647     10698049 :       else if (REG_P (src) && MEM_P (dst))  /* Plain store.  */
     648              :         {
     649      2303672 :           if (!speed_p)
     650              :             /* Integer load/store is 3+ bytes and SSE 4+.  */
     651       191466 :             igain += COSTS_N_BYTES (3 * m - 4);
     652              :           else
     653      2112206 :             igain
     654      2112206 :               += COSTS_N_INSNS (m * ix86_cost->int_store[2]
     655              :                                 - ix86_cost->sse_store[sse_cost_idx]) / 2;
     656              :         }
     657      8394377 :       else if (MEM_P (src) && REG_P (dst))  /* Plain load.  */
     658              :         {
     659      3762752 :           if (!speed_p)
     660       358579 :             igain += COSTS_N_BYTES (3 * m - 4);
     661              :           else
     662      3404173 :             igain += COSTS_N_INSNS (m * ix86_cost->int_load[2]
     663              :                                     - ix86_cost->sse_load[sse_cost_idx]) / 2;
     664              :         }
     665              :       else
     666              :         {
     667              :           /* For operations on memory operands, include the overhead
     668              :              of explicit load and store instructions.  */
     669      4631625 :           if (MEM_P (dst))
     670              :             {
     671        67025 :               if (!speed_p)
     672              :                 /* ??? This probably should account size difference
     673              :                    of SSE and integer load rather than full SSE load.  */
     674              :                 igain -= COSTS_N_BYTES (8);
     675              :               else
     676              :                 {
     677        57834 :                   int cost = (m * (ix86_cost->int_load[2]
     678        57834 :                                    + ix86_cost->int_store[2])
     679        57834 :                              - (ix86_cost->sse_load[sse_cost_idx] +
     680        57834 :                                 ix86_cost->sse_store[sse_cost_idx]));
     681        57834 :                   igain += COSTS_N_INSNS (cost) / 2;
     682              :                 }
     683              :             }
     684              : 
     685      4631625 :           switch (GET_CODE (src))  /* Cost the operation itself by its RTL code.  */
     686              :             {
     687       477837 :             case ASHIFT:
     688       477837 :             case ASHIFTRT:
     689       477837 :             case LSHIFTRT:
     690       477837 :               if (m == 2)  /* DImode without TARGET_64BIT: extra integer-side cost.  */
     691              :                 {
     692        16944 :                   if (INTVAL (XEXP (src, 1)) >= 32)
     693        11524 :                     igain += ix86_cost->add;
     694              :                   /* Gain for extend highpart case.  */
     695         5420 :                   else if (GET_CODE (XEXP (src, 0)) == ASHIFT)
     696            0 :                     igain += ix86_cost->shift_const - ix86_cost->sse_op;
     697              :                   else
     698         5420 :                     igain += ix86_cost->shift_const;
     699              :                 }
     700              : 
     701       477837 :               igain += ix86_cost->shift_const - ix86_cost->sse_op;
     702              : 
     703       477837 :               if (CONST_INT_P (XEXP (src, 0)))  /* A constant shifted operand must be built as a vector constant.  */
     704            0 :                 igain -= vector_const_cost (XEXP (src, 0), bb);
     705              :               break;
     706              : 
     707         3819 :             case ROTATE:
     708         3819 :             case ROTATERT:
     709         3819 :               igain += m * ix86_cost->shift_const;
     710         3819 :               if (TARGET_AVX512VL)
     711          204 :                 igain -= ix86_cost->sse_op;
     712         3615 :               else if (smode == DImode)
     713              :                 {
     714          612 :                   int bits = INTVAL (XEXP (src, 1));
     715          612 :                   if ((bits & 0x0f) == 0)  /* Rotate count is a multiple of 16: one SSE op.  */
     716          128 :                     igain -= ix86_cost->sse_op;
     717          484 :                   else if ((bits & 0x07) == 0)  /* Multiple of 8: two SSE ops.  */
     718           27 :                     igain -= 2 * ix86_cost->sse_op;
     719              :                   else
     720          457 :                     igain -= 3 * ix86_cost->sse_op;
     721              :                 }
     722         3003 :               else if (INTVAL (XEXP (src, 1)) == 16)
     723          242 :                 igain -= ix86_cost->sse_op;
     724              :               else
     725         2761 :                 igain -= 2 * ix86_cost->sse_op;
     726              :               break;
     727              : 
     728      2845481 :             case AND:
     729      2845481 :             case IOR:
     730      2845481 :             case XOR:
     731      2845481 :             case PLUS:
     732      2845481 :             case MINUS:
     733      2845481 :               igain += m * ix86_cost->add - ix86_cost->sse_op;
     734              :               /* Additional gain for andnot for targets without BMI.  */
     735      2845481 :               if (GET_CODE (XEXP (src, 0)) == NOT
     736         3599 :                   && !TARGET_BMI)
     737         3590 :                 igain += m * ix86_cost->add;
     738              : 
     739      2845481 :               if (CONST_INT_P (XEXP (src, 0)))  /* Constant operands are charged as vector constants.  */
     740            0 :                 igain -= vector_const_cost (XEXP (src, 0), bb);
     741      2845481 :               if (CONST_INT_P (XEXP (src, 1)))
     742      1696311 :                 igain -= vector_const_cost (XEXP (src, 1), bb);
     743      2845481 :               if (MEM_P (XEXP (src, 1)))  /* Memory second operand: account for the load.  */
     744              :                 {
     745        84397 :                   if (!speed_p)
     746        20527 :                     igain -= COSTS_N_BYTES (m == 2 ? 3 : 5);
     747              :                   else
     748        74129 :                     igain += COSTS_N_INSNS
     749              :                                (m * ix86_cost->int_load[2]
     750              :                                  - ix86_cost->sse_load[sse_cost_idx]) / 2;
     751              :                 }
     752              :               break;
     753              : 
     754        50600 :             case NEG:
     755        50600 :             case NOT:
     756        50600 :               igain -= ix86_cost->sse_op + COSTS_N_INSNS (1);  /* Vector side: one SSE op plus one extra insn.  */
     757              : 
     758        50600 :               if (GET_CODE (XEXP (src, 0)) != ABS)  /* Only NEG/NOT applied to an ABS falls through to the ABS costing.  */
     759              :                 {
     760        50600 :                   igain += m * ix86_cost->add;
     761        50600 :                   break;
     762              :                 }
     763              :               /* FALLTHRU */
     764              : 
     765         1004 :             case ABS:
     766         1004 :             case SMAX:
     767         1004 :             case SMIN:
     768         1004 :             case UMAX:
     769         1004 :             case UMIN:
     770              :               /* We do not have any conditional move cost, estimate it as a
     771              :                  reg-reg move.  Comparisons are costed as adds.  */
     772         1004 :               igain += m * (COSTS_N_INSNS (2) + ix86_cost->add);
     773              :               /* Integer SSE ops are all costed the same.  */
     774         1004 :               igain -= ix86_cost->sse_op;
     775         1004 :               break;
     776              : 
     777            0 :             case COMPARE:
     778            0 :               if (XEXP (src, 1) != const0_rtx)
     779              :                 {
     780              :                   /* cmp vs. pxor;pshufd;ptest.  */
     781            0 :                   igain += COSTS_N_INSNS (m - 3);
     782              :                 }
     783            0 :               else if (GET_CODE (XEXP (src, 0)) != AND)
     784              :                 {
     785              :                   /* test vs. pshufd;ptest.  */
     786            0 :                   igain += COSTS_N_INSNS (m - 2);
     787              :                 }
     788            0 :               else if (GET_CODE (XEXP (XEXP (src, 0), 0)) != NOT)
     789              :                 {
     790              :                   /* and;test vs. pshufd;ptest.  */
     791            0 :                   igain += COSTS_N_INSNS (2 * m - 2);
     792              :                 }
     793            0 :               else if (TARGET_BMI)
     794              :                 {
     795              :                   /* andn;test vs. pandn;pshufd;ptest.  */
     796            0 :                   igain += COSTS_N_INSNS (2 * m - 3);
     797              :                 }
     798              :               else
     799              :                 {
     800              :                   /* not;and;test vs. pandn;pshufd;ptest.  */
     801            0 :                   igain += COSTS_N_INSNS (3 * m - 3);
     802              :                 }
     803              :               break;
     804              : 
     805      1215339 :             case CONST_INT:
     806      1215339 :               if (REG_P (dst))
     807              :                 {
     808      1215339 :                   if (!speed_p)
     809              :                     {
     810              :                       /* xor (2 bytes) vs. xorps (3 bytes).  */
     811       230685 :                       if (src == const0_rtx)
     812       121787 :                         igain -= COSTS_N_BYTES (1);
     813              :                       /* movdi_internal vs. movv2di_internal.  */
     814              :                       /* => mov (5 bytes) vs. movaps (7 bytes).  */
     815       108898 :                       else if (x86_64_immediate_operand (src, SImode))
     816        96231 :                         igain -= COSTS_N_BYTES (2);
     817              :                       else
     818              :                         /* ??? Larger immediate constants are placed in the
     819              :                            constant pool, where the size benefit/impact of
     820              :                            STV conversion is affected by whether and how
     821              :                            often each constant pool entry is shared/reused.
     822              :                            The value below is empirically derived from the
     823              :                            CSiBE benchmark (and the optimal value may drift
     824              :                            over time).  */
     825              :                         igain += COSTS_N_BYTES (0);
     826              :                     }
     827              :                   else
     828              :                     {
     829              :                       /* DImode can be immediate for TARGET_64BIT
     830              :                          and SImode always.  */
     831       984654 :                       igain += m * COSTS_N_INSNS (1);
     832       984654 :                       igain -= vector_const_cost (src, bb);
     833              :                     }
     834              :                 }
     835            0 :               else if (MEM_P (dst))
     836              :                 {
     837            0 :                   igain += (m * ix86_cost->int_store[2]  /* NOTE(review): unlike the other load/store gains in this function, this is not scaled via COSTS_N_INSNS (...) / 2 and does not look at speed_p -- confirm this is intended.  */
     838            0 :                             - ix86_cost->sse_store[sse_cost_idx]);
     839            0 :                   igain -= vector_const_cost (src, bb);
     840              :                 }
     841              :               break;
     842              : 
     843        37545 :             case VEC_SELECT:
     844        37545 :               if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx)  /* Selecting element 0.  */
     845              :                 {
     846              :                   // movd (4 bytes) replaced with movdqa (4 bytes).
     847        27826 :                   if (!!speed_p)  /* NOTE(review): `!!speed_p' is equivalent to `speed_p'; no gain is recorded when optimizing for size.  */
     848        26021 :                     igain += COSTS_N_INSNS (ix86_cost->sse_to_integer
     849              :                                             - ix86_cost->xmm_move) / 2;
     850              :                 }
     851              :               else
     852              :                 {
     853              :                   // pshufd; movd replaced with pshufd.
     854         9719 :                   if (!speed_p)
     855          674 :                     igain += COSTS_N_BYTES (4);
     856              :                   else
     857         9045 :                     igain += ix86_cost->sse_to_integer;
     858              :                 }
     859              :               break;
     860              : 
     861            0 :             default:
     862            0 :               gcc_unreachable ();  /* Any other SET_SRC code in a chain insn is treated as a bug.  */
     863              :             }
     864              :         }
     865              : 
     866     11630030 :       if (speed_p)  /* Insns in blocks optimized for size do not contribute to the weighted gain.  */
     867     10361167 :         weighted_gain += bb_freq * igain;
     868     11631835 :       gain += igain;
     869              : 
     870     11631835 :       if (igain != 0 && dump_file)
     871              :         {
     872           93 :           fprintf (dump_file, "  Instruction gain %d with bb_freq %.2f for",
     873              :                    igain, bb_freq.to_double ());
     874           93 :           dump_insn_slim (dump_file, insn);
     875              :         }
     876              :     }
     877              : 
     878      5890727 :   if (dump_file)
     879              :     {
     880          136 :       fprintf (dump_file, "  Instruction conversion gain: %d, \n",
     881              :                gain);
     882          136 :       fprintf (dump_file, "  Registers conversion cost: %d\n",
     883              :                cost_sse_integer);
     884          136 :       fprintf (dump_file, "  Weighted instruction conversion gain: %.2f, \n",
     885              :                weighted_gain.to_double ());
     886          136 :       fprintf (dump_file, "  Weighted registers conversion cost: %.2f\n",
     887              :                weighted_cost_sse_integer.to_double ());
     888              :     }
     889              : 
     890      5890727 :   if (weighted_gain != weighted_cost_sse_integer)  /* The weighted comparison decides unless it is an exact tie.  */
     891      4756721 :     return weighted_gain > weighted_cost_sse_integer;
     892              :   else
     893      1134006 :     return gain > cost_sse_integer;;  /* Tie-break on unweighted values.  NOTE(review): stray second `;' (empty statement).  */
     894              : }
     895              : 
     896              : /* Insert the generated conversion instruction sequence INSNS
     897              :    after instruction AFTER.  A new BB may be required in case
     898              :    the instruction has an EH region attached.  */
     899              : 
     900              : void
     901        30742 : scalar_chain::emit_conversion_insns (rtx insns, rtx_insn *after)
     902              : {
     903        30742 :   if (!control_flow_insn_p (after))  /* AFTER is not a control flow insn: emit directly behind it.  */
     904              :     {
     905        30529 :       emit_insn_after (insns, after);
     906        30529 :       return;
     907              :     }
     908              : 
     909          213 :   basic_block bb = BLOCK_FOR_INSN (after);
     910          213 :   edge e = find_fallthru_edge (bb->succs);
     911          213 :   gcc_assert (e);  /* A fallthru successor edge is required here.  */
     912              : 
     913          213 :   basic_block new_bb = split_edge (e);  /* Split the fallthru edge and emit INSNS after the head of the new block.  */
     914          213 :   emit_insn_after (insns, BB_HEAD (new_bb));
     915              : }
     916              : 
     917              : } // anon namespace
     918              : 
     919              : /* Generate the canonical SET_SRC to move GPR to a VMODE vector register,
     920              :    zeroing the upper parts.  The RTL form depends on the number of units in VMODE.  */
     921              : 
     922              : static rtx
     923       173086 : gen_gpr_to_xmm_move_src (enum machine_mode vmode, rtx gpr)
     924              : {
     925       346172 :   switch (GET_MODE_NUNITS (vmode))
     926              :     {
     927           25 :     case 1:  /* Single unit: a subreg of GPR at offset 0.  */
     928           25 :       return gen_rtx_SUBREG (vmode, gpr, 0);
     929       172504 :     case 2:  /* Two units: GPR concatenated with a zero of the inner mode.  */
     930       345008 :       return gen_rtx_VEC_CONCAT (vmode, gpr,
     931              :                                  CONST0_RTX (GET_MODE_INNER (vmode)));
     932          557 :     default:  /* More units: duplicate GPR and merge with a zero vector using mask 1.  */
     933          557 :       return gen_rtx_VEC_MERGE (vmode, gen_rtx_VEC_DUPLICATE (vmode, gpr),
     934              :                                 CONST0_RTX (vmode), GEN_INT (HOST_WIDE_INT_1U));
     935              :     }
     936              : }
     937              : 
     938              : /* Make a vector copy of scalar register REG: emit, after INSN, a sequence
     939              :    that loads REG into the vector register defs_map associates with it.  */
     940              : 
     941              : void
     942         8088 : scalar_chain::make_vector_copies (rtx_insn *insn, rtx reg)
     943              : {
     944         8088 :   rtx vreg = *defs_map.get (reg);  /* Dereferenced unchecked: REG must have an entry in defs_map.  */
     945              : 
     946         8088 :   start_sequence ();
     947         8088 :   if (!TARGET_INTER_UNIT_MOVES_TO_VEC)  /* Go through a stack temporary instead of a direct move.  */
     948              :     {
     949            0 :       rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
     950            0 :       if (smode == DImode && !TARGET_64BIT)  /* Store the DImode value as two SImode halves.  */
     951              :         {
     952            0 :           emit_move_insn (adjust_address (tmp, SImode, 0),
     953              :                           gen_rtx_SUBREG (SImode, reg, 0));
     954            0 :           emit_move_insn (adjust_address (tmp, SImode, 4),
     955              :                           gen_rtx_SUBREG (SImode, reg, 4));
     956              :         }
     957              :       else
     958            0 :         emit_move_insn (copy_rtx (tmp), reg);
     959            0 :       emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
     960              :                               gen_gpr_to_xmm_move_src (vmode, tmp)));
     961              :     }
     962         8088 :   else if (!TARGET_64BIT && smode == DImode)  /* DImode without TARGET_64BIT: assemble the vector from the two SImode halves of REG.  */
     963              :     {
     964         7950 :       if (TARGET_SSE4_1)
     965              :         {
     966          356 :           emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
     967              :                                       CONST0_RTX (V4SImode),
     968              :                                       gen_rtx_SUBREG (SImode, reg, 0)));
     969          356 :           emit_insn (gen_sse4_1_pinsrd (gen_rtx_SUBREG (V4SImode, vreg, 0),
     970              :                                         gen_rtx_SUBREG (V4SImode, vreg, 0),
     971              :                                         gen_rtx_SUBREG (SImode, reg, 4),
     972              :                                         GEN_INT (2)));
     973              :         }
     974              :       else
     975              :         {
     976         7594 :           rtx tmp = gen_reg_rtx (DImode);  /* Scratch pseudo that receives the half at byte offset 4.  */
     977         7594 :           emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, vreg, 0),
     978              :                                       CONST0_RTX (V4SImode),
     979              :                                       gen_rtx_SUBREG (SImode, reg, 0)));
     980         7594 :           emit_insn (gen_sse2_loadld (gen_rtx_SUBREG (V4SImode, tmp, 0),
     981              :                                       CONST0_RTX (V4SImode),
     982              :                                       gen_rtx_SUBREG (SImode, reg, 4)));
     983         7594 :           emit_insn (gen_vec_interleave_lowv4si  /* Combine the two loaded halves into VREG.  */
     984              :                      (gen_rtx_SUBREG (V4SImode, vreg, 0),
     985              :                       gen_rtx_SUBREG (V4SImode, vreg, 0),
     986              :                       gen_rtx_SUBREG (V4SImode, tmp, 0)));
     987              :         }
     988              :     }
     989              :   else
     990          138 :     emit_insn (gen_rtx_SET (gen_rtx_SUBREG (vmode, vreg, 0),
     991              :                             gen_gpr_to_xmm_move_src (vmode, reg)));
     992         8088 :   rtx_insn *seq = end_sequence ();
     993         8088 :   emit_conversion_insns (seq, insn);  /* Place the collected sequence after INSN.  */
     994              : 
     995         8088 :   if (dump_file)
     996            0 :     fprintf (dump_file,
     997              :              "  Copied r%d to a vector register r%d for insn %d\n",
     998            0 :              REGNO (reg), REGNO (vreg), INSN_UID (insn));
     999         8088 : }
    1000              : 
    1001              : /* Copy the definition SRC of INSN inside the chain to DST for
    1002              :    scalar uses outside of the chain.  The copy sequence is emitted after INSN.  */
    1003              : 
    1004              : void
    1005        21892 : scalar_chain::convert_reg (rtx_insn *insn, rtx dst, rtx src)
    1006              : {
    1007        21892 :   start_sequence ();
    1008        21892 :   if (!TARGET_INTER_UNIT_MOVES_FROM_VEC)  /* Go through a stack temporary instead of a direct move.  */
    1009              :     {
    1010            0 :       rtx tmp = assign_386_stack_local (smode, SLOT_STV_TEMP);
    1011            0 :       emit_move_insn (tmp, src);
    1012            0 :       if (!TARGET_64BIT && smode == DImode)  /* Reload the DImode value as two SImode halves.  */
    1013              :         {
    1014            0 :           emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
    1015              :                           adjust_address (tmp, SImode, 0));
    1016            0 :           emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
    1017              :                           adjust_address (tmp, SImode, 4));
    1018              :         }
    1019              :       else
    1020            0 :         emit_move_insn (dst, copy_rtx (tmp));
    1021              :     }
    1022        21892 :   else if (!TARGET_64BIT && smode == DImode)  /* DImode without TARGET_64BIT: extract the two SImode halves of DST separately.  */
    1023              :     {
    1024        21002 :       if (TARGET_SSE4_1)
    1025              :         {
    1026            0 :           rtx tmp = gen_rtx_PARALLEL (VOIDmode,  /* Selector for V4SI element 0.  */
    1027              :                                       gen_rtvec (1, const0_rtx));
    1028            0 :           emit_insn
    1029            0 :               (gen_rtx_SET
    1030              :                (gen_rtx_SUBREG (SImode, dst, 0),
    1031              :                 gen_rtx_VEC_SELECT (SImode,
    1032              :                                     gen_rtx_SUBREG (V4SImode, src, 0),
    1033              :                                     tmp)));
    1034              : 
    1035            0 :           tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const1_rtx));  /* Selector for V4SI element 1.  */
    1036            0 :           emit_insn
    1037            0 :               (gen_rtx_SET
    1038              :                (gen_rtx_SUBREG (SImode, dst, 4),
    1039              :                 gen_rtx_VEC_SELECT (SImode,
    1040              :                                     gen_rtx_SUBREG (V4SImode, src, 0),
    1041              :                                     tmp)));
    1042              :         }
    1043              :       else
    1044              :         {
    1045        21002 :           rtx vcopy = gen_reg_rtx (V2DImode);  /* Work on a copy so that SRC itself is not shifted.  */
    1046        21002 :           emit_move_insn (vcopy, gen_rtx_SUBREG (V2DImode, src, 0));
    1047        21002 :           emit_move_insn (gen_rtx_SUBREG (SImode, dst, 0),
    1048              :                           gen_rtx_SUBREG (SImode, vcopy, 0));
    1049        21002 :           emit_move_insn (vcopy,  /* Shift right by 32 so the next SImode read yields the upper half.  */
    1050              :                           gen_rtx_LSHIFTRT (V2DImode,
    1051              :                                             vcopy, GEN_INT (32)));
    1052        21002 :           emit_move_insn (gen_rtx_SUBREG (SImode, dst, 4),
    1053              :                           gen_rtx_SUBREG (SImode, vcopy, 0));
    1054              :         }
    1055              :     }
    1056              :   else
    1057          890 :     emit_move_insn (dst, src);
    1058              : 
    1059        21892 :   rtx_insn *seq = end_sequence ();
    1060        21892 :   emit_conversion_insns (seq, insn);  /* Place the collected sequence after INSN.  */
    1061              : 
    1062        21892 :   if (dump_file)
    1063            0 :     fprintf (dump_file,
    1064              :              "  Copied r%d to a scalar register r%d for insn %d\n",
    1065            0 :              REGNO (src), REGNO (dst), INSN_UID (insn));
    1066        21892 : }
    1067              : 
    1068              : /* Helper function to convert immediate constant X to a VMODE vector constant: X in element 0 and zeros in all other elements, or the all-ones vector when X is -1.  */
    1069              : static rtx
    1070        41474 : smode_convert_cst (rtx x, enum machine_mode vmode)
    1071              : {
    1072              :   /* Prefer all ones vector in case of -1.  */
    1073        41474 :   if (constm1_operand (x, GET_MODE (x)))
    1074          894 :     return CONSTM1_RTX (vmode);
    1075              : 
    1076        40580 :   unsigned n = GET_MODE_NUNITS (vmode);
    1077        40580 :   rtx *v = XALLOCAVEC (rtx, n);  /* Temporary array holding the N element rtxes.  */
    1078        40580 :   v[0] = x;  /* X occupies element 0 ...  */
    1079        46366 :   for (unsigned i = 1; i < n; ++i)
    1080         5786 :     v[i] = const0_rtx;  /* ... and every remaining element is zero.  */
    1081        40580 :   return gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
    1082              : }
    1083              : 
    1084              : /* Convert operand OP in INSN; *OP is rewritten in place.  We should handle
    1085              :    memory operands and uninitialized registers.
    1086              :    All other register uses are converted during
    1087              :    registers conversion.  Any preload insns are emitted before INSN.  */
    1088              : 
    1089              : void
    1090       247603 : scalar_chain::convert_op (rtx *op, rtx_insn *insn)
    1091              : {
    1092       247603 :   rtx tmp;
    1093              : 
    1094       247603 :   if (GET_MODE (*op) == V1TImode)  /* V1TImode operands are left untouched.  */
    1095              :     return;
    1096              : 
    1097       247452 :   *op = copy_rtx_if_shared (*op);
    1098              : 
    1099       247452 :   if (GET_CODE (*op) == NOT
    1100       247452 :       || GET_CODE (*op) == ASHIFT)  /* Convert the inner operand first, then retag the wrapper as VMODE.  */
    1101              :     {
    1102         3490 :       convert_op (&XEXP (*op, 0), insn);
    1103         3490 :       PUT_MODE (*op, vmode);
    1104              :     }
    1105              :   else if (MEM_P (*op))
    1106              :     {
    1107       172948 :       rtx_insn *movabs = NULL;  /* Set only when the extra load below is emitted.  */
    1108              : 
    1109              :       /* Emit MOVABS to load from a 64-bit absolute address to a GPR.  */
    1110       172948 :       if (!memory_operand (*op, GET_MODE (*op)))
    1111              :         {
    1112            0 :           tmp = gen_reg_rtx (GET_MODE (*op));
    1113            0 :           movabs = emit_insn_before (gen_rtx_SET (tmp, *op), insn);
    1114              : 
    1115            0 :           *op = tmp;
    1116              :         }
    1117              : 
    1118       172948 :       tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (GET_MODE (*op)), 0);  /* Fresh pseudo, viewed in VMODE, that receives the preloaded value.  */
    1119              : 
    1120       172948 :       rtx_insn *eh_insn
    1121       172948 :         = emit_insn_before (gen_rtx_SET (copy_rtx (tmp),
    1122              :                                          gen_gpr_to_xmm_move_src (vmode, *op)),
    1123       172948 :                             insn);
    1124              : 
    1125       172948 :       if (cfun->can_throw_non_call_exceptions)
    1126              :         {
    1127              :           /* Handle REG_EH_REGION note.  */
    1128       168867 :           rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
    1129       168867 :           if (note)
    1130              :             {
    1131         3597 :               if (movabs)  /* When the MOVABS load was emitted, it is the insn that gets the EH note.  */
    1132            0 :                 eh_insn = movabs;
    1133         3597 :               control_flow_insns.safe_push (eh_insn);
    1134         3597 :               add_reg_note (eh_insn, REG_EH_REGION, XEXP (note, 0));
    1135              :             }
    1136              :         }
    1137              : 
    1138       172948 :       *op = tmp;
    1139              : 
    1140       172948 :       if (dump_file)
    1141            0 :         fprintf (dump_file, "  Preloading operand for insn %d into r%d\n",
    1142            0 :                  INSN_UID (insn), reg_or_subregno (tmp));
    1143              :     }
    1144              :   else if (REG_P (*op))
    1145        64600 :     *op = gen_rtx_SUBREG (vmode, *op, 0);  /* Registers are simply viewed in VMODE.  */
    1146              :   else if (CONST_SCALAR_INT_P (*op))
    1147              :     {
    1148         6414 :       rtx vec_cst = smode_convert_cst (*op, vmode);
    1149              : 
    1150         6414 :       if (!standard_sse_constant_p (vec_cst, vmode))  /* Non-standard constants are forced into constant memory.  */
    1151              :         {
    1152         2698 :           start_sequence ();
    1153         2698 :           vec_cst = validize_mem (force_const_mem (vmode, vec_cst));
    1154         2698 :           rtx_insn *seq = end_sequence ();
    1155         2698 :           emit_insn_before (seq, insn);
    1156              :         }
    1157              : 
    1158         6414 :       tmp = gen_rtx_SUBREG (vmode, gen_reg_rtx (smode), 0);
    1159              : 
    1160         6414 :       emit_insn_before (gen_move_insn (copy_rtx (tmp), vec_cst), insn);  /* Load the vector constant into the new pseudo before INSN.  */
    1161         6414 :       *op = tmp;
    1162              :     }
    1163              :   else
    1164              :     {
    1165            0 :       gcc_assert (SUBREG_P (*op));  /* Anything else must already be a VMODE subreg.  */
    1166            0 :       gcc_assert (GET_MODE (*op) == vmode);
    1167              :     }
    1168              : }
    1169              : 
    1170              : /* Convert CCZmode COMPARE to vector mode.  OP1 and OP2 are the operands
                      :    of the COMPARE found in INSN.  Any insns needed to prepare the
                      :    operands are emitted before INSN.  Three shapes are handled:
                      :    a comparison against a non-zero OP2 becomes an XOR of the converted
                      :    operands; a comparison of (AND (NOT a) b) against zero becomes a
                      :    VMODE and-not; a plain (AND a b) against zero is returned directly
                      :    as a two-operand UNSPEC_PTEST.  In every case the value returned is
                      :    an UNSPEC_PTEST rtx in CCZmode for the caller to use as the new
                      :    source of the comparison.  */
    1171              : 
    1172              : rtx
    1173           10 : scalar_chain::convert_compare (rtx op1, rtx op2, rtx_insn *insn)
    1174              : {
    1175           10 :   rtx src, tmp;
    1176              : 
    1177              :   /* Handle any REG_EQUAL notes.  If the note is a CCZmode COMPARE of a
                      :      REG against a scalar integer constant, the constant is rewritten
                      :      in place as a VMODE constant: CONSTM1_RTX for minus one, otherwise
                      :      a CONST_VECTOR holding the value in element 0 and zero in all
                      :      other elements.  A REG second operand is left untouched.  Any
                      :      other note is removed from INSN.  */
    1178           10 :   tmp = find_reg_equal_equiv_note (insn);
    1179           10 :   if (tmp)
    1180              :     {
    1181            1 :       if (GET_CODE (XEXP (tmp, 0)) == COMPARE
    1182            1 :           && GET_MODE (XEXP (tmp, 0)) == CCZmode
    1183            1 :           && REG_P (XEXP (XEXP (tmp, 0), 0)))
    1184              :         {
    1185            1 :           rtx *op = &XEXP (XEXP (tmp, 0), 1);
    1186            1 :           if (CONST_SCALAR_INT_P (*op))
    1187              :             {
    1188            1 :               if (constm1_operand (*op, GET_MODE (*op)))
    1189            0 :                 *op = CONSTM1_RTX (vmode);
    1190              :               else
    1191              :                 {
    1192            1 :                   unsigned n = GET_MODE_NUNITS (vmode);
    1193            1 :                   rtx *v = XALLOCAVEC (rtx, n);
    1194            1 :                   v[0] = *op;
    1195            1 :                   for (unsigned i = 1; i < n; ++i)
    1196            0 :                     v[i] = const0_rtx;
    1197            1 :                   *op = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (n, v));
    1198              :                 }
    1199              :               tmp = NULL_RTX;
    1200              :             }
    1201            0 :           else if (REG_P (*op))
    1202              :             tmp = NULL_RTX;
    1203              :         }
    1204              : 
    1205              :       if (tmp)
    1206            0 :         remove_note (insn, tmp);
    1207              :     }
    1208              : 
    1209              :   /* Comparison against anything other than zero requires an XOR:
                      :      OP1 == OP2 is tested as (OP1 ^ OP2) == 0.  */
    1210           10 :   if (op2 != const0_rtx)
    1211              :     {
    1212            4 :       convert_op (&op1, insn);
    1213            4 :       convert_op (&op2, insn);
    1214              :       /* If both operands are MEMs, explicitly load the OP1 into TMP.  */
    1215            4 :       if (MEM_P (op1) && MEM_P (op2))
    1216              :         {
    1217            0 :           tmp = gen_reg_rtx (vmode);
    1218            0 :           emit_insn_before (gen_rtx_SET (tmp, op1), insn);
    1219            0 :           src = tmp;
    1220              :         }
    1221              :       else
    1222              :         src = op1;
    1223            4 :       src = gen_rtx_XOR (vmode, src, op2);
    1224              :     }
    1225            6 :   else if (GET_CODE (op1) == AND
    1226            0 :            && GET_CODE (XEXP (op1, 0)) == NOT)
    1227              :     {
    1228            0 :       rtx op11 = XEXP (XEXP (op1, 0), 0);
    1229            0 :       rtx op12 = XEXP (op1, 1);
    1230            0 :       convert_op (&op11, insn);
    1231            0 :       convert_op (&op12, insn);
    1232            0 :       if (!REG_P (op11))
    1233              :         {
    1234            0 :           tmp = gen_reg_rtx (vmode);
    1235            0 :           emit_insn_before (gen_rtx_SET (tmp, op11), insn);
    1236            0 :           op11 = tmp;
    1237              :         }
    1238            0 :       src = gen_rtx_AND (vmode, gen_rtx_NOT (vmode, op11), op12);
    1239            0 :     }
    1240            6 :   else if (GET_CODE (op1) == AND)
    1241              :     {
    1242            0 :       rtx op11 = XEXP (op1, 0);
    1243            0 :       rtx op12 = XEXP (op1, 1);
    1244            0 :       convert_op (&op11, insn);
    1245            0 :       convert_op (&op12, insn);
    1246            0 :       if (!REG_P (op11))
    1247              :         {
    1248            0 :           tmp = gen_reg_rtx (vmode);
    1249            0 :           emit_insn_before (gen_rtx_SET (tmp, op11), insn);
    1250            0 :           op11 = tmp;
    1251              :         }
    1252            0 :       return gen_rtx_UNSPEC (CCZmode, gen_rtvec (2, op11, op12),
    1253              :                              UNSPEC_PTEST);
    1254              :     }
    1255              :   else
    1256              :     {
    1257            6 :       convert_op (&op1, insn);
    1258            6 :       src = op1;
    1259              :     }
    1260              :   /* Load a non-register SRC into a fresh VMODE pseudo before INSN.  */
    1261           10 :   if (!REG_P (src))
    1262              :     {
    1263            6 :       tmp = gen_reg_rtx (vmode);
    1264            6 :       emit_insn_before (gen_rtx_SET (tmp, src), insn);
    1265            6 :       src = tmp;
    1266              :     }
    1267              :   /* NOTE(review): presumably this replicates the low element of SRC
                      :      across the vector so that only the scalar value influences the
                      :      PTEST below -- confirm against the expanders used.  */
    1268           10 :   if (vmode == V2DImode)
    1269              :     {
    1270            0 :       tmp = gen_reg_rtx (vmode);
    1271            0 :       emit_insn_before (gen_vec_interleave_lowv2di (tmp, src, src), insn);
    1272            0 :       src = tmp;
    1273              :     }
    1274           10 :   else if (vmode == V4SImode)
    1275              :     {
    1276            0 :       tmp = gen_reg_rtx (vmode);
    1277            0 :       emit_insn_before (gen_sse2_pshufd (tmp, src, const0_rtx), insn);
    1278            0 :       src = tmp;
    1279              :     }
    1280              : 
    1281           10 :   return gen_rtx_UNSPEC (CCZmode, gen_rtvec (2, src, src), UNSPEC_PTEST);
    1282              : }
    1283              : 
    1284              : /* Helper function for converting INSN to vector mode.  It performs the
                      :    register bookkeeping around INSN: for each register defined by INSN
                      :    that is recorded in DEFS_CONV it either calls convert_reg (when some
                      :    non-debug use needs it) or fixes up the debug insns that use it, and
                      :    it then replaces every non-memory use in INSN of a register present
                      :    in DEFS_MAP with the mapped register.  */
    1285              : 
    1286              : void
    1287      1333620 : scalar_chain::convert_insn_common (rtx_insn *insn)
    1288              : {
    1289              :   /* Generate copies for out-of-chain uses of defs and adjust debug uses.
                      :      A def gets a copy as soon as one non-debug use is either flagged
                      :      DF_REF_REG_MEM_P or belongs to an insn that is not in INSNS.  */
    1290      2038619 :   for (df_ref ref = DF_INSN_DEFS (insn); ref; ref = DF_REF_NEXT_LOC (ref))
    1291       704999 :     if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref)))
    1292              :       {
    1293        23319 :         df_link *use;
    1294        44433 :         for (use = DF_REF_CHAIN (ref); use; use = use->next)
    1295        43006 :           if (NONDEBUG_INSN_P (DF_REF_INSN (use->ref))
    1296        43006 :               && (DF_REF_REG_MEM_P (use->ref)
    1297        38462 :                   || !bitmap_bit_p (insns, DF_REF_INSN_UID (use->ref))))
    1298              :             break;
    1299        23319 :         if (use)
    1300        21892 :           convert_reg (insn, DF_REF_REG (ref),
    1301        21892 :                        *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]));
    1302         1427 :         else if (MAY_HAVE_DEBUG_BIND_INSNS)
    1303              :           {
    1304              :             /* If we generated a scalar copy we can leave debug-insns
    1305              :                as-is, if not, we have to adjust them.  */
    1306         1307 :             auto_vec<rtx_insn *, 5> to_reset_debug_insns;
    1307         3920 :             for (use = DF_REF_CHAIN (ref); use; use = use->next)
    1308         2613 :               if (DEBUG_INSN_P (DF_REF_INSN (use->ref)))
    1309              :                 {
    1310          862 :                   rtx_insn *debug_insn = DF_REF_INSN (use->ref);
    1311              :                   /* If there's a reaching definition outside of the
    1312              :                      chain we have to reset.  */
    1313          862 :                   df_link *def;
    1314         3010 :                   for (def = DF_REF_CHAIN (use->ref); def; def = def->next)
    1315         2332 :                     if (!bitmap_bit_p (insns, DF_REF_INSN_UID (def->ref)))
    1316              :                       break;
    1317          862 :                   if (def)
    1318          184 :                     to_reset_debug_insns.safe_push (debug_insn);
    1319              :                   else
    1320              :                     {
    1321          678 :                       *DF_REF_REAL_LOC (use->ref)
    1322          678 :                         = *defs_map.get (regno_reg_rtx [DF_REF_REGNO (ref)]);
    1323          678 :                       df_insn_rescan (debug_insn);
    1324              :                     }
    1325              :                 }
    1326              :             /* Have to do the reset outside of the DF_CHAIN walk to not
    1327              :                disrupt it.  */
    1328         2798 :             while (!to_reset_debug_insns.is_empty ())
    1329              :               {
    1330          184 :                 rtx_insn *debug_insn = to_reset_debug_insns.pop ();
    1331          184 :                 INSN_VAR_LOCATION_LOC (debug_insn) = gen_rtx_UNKNOWN_VAR_LOC ();
    1332          184 :                 df_insn_rescan_debug_internal (debug_insn);
    1333              :               }
    1334         1307 :           }
    1335              :       }
    1336              : 
    1337              :   /* Replace uses in this insn with the defs we use in the chain.  Uses
                      :      flagged DF_REF_REG_MEM_P are skipped, as are registers that have
                      :      no entry in DEFS_MAP.  */
    1338      3335094 :   for (df_ref ref = DF_INSN_USES (insn); ref; ref = DF_REF_NEXT_LOC (ref))
    1339      2001474 :     if (!DF_REF_REG_MEM_P (ref))
    1340       715526 :       if (rtx *vreg = defs_map.get (regno_reg_rtx[DF_REF_REGNO (ref)]))
    1341              :         {
    1342              :           /* Also update a corresponding REG_DEAD note.  */
    1343        35114 :           rtx note = find_reg_note (insn, REG_DEAD, DF_REF_REG (ref));
    1344        35114 :           if (note)
    1345        23182 :             XEXP (note, 0) = *vreg;
    1346        35114 :           *DF_REF_REAL_LOC (ref) = *vreg;
    1347              :         }
    1348      1333620 : }
    1349              : 
    1350              : /* Convert INSN which is an SImode or DImode rotation by a constant
    1351              :    to vector mode.  CODE is either ROTATE or ROTATERT with operands
    1352              :    OP0 and OP1.  Returns the SET_SRC of the last instruction in the
    1353              :    resulting sequence, which is emitted before INSN.
                      :    OP1 is read with INTVAL, so it must be a CONST_INT.  OP0 is always
                      :    converted with convert_op first; a rotation by zero simply returns
                      :    the converted OP0.  With TARGET_AVX512VL (and no cheaper shuffle
                      :    available) the rotate rtx itself is rebuilt in the vector mode;
                      :    otherwise the rotation is composed from shuffles and a logical
                      :    right shift.  */
    1354              : 
    1355              : rtx
    1356           92 : general_scalar_chain::convert_rotate (enum rtx_code code, rtx op0, rtx op1,
    1357              :                                       rtx_insn *insn)
    1358              : {
    1359           92 :   int bits = INTVAL (op1);
    1360           92 :   rtx pat, result;
    1361              : 
    1362           92 :   convert_op (&op0, insn);
    1363           92 :   if (bits == 0)
    1364            0 :     return op0;
    1365              :   /* DImode: a left rotate by N is handled as a right rotate by 64 - N.  */
    1366           92 :   if (smode == DImode)
    1367              :     {
    1368           92 :       if (code == ROTATE)
    1369           45 :         bits = 64 - bits;
    1370           92 :       if (bits == 32)
    1371              :         {
    1372            0 :           rtx tmp1 = gen_reg_rtx (V4SImode);
    1373            0 :           pat = gen_sse2_pshufd (tmp1, gen_lowpart (V4SImode, op0),
    1374              :                                  GEN_INT (225));
    1375            0 :           emit_insn_before (pat, insn);
    1376            0 :           result = gen_lowpart (V2DImode, tmp1);
    1377              :         }
    1378           92 :       else if (TARGET_AVX512VL)
    1379            0 :         result = simplify_gen_binary (code, V2DImode, op0, op1);
    1380           92 :       else if (bits == 16 || bits == 48)
    1381              :         {
    1382            0 :           rtx tmp1 = gen_reg_rtx (V8HImode);
    1383            0 :           pat = gen_sse2_pshuflw (tmp1, gen_lowpart (V8HImode, op0),
    1384              :                                   GEN_INT (bits == 16 ? 57 : 147));
    1385            0 :           emit_insn_before (pat, insn);
    1386            0 :           result = gen_lowpart (V2DImode, tmp1);
    1387              :         }
    1388           92 :       else if ((bits & 0x07) == 0)
    1389              :         {
    1390            0 :           rtx tmp1 = gen_reg_rtx (V4SImode);
    1391            0 :           pat = gen_sse2_pshufd (tmp1, gen_lowpart (V4SImode, op0),
    1392              :                                  GEN_INT (68));
    1393            0 :           emit_insn_before (pat, insn);
    1394            0 :           rtx tmp2 = gen_reg_rtx (V1TImode);
    1395            0 :           pat = gen_sse2_lshrv1ti3 (tmp2, gen_lowpart (V1TImode, tmp1),
    1396              :                                     GEN_INT (bits));
    1397            0 :           emit_insn_before (pat, insn);
    1398            0 :           result = gen_lowpart (V2DImode, tmp2);
    1399              :         }
    1400              :       else
    1401              :         {
    1402           92 :           rtx tmp1 = gen_reg_rtx (V4SImode);
    1403           92 :           pat = gen_sse2_pshufd (tmp1, gen_lowpart (V4SImode, op0),
    1404              :                                  GEN_INT (20));
    1405           92 :           emit_insn_before (pat, insn);
    1406           92 :           rtx tmp2 = gen_reg_rtx (V2DImode);
    1407           92 :           pat = gen_lshrv2di3 (tmp2, gen_lowpart (V2DImode, tmp1),
    1408              :                                GEN_INT (bits & 31));
    1409           92 :           emit_insn_before (pat, insn);
    1410           92 :           rtx tmp3 = gen_reg_rtx (V4SImode);
    1411          139 :           pat = gen_sse2_pshufd (tmp3, gen_lowpart (V4SImode, tmp2),
    1412              :                                  GEN_INT (bits > 32 ? 34 : 136));
    1413           92 :           emit_insn_before (pat, insn);
    1414           92 :           result = gen_lowpart (V2DImode, tmp3);
    1415              :         }
    1416              :     }
    1417            0 :   else if (bits == 16)
    1418              :     {
    1419            0 :       rtx tmp1 = gen_reg_rtx (V8HImode);
    1420            0 :       pat = gen_sse2_pshuflw (tmp1, gen_lowpart (V8HImode, op0), GEN_INT (225));
    1421            0 :       emit_insn_before (pat, insn);
    1422            0 :       result = gen_lowpart (V4SImode, tmp1);
    1423              :     }
    1424            0 :   else if (TARGET_AVX512VL)
    1425            0 :     result = simplify_gen_binary (code, V4SImode, op0, op1);
    1426              :   else
    1427              :     {
    1428            0 :       if (code == ROTATE)
    1429            0 :         bits = 32 - bits;
    1430              : 
    1431            0 :       rtx tmp1 = gen_reg_rtx (V4SImode);
    1432            0 :       emit_insn_before (gen_sse2_pshufd (tmp1, op0, GEN_INT (224)), insn);
    1433            0 :       rtx tmp2 = gen_reg_rtx (V2DImode);
    1434            0 :       pat = gen_lshrv2di3 (tmp2, gen_lowpart (V2DImode, tmp1),
    1435              :                            GEN_INT (bits));
    1436            0 :       emit_insn_before (pat, insn);
    1437            0 :       result = gen_lowpart (V4SImode, tmp2);
    1438              :     }
    1439              : 
    1440              :   return result;
    1441              : }
    1442              : 
    1443              : /* Convert INSN to vector mode.  INSN is assumed to be a single_set (the
                      :    result of single_set is used without a null check).  The
                      :    destination and source of that SET are rewritten for VMODE
                      :    according to the rtx code of the source, the pattern of INSN is
                      :    replaced by the rewritten SET alone, and INSN is re-recognized and
                      :    rescanned.  It is a fatal error if the new pattern is not
                      :    recognized, and an unexpected source code hits gcc_unreachable.  */
    1444              : 
    1445              : void
    1446       412266 : general_scalar_chain::convert_insn (rtx_insn *insn)
    1447              : {
    1448       412266 :   rtx def_set = single_set (insn);
    1449       412266 :   rtx src = SET_SRC (def_set);
    1450       412266 :   rtx dst = SET_DEST (def_set);
    1451       412266 :   rtx subreg;
    1452              : 
    1453       412266 :   if (MEM_P (dst) && !REG_P (src))
    1454              :     {
    1455              :       /* There are no scalar integer instructions and therefore
    1456              :          temporary register usage is required.  */
    1457          762 :       rtx tmp = gen_reg_rtx (smode);
    1458          762 :       emit_conversion_insns (gen_move_insn (dst, tmp), insn);
    1459          762 :       dst = gen_rtx_SUBREG (vmode, tmp, 0);
    1460          762 :     }
    1461       411504 :   else if (REG_P (dst) && GET_MODE (dst) == smode)
    1462              :     {
    1463              :       /* Replace the definition with a SUBREG to the definition we
    1464              :          use inside the chain.  */
    1465       215877 :       rtx *vdef = defs_map.get (dst);
    1466       215877 :       if (vdef)
    1467        23319 :         dst = *vdef;
    1468       215877 :       dst = gen_rtx_SUBREG (vmode, dst, 0);
    1469              :       /* IRA doesn't like to have REG_EQUAL/EQUIV notes when the SET_DEST
    1470              :          is a non-REG_P.  So kill those off.  */
    1471       215877 :       rtx note = find_reg_equal_equiv_note (insn);
    1472       215877 :       if (note)
    1473         9550 :         remove_note (insn, note);
    1474              :     }
    1475              :   /* Rewrite SRC according to its rtx code.  */
    1476       412266 :   switch (GET_CODE (src))
    1477              :     {
    1478        30139 :     case PLUS:
    1479        30139 :     case MINUS:
    1480        30139 :     case IOR:
    1481        30139 :     case XOR:
    1482        30139 :     case AND:
    1483        30139 :     case SMAX:
    1484        30139 :     case SMIN:
    1485        30139 :     case UMAX:
    1486        30139 :     case UMIN:
    1487        30139 :       convert_op (&XEXP (src, 1), insn);
    1488              :       /* FALLTHRU */
    1489              : 
    1490        37435 :     case ABS:
    1491        37435 :     case ASHIFT:
    1492        37435 :     case ASHIFTRT:
    1493        37435 :     case LSHIFTRT:
    1494        37435 :       convert_op (&XEXP (src, 0), insn);
    1495        37435 :       PUT_MODE (src, vmode);
    1496        37435 :       break;
    1497              : 
    1498           92 :     case ROTATE:
    1499           92 :     case ROTATERT:
    1500           92 :       src = convert_rotate (GET_CODE (src), XEXP (src, 0), XEXP (src, 1),
    1501              :                             insn);
    1502           92 :       break;
    1503              : 
    1504          400 :     case NEG:
    1505          400 :       src = XEXP (src, 0);
    1506              : 
    1507          400 :       if (GET_CODE (src) == ABS)
    1508              :         {
    1509            0 :           src = XEXP (src, 0);
    1510            0 :           convert_op (&src, insn);
    1511            0 :           subreg = gen_reg_rtx (vmode);
    1512            0 :           emit_insn_before (gen_rtx_SET (subreg,
    1513              :                                          gen_rtx_ABS (vmode, src)), insn);
    1514            0 :           src = subreg;
    1515              :         }
    1516              :       else
    1517          400 :         convert_op (&src, insn);
    1518              :       /* Negation is expressed as a subtraction from a zeroed register.  */
    1519          400 :       subreg = gen_reg_rtx (vmode);
    1520          400 :       emit_insn_before (gen_move_insn (subreg, CONST0_RTX (vmode)), insn);
    1521          400 :       src = gen_rtx_MINUS (vmode, subreg, src);
    1522          400 :       break;
    1523              : 
    1524          250 :     case NOT:
                      :       /* Complement is expressed as an XOR with a register loaded with
                      :          CONSTM1_RTX.  */
    1525          250 :       src = XEXP (src, 0);
    1526          250 :       convert_op (&src, insn);
    1527          250 :       subreg = gen_reg_rtx (vmode);
    1528          250 :       emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (vmode)), insn);
    1529          250 :       src = gen_rtx_XOR (vmode, src, subreg);
    1530          250 :       break;
    1531              : 
    1532       170798 :     case MEM:
    1533       170798 :       if (!REG_P (dst))
    1534       170798 :         convert_op (&src, insn);
    1535              :       break;
    1536              : 
    1537       197036 :     case REG:
    1538       197036 :       if (!MEM_P (dst))
    1539         1409 :         convert_op (&src, insn);
    1540              :       break;
    1541              : 
    1542            0 :     case SUBREG:
    1543            0 :       gcc_assert (GET_MODE (src) == vmode);
    1544              :       break;
    1545              : 
    1546            0 :     case COMPARE:
    1547            0 :       dst = gen_rtx_REG (CCZmode, FLAGS_REG);
    1548            0 :       src = convert_compare (XEXP (src, 0), XEXP (src, 1), insn);
    1549            0 :       break;
    1550              : 
    1551         3402 :     case CONST_INT:
    1552         3402 :       convert_op (&src, insn);
    1553         3402 :       break;
    1554              : 
    1555         2853 :     case VEC_SELECT:
    1556         2853 :       if (XVECEXP (XEXP (src, 1), 0, 0) == const0_rtx)
    1557         1875 :         src = XEXP (src, 0);
    1558          978 :       else if (smode == DImode)
    1559              :         {
    1560          735 :           rtx tmp = gen_lowpart (V1TImode, XEXP (src, 0));
    1561          735 :           dst = gen_lowpart (V1TImode, dst);
    1562          735 :           src = gen_rtx_LSHIFTRT (V1TImode, tmp, GEN_INT (64));
    1563              :         }
    1564              :       else
    1565              :         {
    1566          243 :           rtx tmp = XVECEXP (XEXP (src, 1), 0, 0);
    1567          243 :           rtvec vec = gen_rtvec (4, tmp, tmp, tmp, tmp);
    1568          243 :           rtx par = gen_rtx_PARALLEL (VOIDmode, vec);
    1569          243 :           src = gen_rtx_VEC_SELECT (vmode, XEXP (src, 0), par);
    1570              :         }
    1571              :       break;
    1572              : 
    1573            0 :     default:
    1574            0 :       gcc_unreachable ();
    1575              :     }
    1576              : 
    1577       412266 :   SET_SRC (def_set) = src;
    1578       412266 :   SET_DEST (def_set) = dst;
    1579              : 
    1580              :   /* Drop possible dead definitions by making the rewritten SET the
                      :      whole pattern of INSN.  */
    1581       412266 :   PATTERN (insn) = def_set;
    1582              :   /* Force re-recognition of the modified pattern.  */
    1583       412266 :   INSN_CODE (insn) = -1;
    1584       412266 :   int patt = recog_memoized (insn);
    1585       412266 :   if  (patt == -1)
    1586            0 :     fatal_insn_not_found (insn);
    1587       412266 :   df_insn_rescan (insn);
    1588       412266 : }
    1589              : 
    1590              : /* Helper function to compute gain for loading an immediate constant.
    1591              :    Typically, two movabsq for TImode vs. vmovdqa for V1TImode, but
    1592              :    with numerous special cases.
                      :    CST is the constant and BB the basic block used to decide between
                      :    size and speed costs.  The value returned is never positive: it
                      :    is a penalty (negative gain) for the special case below and zero
                      :    otherwise.  */
    1593              : 
    1594              : static int
    1595            8 : timode_immed_const_gain (rtx cst, basic_block bb)
    1596              : {
    1597              :   /* movabsq vs. movabsq+vmovq+vunpacklqdq.  This applies to a
                      :      two-element CONST_WIDE_INT whose two elements are equal.  */
    1598            8 :   if (CONST_WIDE_INT_P (cst)
    1599            5 :       && CONST_WIDE_INT_NUNITS (cst) == 2
    1600           13 :       && CONST_WIDE_INT_ELT (cst, 0) == CONST_WIDE_INT_ELT (cst, 1))
    1601            0 :     return optimize_bb_for_size_p (bb) ? -COSTS_N_BYTES (9)
    1602              :                                        : -COSTS_N_INSNS (2);
    1603              :   /* 2x movabsq ~ vmovdqa.  */
    1604              :   return 0;
    1605              : }
    1606              : 
    1607              : /* Return true if it's cost profitable for chain conversion.  */
    1608              : 
    1609              : bool
    1610       472109 : timode_scalar_chain::compute_convert_gain ()
    1611              : {
    1612              :   /* Assume that if we have to move TImode values between units,
    1613              :      then transforming this chain isn't worth it.  */
    1614       472109 :   if (cost_sse_integer)
    1615              :     return false;
    1616              : 
    1617       472109 :   bitmap_iterator bi;
    1618       472109 :   unsigned insn_uid;
    1619              : 
    1620              :   /* Split ties to prefer V1TImode when not optimizing for size.  */
    1621       472109 :   int gain = optimize_size ? 0 : 1;
    1622       472109 :   sreal weighted_gain  = 0;
    1623              : 
    1624       472109 :   if (dump_file)
    1625            0 :     fprintf (dump_file, "Computing gain for chain #%d...\n", chain_id);
    1626              : 
    1627      1399806 :   EXECUTE_IF_SET_IN_BITMAP (insns, 0, insn_uid, bi)
    1628              :     {
    1629       927697 :       rtx_insn *insn = DF_INSN_UID_GET (insn_uid)->insn;
    1630       927697 :       rtx def_set = single_set (insn);
    1631       927697 :       rtx src = SET_SRC (def_set);
    1632       927697 :       rtx dst = SET_DEST (def_set);
    1633       927697 :       HOST_WIDE_INT op1val;
    1634       927697 :       basic_block bb = BLOCK_FOR_INSN (insn);
    1635       927697 :       int scost, vcost;
    1636       927697 :       int igain = 0;
    1637       927697 :       profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
    1638       927697 :       bool speed_p = optimize_bb_for_speed_p (bb);
    1639       927697 :       sreal bb_freq = bb->count.to_sreal_scale (entry_count);
    1640              : 
    1641       927697 :       switch (GET_CODE (src))
    1642              :         {
    1643       455207 :         case REG:
    1644       455207 :           if (!speed_p)
    1645        20585 :             igain = MEM_P (dst) ? COSTS_N_BYTES (6) : COSTS_N_BYTES (3);
    1646              :           else
    1647              :             igain = COSTS_N_INSNS (1);
    1648              :           break;
    1649              : 
    1650       422636 :         case MEM:
    1651       422636 :           igain = !speed_p ? COSTS_N_BYTES (7) : COSTS_N_INSNS (1);
    1652              :           break;
    1653              : 
    1654        11350 :         case CONST_INT:
    1655        11350 :           if (MEM_P (dst)
    1656        11350 :               && standard_sse_constant_p (src, V1TImode))
    1657        10846 :             igain = !speed_p ? COSTS_N_BYTES (11) : 1;
    1658              :           break;
    1659              : 
    1660        35255 :         case CONST_WIDE_INT:
    1661              :           /* 2 x mov vs. vmovdqa.  */
    1662        35255 :           if (MEM_P (dst))
    1663        35053 :             igain = !speed_p ? COSTS_N_BYTES (3) : COSTS_N_INSNS (1);
    1664              :           break;
    1665              : 
    1666           19 :         case NOT:
    1667           19 :           if (MEM_P (dst))
    1668        24511 :             igain = -COSTS_N_INSNS (1);
    1669              :           break;
    1670              : 
    1671           14 :         case AND:
    1672           14 :           if (!MEM_P (dst))
    1673            3 :             igain = COSTS_N_INSNS (1);
    1674           14 :           if (CONST_SCALAR_INT_P (XEXP (src, 1)))
    1675            5 :             igain += timode_immed_const_gain (XEXP (src, 1), bb);
    1676              :           break;
    1677              : 
    1678         2816 :         case XOR:
    1679         2816 :         case IOR:
    1680         2816 :           if (timode_concatdi_p (src))
    1681              :             {
    1682              :               /* vmovq;vpinsrq (11 bytes).  */
    1683         2765 :               igain = speed_p ? -2 * ix86_cost->sse_to_integer
    1684              :                               : -COSTS_N_BYTES (11);
    1685              :               break;
    1686              :             }
    1687           51 :           if (!MEM_P (dst))
    1688           43 :             igain = COSTS_N_INSNS (1);
    1689           51 :           if (CONST_SCALAR_INT_P (XEXP (src, 1)))
    1690            3 :             igain += timode_immed_const_gain (XEXP (src, 1), bb);
    1691              :           break;
    1692              : 
    1693            0 :         case PLUS:
    1694            0 :           if (timode_concatdi_p (src))
    1695              :             /* vmovq;vpinsrq (11 bytes).  */
    1696            0 :             igain = speed_p ? -2 * ix86_cost->sse_to_integer
    1697              :                             : -COSTS_N_BYTES (11);
    1698              :           break;
    1699              : 
    1700          158 :         case ASHIFT:
    1701          158 :         case LSHIFTRT:
    1702              :           /* See ix86_expand_v1ti_shift.  */
    1703          158 :           op1val = INTVAL (XEXP (src, 1));
    1704          158 :           if (!speed_p)
    1705              :             {
    1706           15 :               if (op1val == 64 || op1val == 65)
    1707              :                 scost = COSTS_N_BYTES (5);
    1708           10 :               else if (op1val >= 66)
    1709              :                 scost = COSTS_N_BYTES (6);
    1710           10 :               else if (op1val == 1)
    1711              :                 scost = COSTS_N_BYTES (8);
    1712              :               else
    1713              :                 scost = COSTS_N_BYTES (9);
    1714              : 
    1715           14 :               if ((op1val & 7) == 0)
    1716              :                 vcost = COSTS_N_BYTES (5);
    1717           10 :               else if (op1val > 64)
    1718              :                 vcost = COSTS_N_BYTES (10);
    1719              :               else
    1720           10 :                 vcost = TARGET_AVX ? COSTS_N_BYTES (19) : COSTS_N_BYTES (23);
    1721              :             }
    1722              :           else
    1723              :             {
    1724          143 :               scost = COSTS_N_INSNS (2);
    1725          143 :               if ((op1val & 7) == 0)
    1726              :                 vcost = COSTS_N_INSNS (1);
    1727          110 :               else if (op1val > 64)
    1728              :                 vcost = COSTS_N_INSNS (2);
    1729              :               else
    1730          110 :                 vcost = TARGET_AVX ? COSTS_N_INSNS (4) : COSTS_N_INSNS (5);
    1731              :             }
    1732          158 :           igain = scost - vcost;
    1733          158 :           break;
    1734              : 
    1735          103 :         case ASHIFTRT:
    1736              :           /* See ix86_expand_v1ti_ashiftrt.  */
    1737          103 :           op1val = INTVAL (XEXP (src, 1));
    1738          103 :           if (!speed_p)
    1739              :             {
    1740            7 :               if (op1val == 64 || op1val == 127)
    1741              :                 scost = COSTS_N_BYTES (7);
    1742            7 :               else if (op1val == 1)
    1743              :                 scost = COSTS_N_BYTES (8);
    1744            7 :               else if (op1val == 65)
    1745              :                 scost = COSTS_N_BYTES (10);
    1746            7 :               else if (op1val >= 66)
    1747              :                 scost = COSTS_N_BYTES (11);
    1748              :               else
    1749              :                 scost = COSTS_N_BYTES (9);
    1750              : 
    1751            0 :               if (op1val == 127)
    1752              :                 vcost = COSTS_N_BYTES (10);
    1753            7 :               else if (op1val == 64)
    1754              :                 vcost = COSTS_N_BYTES (14);
    1755            7 :               else if (op1val == 96)
    1756              :                 vcost = COSTS_N_BYTES (18);
    1757            7 :               else if (op1val >= 111)
    1758              :                 vcost = COSTS_N_BYTES (15);
    1759            7 :               else if (TARGET_AVX2 && op1val == 32)
    1760              :                 vcost = COSTS_N_BYTES (16);
    1761            7 :               else if (TARGET_SSE4_1 && op1val == 32)
    1762              :                 vcost = COSTS_N_BYTES (20);
    1763            7 :               else if (op1val >= 96)
    1764              :                 vcost = COSTS_N_BYTES (23);
    1765            7 :               else if ((op1val & 7) == 0)
    1766              :                 vcost = COSTS_N_BYTES (28);
    1767            7 :               else if (TARGET_AVX2 && op1val < 32)
    1768              :                 vcost = COSTS_N_BYTES (30);
    1769            7 :               else if (op1val == 1 || op1val >= 64)
    1770              :                 vcost = COSTS_N_BYTES (42);
    1771              :               else
    1772            7 :                 vcost = COSTS_N_BYTES (47);
    1773              :             }
    1774              :           else
    1775              :             {
    1776           96 :               if (op1val >= 65 && op1val <= 126)
    1777              :                 scost = COSTS_N_INSNS (3);
    1778              :               else
    1779           96 :                 scost = COSTS_N_INSNS (2);
    1780              : 
    1781           96 :               if (op1val == 127)
    1782              :                 vcost = COSTS_N_INSNS (2);
    1783           96 :               else if (op1val == 64)
    1784              :                 vcost = COSTS_N_INSNS (3);
    1785           96 :               else if (op1val == 96)
    1786              :                 vcost = COSTS_N_INSNS (3);
    1787           96 :               else if (op1val >= 111)
    1788              :                 vcost = COSTS_N_INSNS (3);
    1789           96 :               else if (TARGET_SSE4_1 && op1val == 32)
    1790              :                 vcost = COSTS_N_INSNS (3);
    1791           96 :               else if (TARGET_SSE4_1
    1792            0 :                        && (op1val == 8 || op1val == 16 || op1val == 24))
    1793              :                 vcost = COSTS_N_INSNS (3);
    1794           96 :               else if (op1val >= 96)
    1795              :                 vcost = COSTS_N_INSNS (4);
    1796           96 :               else if (TARGET_SSE4_1 && (op1val == 28 || op1val == 80))
    1797              :                 vcost = COSTS_N_INSNS (4);
    1798           96 :               else if ((op1val & 7) == 0)
    1799              :                 vcost = COSTS_N_INSNS (5);
    1800           96 :               else if (TARGET_AVX2 && op1val < 32)
    1801              :                 vcost = COSTS_N_INSNS (6);
    1802           96 :               else if (TARGET_SSE4_1 && op1val < 15)
    1803              :                 vcost = COSTS_N_INSNS (6);
    1804           96 :               else if (op1val == 1 || op1val >= 64)
    1805              :                 vcost = COSTS_N_INSNS (8);
    1806              :               else
    1807            0 :                 vcost = COSTS_N_INSNS (9);
    1808              :             }
    1809          103 :           igain = scost - vcost;
    1810          103 :           break;
    1811              : 
    1812            5 :         case ROTATE:
    1813            5 :         case ROTATERT:
    1814              :           /* See ix86_expand_v1ti_rotate.  */
    1815            5 :           op1val = INTVAL (XEXP (src, 1));
    1816            5 :           if (!speed_p)
    1817              :             {
    1818            0 :               scost = COSTS_N_BYTES (13);
    1819            0 :               if ((op1val & 31) == 0)
    1820              :                 vcost = COSTS_N_BYTES (5);
    1821            0 :               else if ((op1val & 7) == 0)
    1822            0 :                 vcost = TARGET_AVX ? COSTS_N_BYTES (13) : COSTS_N_BYTES (18);
    1823            0 :               else if (op1val > 32 && op1val < 96)
    1824              :                 vcost = COSTS_N_BYTES (24);
    1825              :               else
    1826            0 :                 vcost = COSTS_N_BYTES (19);
    1827              :             }
    1828              :           else
    1829              :             {
    1830            5 :               scost = COSTS_N_INSNS (3);
    1831            5 :               if ((op1val & 31) == 0)
    1832              :                 vcost = COSTS_N_INSNS (1);
    1833            3 :               else if ((op1val & 7) == 0)
    1834            1 :                 vcost = TARGET_AVX ? COSTS_N_INSNS (3) : COSTS_N_INSNS (4);
    1835            2 :               else if (op1val > 32 && op1val < 96)
    1836              :                 vcost = COSTS_N_INSNS (5);
    1837              :               else
    1838            2 :                 vcost = COSTS_N_INSNS (1);
    1839              :             }
    1840            5 :           igain = scost - vcost;
    1841            5 :           break;
    1842              : 
    1843           12 :         case COMPARE:
    1844           12 :           if (XEXP (src, 1) == const0_rtx)
    1845              :             {
    1846            8 :               if (GET_CODE (XEXP (src, 0)) == AND)
    1847              :                 /* and;and;or (9 bytes) vs. ptest (5 bytes).  */
    1848              :                 igain = !speed_p ? COSTS_N_BYTES (4) : COSTS_N_INSNS (2);
    1849              :               /* or (3 bytes) vs. ptest (5 bytes).  */
    1850            8 :               else if (!speed_p)
    1851            0 :                 igain = -COSTS_N_BYTES (2);
    1852              :             }
    1853            4 :           else if (XEXP (src, 1) == const1_rtx)
    1854              :             /* and;cmp -1 (7 bytes) vs. pcmpeqd;pxor;ptest (13 bytes).  */
    1855            0 :             igain = !speed_p ? -COSTS_N_BYTES (6) : -COSTS_N_INSNS (1);
    1856              :           break;
    1857              : 
    1858          122 :         case ZERO_EXTEND:
    1859          122 :           if (GET_MODE (XEXP (src, 0)) == DImode)
    1860              :             /* xor (2 bytes) vs. vmovq (5 bytes).  */
    1861          122 :             igain = speed_p ? COSTS_N_INSNS (1) - ix86_cost->sse_to_integer
    1862              :                             : -COSTS_N_BYTES (3);
    1863              :           break;
    1864              : 
    1865              :         default:
    1866              :           break;
    1867              :         }
    1868              : 
    1869      1813459 :       gain += igain;
    1870       927689 :       if (speed_p)
    1871       885770 :         weighted_gain += bb_freq * igain;
    1872              : 
    1873       927697 :       if (igain != 0 && dump_file)
    1874              :         {
    1875            0 :           fprintf (dump_file, "  Instruction gain %d with bb_freq %.2f for ",
    1876              :                    igain, bb_freq.to_double ());
    1877            0 :           dump_insn_slim (dump_file, insn);
    1878              :         }
    1879              :     }
    1880              : 
    1881       472109 :   if (dump_file)
    1882            0 :     fprintf (dump_file, "  Total gain: %d, weighted gain %.2f\n",
    1883              :              gain, weighted_gain.to_double ());
    1884              : 
    1885       472109 :   if (weighted_gain > (sreal) 0)
    1886              :     return true;
    1887              :   else
    1888        24591 :     return gain > 0;
    1889              : }
    1890              : 
    1891              : /* Fix uses of converted REG in debug insns: every V1TImode use of REG found in a debug insn is wrapped in a TImode SUBREG and the insn is rescanned.  Does nothing unless flag_var_tracking is set.  */
    1892              : 
    1893              : void
    1894       423862 : timode_scalar_chain::fix_debug_reg_uses (rtx reg)
    1895              : {
    1896       423862 :   if (!flag_var_tracking)
    1897              :     return;
    1898              : 
    1899       374473 :   df_ref ref, next;
    1900       766908 :   for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
    1901              :     {
    1902       392435 :       rtx_insn *insn = DF_REF_INSN (ref);
    1903              :       /* Make sure the next ref is for a different instruction,
    1904              :          so that we're not affected by the rescan.  */
    1905       392435 :       next = DF_REF_NEXT_REG (ref);
    1906       392435 :       while (next && DF_REF_INSN (next) == insn)
    1907            0 :         next = DF_REF_NEXT_REG (next);
    1908              : 
    1909       392435 :       if (DEBUG_INSN_P (insn))
    1910              :         {
    1911              :           /* It may be a debug insn with a TImode variable in
    1912              :              register.  Walk every use belonging to this insn (up to NEXT).  */
    1913              :           bool changed = false;
    1914          178 :           for (; ref != next; ref = DF_REF_NEXT_REG (ref))
    1915              :             {
    1916           89 :               rtx *loc = DF_REF_LOC (ref);
    1917           89 :               if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
    1918              :                 {
    1919           85 :                   *loc = gen_rtx_SUBREG (TImode, *loc, 0); /* Present the register as TImode again.  */
    1920           85 :                   changed = true;
    1921              :                 }
    1922              :             }
    1923           89 :           if (changed) /* Rescan only insns that were actually modified.  */
    1924           85 :             df_insn_rescan (insn);
    1925              :         }
    1926              :     }
    1927              : }
    1928              : 
    1929              : /* Convert SRC, a *concatditi3 pattern, into a vec_concatv2di instruction.
    1930              :    Insert this before INSN, and return the result as a V1TImode subreg
    1931              :    of a fresh V2DImode pseudo.  */
    1932              : static rtx
    1933          253 : timode_convert_concatdi (rtx src, rtx_insn *insn)
    1934              : {
    1935          253 :   rtx hi, lo;
    1936          253 :   rtx tmp = gen_reg_rtx (V2DImode);
    1937          253 :   if (GET_CODE (XEXP (src, 0)) == ASHIFT) /* Shifted (high) part is operand 0.  */
    1938              :     {
    1939          253 :       hi = XEXP (XEXP (XEXP (src, 0), 0), 0);
    1940          253 :       lo = XEXP (XEXP (src, 1), 0);
    1941              :     }
    1942              :   else /* Otherwise operand 1 is taken as the shifted part; presumably guaranteed by timode_concatdi_p -- confirm.  */
    1943              :     {
    1944            0 :       hi = XEXP (XEXP (XEXP (src, 1), 0), 0);
    1945            0 :       lo = XEXP (XEXP (src, 0), 0);
    1946              :     }
    1947          253 :   emit_insn_before (gen_vec_concatv2di (tmp, lo, hi), insn);
    1948          253 :   return gen_rtx_SUBREG (V1TImode, tmp, 0);
    1949              : }
    1950              : 
    1951              : /* Convert INSN from TImode to V1TImode.  The destination is retyped first, then the source is rewritten according to its RTX code (emitting helper insns before INSN where needed), and finally INSN is re-recognized and rescanned.  */
    1952              : 
    1953              : void
    1954       921354 : timode_scalar_chain::convert_insn (rtx_insn *insn)
    1955              : {
    1956       921354 :   rtx def_set = single_set (insn);
    1957       921354 :   rtx src = SET_SRC (def_set);
    1958       921354 :   rtx dst = SET_DEST (def_set);
    1959       921354 :   rtx tmp;
    1960              : 
    1961       921354 :   switch (GET_CODE (dst)) /* Only REG and MEM destinations are handled.  */
    1962              :     {
    1963       423872 :     case REG:
    1964       423872 :       if (GET_MODE (dst) == TImode)
    1965              :         {
    1966       422111 :           PUT_MODE (dst, V1TImode);
    1967       422111 :           fix_debug_reg_uses (dst);
    1968              :         }
    1969       423872 :       if (GET_MODE (dst) == V1TImode)
    1970              :         {
    1971              :           /* It might potentially be helpful to convert REG_EQUAL notes,
    1972              :              but for now we just remove them.  */
    1973       423862 :           rtx note = find_reg_equal_equiv_note (insn);
    1974       423862 :           if (note)
    1975          444 :             remove_note (insn, note);
    1976              :         }
    1977              :       break;
    1978       497482 :     case MEM:
    1979       497482 :       PUT_MODE (dst, V1TImode);
    1980       497482 :       break;
    1981              : 
    1982            0 :     default:
    1983            0 :       gcc_unreachable ();
    1984              :     }
    1985              : 
    1986       921354 :   switch (GET_CODE (src)) /* Rewrite the source according to its RTX code.  */
    1987              :     {
    1988       451690 :     case REG:
    1989       451690 :       if (GET_MODE (src) == TImode)
    1990              :         {
    1991         1751 :           PUT_MODE (src, V1TImode);
    1992         1751 :           fix_debug_reg_uses (src);
    1993              :         }
    1994              :       break;
    1995              : 
    1996       422588 :     case MEM:
    1997       422588 :       PUT_MODE (src, V1TImode);
    1998       422588 :       break;
    1999              : 
    2000        35254 :     case CONST_WIDE_INT:
    2001        35254 :       if (NONDEBUG_INSN_P (insn))
    2002              :         {
    2003              :           /* Since there are no instructions to store 128-bit constant,
    2004              :              temporary register usage is required.  */
    2005        35254 :           bool use_move;
    2006        35254 :           start_sequence ();
    2007        35254 :           tmp = ix86_convert_const_wide_int_to_broadcast (TImode, src);
    2008        35254 :           if (tmp) /* A broadcast form is available: use its V1TImode lowpart.  */
    2009              :             {
    2010          194 :               src = lowpart_subreg (V1TImode, tmp, TImode);
    2011          194 :               use_move = true;
    2012              :             }
    2013              :           else /* Otherwise load the constant from the constant pool.  */
    2014              :             {
    2015        35060 :               src = smode_convert_cst (src, V1TImode);
    2016        35060 :               src = validize_mem (force_const_mem (V1TImode, src));
    2017        35060 :               use_move = MEM_P (dst);
    2018              :             }
    2019        35254 :           rtx_insn *seq = end_sequence ();
    2020        35254 :           if (seq)
    2021          195 :             emit_insn_before (seq, insn);
    2022        35254 :           if (use_move) /* Go through a fresh V1TImode pseudo.  */
    2023              :             {
    2024        35054 :               tmp = gen_reg_rtx (V1TImode);
    2025        35054 :               emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2026        35054 :               src = tmp;
    2027              :             }
    2028              :         }
    2029              :       break;
    2030              : 
    2031        11350 :     case CONST_INT:
    2032        11350 :       switch (standard_sse_constant_p (src, TImode))
    2033              :         {
    2034        11125 :         case 1: /* Replaced by the zero constant of DST's mode.  */
    2035        11125 :           src = CONST0_RTX (GET_MODE (dst));
    2036        11125 :           break;
    2037          225 :         case 2: /* Replaced by the minus-one constant of DST's mode.  */
    2038          225 :           src = CONSTM1_RTX (GET_MODE (dst));
    2039          225 :           break;
    2040            0 :         default:
    2041            0 :           gcc_unreachable ();
    2042              :         }
    2043        11350 :       if (MEM_P (dst)) /* For a memory destination, load the constant into a pseudo first.  */
    2044              :         {
    2045        10846 :           tmp = gen_reg_rtx (V1TImode);
    2046        10846 :           emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2047        10846 :           src = tmp;
    2048              :         }
    2049              :       break;
    2050              : 
    2051           13 :     case AND:
    2052           13 :       if (GET_CODE (XEXP (src, 0)) == NOT) /* (and (not X) Y): convert X and Y, retype both NOT and AND.  */
    2053              :         {
    2054            0 :           convert_op (&XEXP (XEXP (src, 0), 0), insn);
    2055            0 :           convert_op (&XEXP (src, 1), insn);
    2056            0 :           PUT_MODE (XEXP (src, 0), V1TImode);
    2057            0 :           PUT_MODE (src, V1TImode);
    2058            0 :           break;
    2059              :         }
    2060           13 :       convert_op (&XEXP (src, 0), insn);
    2061           13 :       convert_op (&XEXP (src, 1), insn);
    2062           13 :       PUT_MODE (src, V1TImode);
    2063           13 :       if (MEM_P (dst)) /* Compute into a pseudo, then store it.  */
    2064              :         {
    2065           10 :           tmp = gen_reg_rtx (V1TImode);
    2066           10 :           emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2067           10 :           src = tmp;
    2068              :         }
    2069              :       break;
    2070              : 
    2071          304 :     case XOR:
    2072          304 :     case IOR:
    2073          304 :       if (timode_concatdi_p (src)) /* Concatenation of two DImode values.  */
    2074              :         {
    2075          253 :           src = timode_convert_concatdi (src, insn);
    2076          253 :           break;
    2077              :         }
    2078           51 :       convert_op (&XEXP (src, 0), insn);
    2079           51 :       convert_op (&XEXP (src, 1), insn);
    2080           51 :       PUT_MODE (src, V1TImode);
    2081           51 :       if (MEM_P (dst)) /* Compute into a pseudo, then store it.  */
    2082              :         {
    2083            8 :           tmp = gen_reg_rtx (V1TImode);
    2084            8 :           emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2085            8 :           src = tmp;
    2086              :         }
    2087              :       break;
    2088              : 
    2089            3 :     case NOT: /* Rewritten as XOR of the operand with a minus-one register.  */
    2090            3 :       src = XEXP (src, 0);
    2091            3 :       convert_op (&src, insn);
    2092            3 :       tmp = gen_reg_rtx (V1TImode);
    2093            3 :       emit_insn_before (gen_move_insn (tmp, CONSTM1_RTX (V1TImode)), insn);
    2094            3 :       src = gen_rtx_XOR (V1TImode, src, tmp);
    2095            3 :       if (MEM_P (dst)) /* Compute into a pseudo, then store it.  */
    2096              :         {
    2097            0 :           tmp = gen_reg_rtx (V1TImode);
    2098            0 :           emit_insn_before (gen_rtx_SET (tmp, src), insn);
    2099            0 :           src = tmp;
    2100              :         }
    2101              :       break;
    2102              : 
    2103           10 :     case COMPARE: /* The destination becomes the flags register in CCZmode.  */
    2104           10 :       dst = gen_rtx_REG (CCZmode, FLAGS_REG);
    2105           10 :       src = convert_compare (XEXP (src, 0), XEXP (src, 1), insn);
    2106           10 :       break;
    2107              : 
    2108           43 :     case ASHIFT:
    2109           43 :     case LSHIFTRT:
    2110           43 :     case ASHIFTRT:
    2111           43 :     case ROTATERT:
    2112           43 :     case ROTATE:
    2113           43 :       convert_op (&XEXP (src, 0), insn); /* Only the shifted operand is converted; the count is left as is.  */
    2114           43 :       PUT_MODE (src, V1TImode);
    2115           43 :       break;
    2116              : 
    2117           99 :     case ZERO_EXTEND:
    2118           99 :       if (GET_MODE (XEXP (src, 0)) == DImode)
    2119              :         {
    2120              :           /* Convert to *vec_concatv2di_0.  */
    2121           99 :           rtx tmp = gen_reg_rtx (V2DImode);
    2122           99 :           rtx pat = gen_rtx_VEC_CONCAT (V2DImode, XEXP (src, 0), const0_rtx);
    2123           99 :           emit_insn_before (gen_move_insn (tmp, pat), insn);
    2124           99 :           src = gen_rtx_SUBREG (vmode, tmp, 0);
    2125              :         }
    2126              :       else
    2127            0 :         gcc_unreachable ();
    2128           99 :       break;
    2129              : 
    2130            0 :     case PLUS: /* Only the concatenation form of PLUS is expected here.  */
    2131            0 :       if (timode_concatdi_p (src))
    2132            0 :         src = timode_convert_concatdi (src, insn);
    2133              :       else
    2134            0 :         gcc_unreachable ();
    2135            0 :       break;
    2136              : 
    2137            0 :     default:
    2138            0 :       gcc_unreachable ();
    2139              :     }
    2140              : 
    2141       921354 :   SET_SRC (def_set) = src;
    2142       921354 :   SET_DEST (def_set) = dst;
    2143              : 
    2144              :   /* Drop possible dead definitions.  */
    2145       921354 :   PATTERN (insn) = def_set;
    2146              : 
    2147       921354 :   INSN_CODE (insn) = -1; /* Discard the cached insn code before re-recognizing below.  */
    2148       921354 :   recog_memoized (insn);
    2149       921354 :   df_insn_rescan (insn);
    2150       921354 : }
    2151              : 
    2152              : /* Generate copies from defs used by the chain but not defined therein.
    2153              :    Also populates defs_map which is used later by convert_insn.  */
    2154              : 
    2155              : void
    2156       642658 : scalar_chain::convert_registers ()
    2157              : {
    2158       642658 :   bitmap_iterator bi;
    2159       642658 :   unsigned id;
    2160       668632 :   EXECUTE_IF_SET_IN_BITMAP (defs_conv, 0, id, bi) /* ID is a register number here.  */
    2161              :     {
    2162        25974 :       rtx chain_reg = gen_reg_rtx (smode);
    2163        25974 :       defs_map.put (regno_reg_rtx[id], chain_reg); /* Map the original register to its new pseudo.  */
    2164              :     }
    2165       650746 :   EXECUTE_IF_SET_IN_BITMAP (insns_conv, 0, id, bi) /* ID is an insn UID here.  */
    2166        20442 :     for (df_ref ref = DF_INSN_UID_DEFS (id); ref; ref = DF_REF_NEXT_LOC (ref))
    2167        12354 :       if (bitmap_bit_p (defs_conv, DF_REF_REGNO (ref))) /* Only defs of registers recorded in defs_conv get copies.  */
    2168         8088 :         make_vector_copies (DF_REF_INSN (ref), DF_REF_REAL_REG (ref));
    2169       642658 : }
    2170              : 
    2171              : /* Convert whole chain creating required register
    2172              :    conversions and copies.  Return the number of converted insns, or 0 when the stv_conversion debug counter rejects the conversion.  */
    2173              : 
    2174              : int
    2175       642658 : scalar_chain::convert ()
    2176              : {
    2177       642658 :   bitmap_iterator bi;
    2178       642658 :   unsigned id;
    2179       642658 :   int converted_insns = 0;
    2180              : 
    2181       642658 :   if (!dbg_cnt (stv_conversion))
    2182              :     return 0;
    2183              : 
    2184       642658 :   if (dump_file)
    2185            0 :     fprintf (dump_file, "Converting chain #%d...\n", chain_id);
    2186              : 
    2187       642658 :   convert_registers (); /* Set up register copies and defs_map before touching any insn.  */
    2188              : 
    2189      1976278 :   EXECUTE_IF_SET_IN_BITMAP (insns, 0, id, bi) /* ID is an insn UID.  */
    2190              :     {
    2191      1333620 :       rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
    2192      1333620 :       convert_insn_common (insn);
    2193      1333620 :       convert_insn (insn);
    2194      1333620 :       converted_insns++;
    2195              :     }
    2196              : 
    2197              :   return converted_insns;
    2198              : }
    2199              : 
    2200              : /* Return the single SET of INSN if INSN doesn't reference a hard register.
    2201              :    Return NULL if INSN has no single set, or uses or defines a hard register, excluding
    2202              :    pseudo register pushes, hard register uses in a memory address,
    2203              :    must-clobbers and flags register definitions.  */
    2204              : 
    2205              : static rtx
    2206    337484609 : pseudo_reg_set (rtx_insn *insn)
    2207              : {
    2208    337484609 :   rtx set = single_set (insn);
    2209    337484609 :   if (!set)
    2210              :     return NULL;
    2211              : 
    2212              :   /* Check pseudo register push first.  The push is tested in TImode for 64-bit targets and DImode otherwise.  */
    2213    135441446 :   machine_mode mode = TARGET_64BIT ? TImode : DImode;
    2214    135441446 :   if (REG_P (SET_SRC (set))
    2215     38145953 :       && !HARD_REGISTER_P (SET_SRC (set))
    2216    165212978 :       && push_operand (SET_DEST (set), mode))
    2217              :     return set;
    2218              : 
    2219    135189119 :   df_ref ref;
    2220    219050361 :   FOR_EACH_INSN_DEF (ref, insn) /* Reject hard register defs other than must-clobbers and the flags register.  */
    2221    120586782 :     if (HARD_REGISTER_P (DF_REF_REAL_REG (ref))
    2222     64722979 :         && !DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
    2223    170801855 :         && DF_REF_REGNO (ref) != FLAGS_REG)
    2224              :       return NULL;
    2225              : 
    2226    188665704 :   FOR_EACH_INSN_USE (ref, insn) /* Reject hard register uses that are not inside a memory address.  */
    2227    115603337 :     if (!DF_REF_REG_MEM_P (ref) && HARD_REGISTER_P (DF_REF_REAL_REG (ref)))
    2228              :       return NULL;
    2229              : 
    2230              :   return set;
    2231              : }
    2232              : 
    2233              : /* Return true if the register REG has exactly one definition, i.e. its
    2234              :    DEF chain holds a single reference.  If it has more than one definition,
    2235              :    we may not be able to convert it in all chains.  */
    2236              : 
    2237              : static bool
    2238      1155736 : single_def_chain_p (rtx reg)
    2239              : {
    2240      1155736 :   df_ref ref = DF_REG_DEF_CHAIN (REGNO (reg));
    2241      1155736 :   if (!ref) /* No definition at all.  */
    2242              :     return false;
    2243      1155720 :   return DF_REF_NEXT_REG (ref) == nullptr;
    2244              : }
    2245              : 
    2246              : /* Check if comparison INSN may be transformed into vector comparison.  MODE must be TImode if TARGET_64BIT and DImode otherwise, and TARGET_SSE4_1 is required.
    2247              :    Currently we transform equality/inequality checks which look like:
    2248              :    (set (reg:CCZ 17 flags) (compare:CCZ (reg:TI x) (reg:TI y)))  */
    2249              : 
    2250              : static bool
    2251     12869469 : convertible_comparison_p (rtx_insn *insn, enum machine_mode mode)
    2252              : {
    2253     14271865 :   if (mode != (TARGET_64BIT ? TImode : DImode))
    2254              :     return false;
    2255              : 
    2256      4696889 :   if (!TARGET_SSE4_1)
    2257              :     return false;
    2258              : 
    2259       164925 :   rtx def_set = single_set (insn);
    2260              : 
    2261       164925 :   gcc_assert (def_set);
    2262              : 
    2263       164925 :   rtx src = SET_SRC (def_set);
    2264       164925 :   rtx dst = SET_DEST (def_set);
    2265              : 
    2266       164925 :   gcc_assert (GET_CODE (src) == COMPARE);
    2267              : 
    2268       164925 :   if (!REG_P (dst) /* Only a CCZmode set of the flags register qualifies.  */
    2269       164925 :       || REGNO (dst) != FLAGS_REG
    2270       329850 :       || GET_MODE (dst) != CCZmode)
    2271              :     return false;
    2272              : 
    2273       120106 :   rtx op1 = XEXP (src, 0);
    2274       120106 :   rtx op2 = XEXP (src, 1);
    2275              : 
    2276              :   /* *cmp<dwi>_doubleword.  Each operand is a scalar integer constant, or a REG/MEM in MODE.  */
    2277       120106 :   if ((CONST_SCALAR_INT_P (op1)
    2278       120106 :        || ((REG_P (op1) || MEM_P (op1))
    2279       118321 :            && GET_MODE (op1) == mode))
    2280           60 :       && (CONST_SCALAR_INT_P (op2)
    2281           12 :           || ((REG_P (op2) || MEM_P (op2))
    2282           10 :               && GET_MODE (op2) == mode)))
    2283              :     return true;
    2284              : 
    2285              :   /* *testti_doubleword.  An AND of a TImode register with a constant or TImode REG/MEM, compared against zero.  */
    2286       120048 :   if (op2 == const0_rtx
    2287        38296 :       && GET_CODE (op1) == AND
    2288          150 :       && REG_P (XEXP (op1, 0)))
    2289              :     {
    2290          150 :       rtx op12 = XEXP (op1, 1);
    2291          150 :       return GET_MODE (XEXP (op1, 0)) == TImode
    2292          150 :              && (CONST_SCALAR_INT_P (op12)
    2293            0 :                  || ((REG_P (op12) || MEM_P (op12))
    2294            0 :                      && GET_MODE (op12) == TImode));
    2295              :     }
    2296              : 
    2297              :   /* *test<dwi>_not_doubleword.  (and (not X) Y) compared against zero, with X and Y both REG/MEM in MODE.  */
    2298       119898 :   if (op2 == const0_rtx
    2299        38146 :       && GET_CODE (op1) == AND
    2300            0 :       && GET_CODE (XEXP (op1, 0)) == NOT)
    2301              :     {
    2302            0 :       rtx op11 = XEXP (XEXP (op1, 0), 0);
    2303            0 :       rtx op12 = XEXP (op1, 1);
    2304            0 :       return (REG_P (op11) || MEM_P (op11))
    2305            0 :              && (REG_P (op12) || MEM_P (op12))
    2306            0 :              && GET_MODE (op11) == mode
    2307            0 :              && GET_MODE (op12) == mode;
    2308              :     }
    2309              : 
    2310              :   return false;
    2311              : }
    2312              : 
    2313              : /* The general version of scalar_to_vector_candidate_p.  */
    2314              : 
    2315              : static bool
    2316    236001426 : general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
    2317              : {
    2318    236001426 :   rtx def_set = pseudo_reg_set (insn);
    2319              : 
    2320    236001426 :   if (!def_set)
    2321              :     return false;
    2322              : 
    2323     49551398 :   rtx src = SET_SRC (def_set);
    2324     49551398 :   rtx dst = SET_DEST (def_set);
    2325              : 
    2326     49551398 :   if (GET_CODE (src) == COMPARE)
    2327      8873778 :     return convertible_comparison_p (insn, mode);
    2328              : 
    2329              :   /* We are interested in "mode" only.  */
    2330     40677620 :   if ((GET_MODE (src) != mode
    2331     27828861 :        && !CONST_INT_P (src))
    2332     18007183 :       || GET_MODE (dst) != mode)
    2333              :     return false;
    2334              : 
    2335     15111358 :   if (!REG_P (dst) && !MEM_P (dst))
    2336              :     return false;
    2337              : 
    2338     14881811 :   switch (GET_CODE (src))
    2339              :     {
    2340       530193 :     case ASHIFT:
    2341       530193 :     case LSHIFTRT:
    2342       530193 :     case ASHIFTRT:
    2343       530193 :     case ROTATE:
    2344       530193 :     case ROTATERT:
    2345       530193 :       if (!CONST_INT_P (XEXP (src, 1))
    2346      1024225 :           || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, GET_MODE_BITSIZE (mode)-1))
    2347              :         return false;
    2348              : 
    2349              :       /* Check for extend highpart case.  */
    2350       494028 :       if (mode != DImode
    2351       353750 :           || GET_CODE (src) != ASHIFTRT
    2352        76423 :           || GET_CODE (XEXP (src, 0)) != ASHIFT)
    2353              :         break;
    2354              : 
    2355      3697171 :       src = XEXP (src, 0);
    2356              :       break;
    2357              : 
    2358        78460 :     case SMAX:
    2359        78460 :     case SMIN:
    2360        78460 :     case UMAX:
    2361        78460 :     case UMIN:
    2362        78460 :       if ((mode == DImode && !TARGET_AVX512VL)
    2363        17538 :           || (mode == SImode && !TARGET_SSE4_1))
    2364              :         return false;
    2365              :       /* Fallthru.  */
    2366              : 
    2367      3243340 :     case AND:
    2368      3243340 :     case IOR:
    2369      3243340 :     case XOR:
    2370      3243340 :     case PLUS:
    2371      3243340 :     case MINUS:
    2372      3243340 :       if (!REG_P (XEXP (src, 1))
    2373              :           && !MEM_P (XEXP (src, 1))
    2374              :           && !CONST_INT_P (XEXP (src, 1)))
    2375              :         return false;
    2376              : 
    2377      3150759 :       if (GET_MODE (XEXP (src, 1)) != mode
    2378      1842608 :           && !CONST_INT_P (XEXP (src, 1)))
    2379              :         return false;
    2380              : 
    2381              :       /* Check for andnot case.  */
    2382      3150759 :       if (GET_CODE (src) != AND
    2383       182409 :           || GET_CODE (XEXP (src, 0)) != NOT)
    2384              :         break;
    2385              : 
    2386      3697171 :       src = XEXP (src, 0);
    2387              :       /* FALLTHRU */
    2388              : 
    2389              :     case NOT:
    2390              :       break;
    2391              : 
    2392        24992 :     case NEG:
    2393              :       /* Check for nabs case.  */
    2394        24992 :       if (GET_CODE (XEXP (src, 0)) != ABS)
    2395              :         break;
    2396              : 
    2397              :       src = XEXP (src, 0);
    2398              :       /* FALLTHRU */
    2399              : 
    2400         2880 :     case ABS:
    2401         2880 :       if ((mode == DImode && !TARGET_AVX512VL)
    2402         1428 :           || (mode == SImode && !TARGET_SSSE3))
    2403              :         return false;
    2404              :       break;
    2405              : 
    2406              :     case REG:
    2407              :       return true;
    2408              : 
    2409      5984483 :     case MEM:
    2410      5984483 :     case CONST_INT:
    2411      5984483 :       return REG_P (dst);
    2412              : 
    2413        57439 :     case VEC_SELECT:
    2414              :       /* Excluding MEM_P (dst) avoids interfering with vpextr[dq].  */
    2415        57439 :       return REG_P (dst)
    2416        46833 :              && REG_P (XEXP (src, 0))
    2417        53874 :              && GET_MODE (XEXP (src, 0)) == (mode == DImode ? V2DImode
    2418              :                                                             : V4SImode)
    2419        37545 :              && GET_CODE (XEXP (src, 1)) == PARALLEL
    2420        37545 :              && XVECLEN (XEXP (src, 1), 0) == 1
    2421        94984 :              && CONST_INT_P (XVECEXP (XEXP (src, 1), 0, 0));
    2422              : 
    2423              :     default:
    2424              :       return false;
    2425              :     }
    2426              : 
    2427      3697171 :   if (!REG_P (XEXP (src, 0))
    2428              :       && !MEM_P (XEXP (src, 0))
    2429              :       && !CONST_INT_P (XEXP (src, 0)))
    2430              :     return false;
    2431              : 
    2432      3388876 :   if (GET_MODE (XEXP (src, 0)) != mode
    2433            0 :       && !CONST_INT_P (XEXP (src, 0)))
    2434              :     return false;
    2435              : 
    2436              :   return true;
    2437              : }
    2438              : 
    2439              : /* Check for a suitable TImode memory operand.  */
    2440              : 
    2441              : static bool
    2442         1566 : timode_mem_p (rtx x)
    2443              : {
    2444         1566 :   return MEM_P (x)
    2445         1566 :          && (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
    2446            0 :              || !misaligned_operand (x, TImode));
    2447              : }
    2448              : 
    2449              : /* The TImode version of scalar_to_vector_candidate_p.  */
    2450              : 
    2451              : static bool
    2452    101483183 : timode_scalar_to_vector_candidate_p (rtx_insn *insn)
    2453              : {
    2454    101483183 :   rtx def_set = pseudo_reg_set (insn);
    2455              : 
    2456    101483183 :   if (!def_set)
    2457              :     return false;
    2458              : 
    2459     23763296 :   rtx src = SET_SRC (def_set);
    2460     23763296 :   rtx dst = SET_DEST (def_set);
    2461              : 
    2462     23763296 :   if (GET_CODE (src) == COMPARE)
    2463      3995691 :     return convertible_comparison_p (insn, TImode);
    2464              : 
    2465     19767605 :   if (GET_MODE (dst) != TImode
    2466      1204687 :       || (GET_MODE (src) != TImode
    2467        64872 :           && !CONST_SCALAR_INT_P (src)))
    2468              :     return false;
    2469              : 
    2470      1204687 :   if (!REG_P (dst) && !MEM_P (dst))
    2471              :     return false;
    2472              : 
    2473      1203234 :   if (MEM_P (dst)
    2474       535302 :       && misaligned_operand (dst, TImode)
    2475      1515885 :       && !TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
    2476              :     return false;
    2477              : 
    2478      1203229 :   if (REG_P (dst) && !single_def_chain_p (dst))
    2479              :     return false;
    2480              : 
    2481      1048034 :   switch (GET_CODE (src))
    2482              :     {
    2483       487804 :     case REG:
    2484       487804 :       return single_def_chain_p (src);
    2485              : 
    2486              :     case CONST_WIDE_INT:
    2487              :       return true;
    2488              : 
    2489        13226 :     case CONST_INT:
    2490              :       /* ??? Verify performance impact before enabling CONST_INT for
    2491              :          __int128 store.  */
    2492        13226 :       return standard_sse_constant_p (src, TImode);
    2493              : 
    2494       445057 :     case MEM:
    2495              :       /* Memory must be aligned or unaligned load is optimal.  */
    2496       445057 :       return (REG_P (dst)
    2497       445057 :               && (!misaligned_operand (src, TImode)
    2498       148775 :                   || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL));
    2499              : 
    2500         3930 :     case AND:
    2501         3930 :       if (!MEM_P (dst)
    2502         3889 :           && GET_CODE (XEXP (src, 0)) == NOT
    2503            0 :           && REG_P (XEXP (XEXP (src, 0), 0))
    2504         3930 :           && (REG_P (XEXP (src, 1))
    2505            0 :               || CONST_SCALAR_INT_P (XEXP (src, 1))
    2506            0 :               || timode_mem_p (XEXP (src, 1))))
    2507            0 :         return true;
    2508         3930 :       return (REG_P (XEXP (src, 0))
    2509           46 :               || timode_mem_p (XEXP (src, 0)))
    2510         3976 :              && (REG_P (XEXP (src, 1))
    2511         2108 :                  || CONST_SCALAR_INT_P (XEXP (src, 1))
    2512           35 :                  || timode_mem_p (XEXP (src, 1)));
    2513              : 
    2514        14107 :     case IOR:
    2515        14107 :     case XOR:
    2516        14107 :       if (timode_concatdi_p (src))
    2517              :         return true;
    2518         2667 :       return (REG_P (XEXP (src, 0))
    2519         1438 :               || timode_mem_p (XEXP (src, 0)))
    2520         2684 :              && (REG_P (XEXP (src, 1))
    2521          267 :                  || CONST_SCALAR_INT_P (XEXP (src, 1))
    2522           31 :                  || timode_mem_p (XEXP (src, 1)));
    2523              : 
    2524          505 :     case NOT:
    2525          505 :       return REG_P (XEXP (src, 0)) || timode_mem_p (XEXP (src, 0));
    2526              : 
    2527        12321 :     case ASHIFT:
    2528        12321 :     case LSHIFTRT:
    2529        12321 :     case ASHIFTRT:
    2530        12321 :     case ROTATERT:
    2531        12321 :     case ROTATE:
    2532              :       /* Handle shifts/rotates by integer constants between 0 and 127.  */
    2533        12321 :       return REG_P (XEXP (src, 0))
    2534        12289 :              && CONST_INT_P (XEXP (src, 1))
    2535        24269 :              && (INTVAL (XEXP (src, 1)) & ~0x7f) == 0;
    2536              : 
    2537         7233 :     case PLUS:
    2538         7233 :       return timode_concatdi_p (src);
    2539              : 
    2540         3828 :     case ZERO_EXTEND:
    2541         3828 :       return REG_P (XEXP (src, 0))
    2542         3828 :              && GET_MODE (XEXP (src, 0)) == DImode;
    2543              : 
    2544              :     default:
    2545              :       return false;
    2546              :     }
    2547              : }
    2548              : 
    2549              : /* For a register REGNO, scan instructions for its defs and uses.
    2550              :    Put REGNO in REGS if a def or use isn't in CANDIDATES.  */
    2551              : 
    2552              : static void
    2553      1278271 : timode_check_non_convertible_regs (bitmap candidates, bitmap regs,
    2554              :                                    unsigned int regno)
    2555              : {
    2556              :   /* Do nothing if REGNO is already in REGS or is a hard reg.  */
    2557      1278271 :   if (bitmap_bit_p (regs, regno)
    2558      1278271 :       || HARD_REGISTER_NUM_P (regno))
    2559              :     return;
    2560              : 
    2561      1265640 :   for (df_ref def = DF_REG_DEF_CHAIN (regno);
    2562      2506786 :        def;
    2563      1241146 :        def = DF_REF_NEXT_REG (def))
    2564              :     {
    2565      1265620 :       if (!bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
    2566              :         {
    2567        24474 :           if (dump_file)
    2568            0 :             fprintf (dump_file,
    2569              :                      "r%d has non convertible def in insn %d\n",
    2570            0 :                      regno, DF_REF_INSN_UID (def));
    2571              : 
    2572        24474 :           bitmap_set_bit (regs, regno);
    2573        24474 :           break;
    2574              :         }
    2575              :     }
    2576              : 
    2577      1265640 :   for (df_ref ref = DF_REG_USE_CHAIN (regno);
    2578      2784574 :        ref;
    2579      1518934 :        ref = DF_REF_NEXT_REG (ref))
    2580              :     {
    2581              :       /* Debug instructions are skipped.  */
    2582      1583333 :       if (NONDEBUG_INSN_P (DF_REF_INSN (ref))
    2583      1583333 :           && !bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
    2584              :         {
    2585        64399 :           if (dump_file)
    2586            0 :             fprintf (dump_file,
    2587              :                      "r%d has non convertible use in insn %d\n",
    2588            0 :                      regno, DF_REF_INSN_UID (ref));
    2589              : 
    2590        64399 :           bitmap_set_bit (regs, regno);
    2591        64399 :           break;
    2592              :         }
    2593              :     }
    2594              : }
    2595              : 
    2596              : /* For a given bitmap of insn UIDs, scan all instructions and
    2597              :    remove an insn from CANDIDATES if it has both convertible
    2598              :    and non-convertible definitions.
    2599              : 
    2600              :    All insns in a bitmap are conversion candidates according to
    2601              :    scalar_to_vector_candidate_p.  Currently it implies all insns
    2602              :    are single_set.  */
    2603              : 
    2604              : static void
    2605       828673 : timode_remove_non_convertible_regs (bitmap candidates)
    2606              : {
    2607       828673 :   bitmap_iterator bi;
    2608       828673 :   unsigned id;
    2609       828673 :   bitmap regs = BITMAP_ALLOC (NULL);
    2610       855893 :   bool changed;
    2611              : 
    2612       855893 :   do {
    2613       855893 :     changed = false;
    2614      2165342 :     EXECUTE_IF_SET_IN_BITMAP (candidates, 0, id, bi)
    2615              :       {
    2616      1309449 :         rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
    2617      1309449 :         df_ref ref;
    2618              : 
    2619      1963608 :         FOR_EACH_INSN_DEF (ref, insn)
    2620       654159 :           if (!DF_REF_REG_MEM_P (ref)
    2621       654159 :               && GET_MODE (DF_REF_REG (ref)) == TImode)
    2622       631384 :             timode_check_non_convertible_regs (candidates, regs,
    2623              :                                                DF_REF_REGNO (ref));
    2624              : 
    2625      3229581 :         FOR_EACH_INSN_USE (ref, insn)
    2626      1920132 :           if (!DF_REF_REG_MEM_P (ref)
    2627       677505 :               && GET_MODE (DF_REF_REG (ref)) == TImode)
    2628       646887 :             timode_check_non_convertible_regs (candidates, regs,
    2629              :                                                DF_REF_REGNO (ref));
    2630              :       }
    2631              : 
    2632      1047010 :     EXECUTE_IF_SET_IN_BITMAP (regs, 0, id, bi)
    2633              :       {
    2634       191117 :         for (df_ref def = DF_REG_DEF_CHAIN (id);
    2635       389178 :              def;
    2636       198061 :              def = DF_REF_NEXT_REG (def))
    2637       198061 :           if (bitmap_bit_p (candidates, DF_REF_INSN_UID (def)))
    2638              :             {
    2639        49317 :               if (dump_file)
    2640            0 :                 fprintf (dump_file, "Removing insn %d from candidates list\n",
    2641            0 :                          DF_REF_INSN_UID (def));
    2642              : 
    2643        49317 :               bitmap_clear_bit (candidates, DF_REF_INSN_UID (def));
    2644        49317 :               changed = true;
    2645              :             }
    2646              : 
    2647       191117 :         for (df_ref ref = DF_REG_USE_CHAIN (id);
    2648       520047 :              ref;
    2649       328930 :              ref = DF_REF_NEXT_REG (ref))
    2650       328930 :           if (bitmap_bit_p (candidates, DF_REF_INSN_UID (ref)))
    2651              :             {
    2652        35325 :               if (dump_file)
    2653            0 :                 fprintf (dump_file, "Removing insn %d from candidates list\n",
    2654            0 :                          DF_REF_INSN_UID (ref));
    2655              : 
    2656        35325 :               bitmap_clear_bit (candidates, DF_REF_INSN_UID (ref));
    2657        35325 :               changed = true;
    2658              :             }
    2659              :       }
    2660              :   } while (changed);
    2661              : 
    2662       828673 :   BITMAP_FREE (regs);
    2663       828673 : }
    2664              : 
    2665              : /* Main STV pass function.  Find and convert scalar
    2666              :    instructions into vector mode when profitable.  */
    2667              : 
    2668              : static unsigned int
    2669      1783308 : convert_scalars_to_vector (bool timode_p)
    2670              : {
    2671      1783308 :   basic_block bb;
    2672      1783308 :   int converted_insns = 0;
    2673      1783308 :   auto_vec<rtx_insn *> control_flow_insns;
    2674              : 
    2675      1783308 :   bitmap_obstack_initialize (NULL);
    2676      1783308 :   const machine_mode cand_mode[3] = { SImode, DImode, TImode };
    2677      1783308 :   const machine_mode cand_vmode[3] = { V4SImode, V2DImode, V1TImode };
    2678      5349924 :   bitmap_head candidates[3];  /* { SImode, DImode, TImode } */
    2679      7133232 :   for (unsigned i = 0; i < 3; ++i)
    2680      5349924 :     bitmap_initialize (&candidates[i], &bitmap_default_obstack);
    2681              : 
    2682      1783308 :   calculate_dominance_info (CDI_DOMINATORS);
    2683      1783308 :   df_set_flags (DF_DEFER_INSN_RESCAN | DF_RD_PRUNE_DEAD_DEFS);
    2684      1783308 :   df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
    2685      1783308 :   df_analyze ();
    2686              : 
    2687              :   /* Find all instructions we want to convert into vector mode.  */
    2688      1783308 :   if (dump_file)
    2689           44 :     fprintf (dump_file, "Searching for mode conversion candidates...\n");
    2690              : 
    2691     19780882 :   FOR_EACH_BB_FN (bb, cfun)
    2692              :     {
    2693     17997574 :       rtx_insn *insn;
    2694    239259237 :       FOR_BB_INSNS (bb, insn)
    2695    221261663 :         if (timode_p
    2696    221261663 :             && timode_scalar_to_vector_candidate_p (insn))
    2697              :           {
    2698      1012339 :             if (dump_file)
    2699            0 :               fprintf (dump_file, "  insn %d is marked as a TImode candidate\n",
    2700            0 :                        INSN_UID (insn));
    2701              : 
    2702      1012339 :             bitmap_set_bit (&candidates[2], INSN_UID (insn));
    2703              :           }
    2704    220249324 :         else if (!timode_p)
    2705              :           {
    2706              :             /* Check {SI,DI}mode.  */
    2707    344124761 :             for (unsigned i = 0; i <= 1; ++i)
    2708    236001426 :               if (general_scalar_to_vector_candidate_p (insn, cand_mode[i]))
    2709              :                 {
    2710     11655145 :                   if (dump_file)
    2711          554 :                     fprintf (dump_file, "  insn %d is marked as a %s candidate\n",
    2712          277 :                              INSN_UID (insn), i == 0 ? "SImode" : "DImode");
    2713              : 
    2714     11655145 :                   bitmap_set_bit (&candidates[i], INSN_UID (insn));
    2715     11655145 :                   break;
    2716              :                 }
    2717              :           }
    2718              :     }
    2719              : 
    2720      1783308 :   if (timode_p)
    2721       828673 :     timode_remove_non_convertible_regs (&candidates[2]);
    2722              : 
    2723      5652591 :   for (unsigned i = 0; i <= 2; ++i)
    2724      4499029 :     if (!bitmap_empty_p (&candidates[i]))
    2725              :       break;
    2726      3869283 :     else if (i == 2 && dump_file)
    2727           23 :       fprintf (dump_file, "There are no candidates for optimization.\n");
    2728              : 
    2729      7133232 :   for (unsigned i = 0; i <= 2; ++i)
    2730              :     {
    2731      5349924 :       auto_bitmap disallowed;
    2732      5349924 :       bitmap_tree_view (&candidates[i]);
    2733     17069413 :       while (!bitmap_empty_p (&candidates[i]))
    2734              :         {
    2735      6369565 :           unsigned uid = bitmap_first_set_bit (&candidates[i]);
    2736      6369565 :           scalar_chain *chain;
    2737              : 
    2738      6369565 :           if (cand_mode[i] == TImode)
    2739       472109 :             chain = new timode_scalar_chain;
    2740              :           else
    2741      5897456 :             chain = new general_scalar_chain (cand_mode[i], cand_vmode[i]);
    2742              : 
    2743              :           /* Find the instruction chain we want to convert to vector mode.
    2744              :              Check all uses and definitions to estimate all required
    2745              :              conversions.  */
    2746      6369565 :           if (chain->build (&candidates[i], uid, disallowed))
    2747              :             {
    2748      6362836 :               if (chain->compute_convert_gain ())
    2749       642658 :                 converted_insns += chain->convert ();
    2750      5720178 :               else if (dump_file)
    2751          136 :                 fprintf (dump_file, "Chain #%d conversion is not profitable\n",
    2752              :                          chain->chain_id);
    2753              :             }
    2754              : 
    2755      6369565 :           rtx_insn* iter_insn;
    2756      6369565 :           unsigned int ii;
    2757      6373162 :           FOR_EACH_VEC_ELT (chain->control_flow_insns, ii, iter_insn)
    2758         3597 :             control_flow_insns.safe_push (iter_insn);
    2759              : 
    2760      6369565 :           delete chain;
    2761              :         }
    2762      5349924 :     }
    2763              : 
    2764      1783308 :   if (dump_file)
    2765           44 :     fprintf (dump_file, "Total insns converted: %d\n", converted_insns);
    2766              : 
    2767      7133232 :   for (unsigned i = 0; i <= 2; ++i)
    2768      5349924 :     bitmap_release (&candidates[i]);
    2769      1783308 :   bitmap_obstack_release (NULL);
    2770      1783308 :   df_process_deferred_rescans ();
    2771              : 
    2772              :   /* Conversion means we may have 128bit register spills/fills
    2773              :      which require aligned stack.  */
    2774      1783308 :   if (converted_insns)
    2775              :     {
    2776       104611 :       if (crtl->stack_alignment_needed < 128)
    2777         2354 :         crtl->stack_alignment_needed = 128;
    2778       104611 :       if (crtl->stack_alignment_estimated < 128)
    2779          219 :         crtl->stack_alignment_estimated = 128;
    2780              : 
    2781       104611 :       crtl->stack_realign_needed
    2782       104611 :         = INCOMING_STACK_BOUNDARY < crtl->stack_alignment_estimated;
    2783       104611 :       crtl->stack_realign_tried = crtl->stack_realign_needed;
    2784              : 
    2785       104611 :       crtl->stack_realign_processed = true;
    2786              : 
    2787       104611 :       if (!crtl->drap_reg)
    2788              :         {
    2789       104432 :           rtx drap_rtx = targetm.calls.get_drap_rtx ();
    2790              : 
    2791              :           /* stack_realign_drap and drap_rtx must match.  */
    2792       104432 :           gcc_assert ((stack_realign_drap != 0) == (drap_rtx != NULL));
    2793              : 
    2794              :           /* Do nothing if NULL is returned,
    2795              :              which means DRAP is not needed.  */
    2796       104432 :           if (drap_rtx != NULL)
    2797              :             {
    2798            0 :               crtl->args.internal_arg_pointer = drap_rtx;
    2799              : 
    2800              :               /* Call fixup_tail_calls to clean up
    2801              :                  REG_EQUIV note if DRAP is needed. */
    2802            0 :               fixup_tail_calls ();
    2803              :             }
    2804              :         }
    2805              : 
    2806              :       /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments.  */
    2807       104611 :       if (TARGET_64BIT)
    2808        66181 :         for (tree parm = DECL_ARGUMENTS (current_function_decl);
    2809       182034 :              parm; parm = DECL_CHAIN (parm))
    2810              :           {
    2811       115853 :             if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
    2812        99797 :               continue;
    2813        16056 :             if (DECL_RTL_SET_P (parm)
    2814        32112 :                 && GET_MODE (DECL_RTL (parm)) == V1TImode)
    2815              :               {
    2816          522 :                 rtx r = DECL_RTL (parm);
    2817          522 :                 if (REG_P (r))
    2818          522 :                   SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
    2819              :               }
    2820        16056 :             if (DECL_INCOMING_RTL (parm)
    2821        16056 :                 && GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
    2822              :               {
    2823            0 :                 rtx r = DECL_INCOMING_RTL (parm);
    2824            0 :                 if (REG_P (r))
    2825            0 :                   DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
    2826              :               }
    2827              :           }
    2828              : 
    2829       104611 :       if (!control_flow_insns.is_empty ())
    2830              :         {
    2831         1130 :           free_dominance_info (CDI_DOMINATORS);
    2832              : 
    2833         1130 :           unsigned int i;
    2834         1130 :           rtx_insn* insn;
    2835         5857 :           FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
    2836         3597 :             if (control_flow_insn_p (insn))
    2837              :               {
    2838              :                 /* Split the block after insn.  There will be a fallthru
    2839              :                    edge, which is OK so we keep it.  We have to create
    2840              :                    the exception edges ourselves.  */
    2841         3597 :                 bb = BLOCK_FOR_INSN (insn);
    2842         3597 :                 split_block (bb, insn);
    2843         3597 :                 rtl_make_eh_edge (NULL, bb, BB_END (bb));
    2844              :               }
    2845              :         }
    2846              :     }
    2847              : 
    2848      1783308 :   return 0;
    2849      1783308 : }
    2850              : 
    2851              : static unsigned int
    2852        74342 : rest_of_handle_insert_vzeroupper (void)
    2853              : {
    2854              :   /* vzeroupper instructions are inserted immediately after reload and
    2855              :      postreload_cse to clean up after it a little bit to account for possible
    2856              :      spills from 256bit or 512bit registers.  The pass reuses mode switching
    2857              :      infrastructure by re-running mode insertion pass, so disable entities
    2858              :      that have already been processed.  */
    2859       520394 :   for (int i = 0; i < MAX_386_ENTITIES; i++)
    2860       446052 :     ix86_optimize_mode_switching[i] = 0;
    2861              : 
    2862        74342 :   ix86_optimize_mode_switching[AVX_U128] = 1;
    2863              : 
    2864              :   /* Call optimize_mode_switching.  */
    2865        74342 :   g->get_passes ()->execute_pass_mode_switching ();
    2866              : 
    2867              :   /* LRA removes all REG_DEAD/REG_UNUSED notes and normally they
    2868              :      reappear in the IL only at the start of pass_rtl_dse2, which does
    2869              :      df_note_add_problem (); df_analyze ();
    2870              :      The vzeroupper is scheduled after postreload_cse pass and mode
    2871              :      switching computes the notes as well, the problem is that e.g.
    2872              :      pass_gcse2 doesn't maintain the notes, see PR113059 and
    2873              :      PR112760.  Remove the notes now to restore status quo ante
    2874              :      until we figure out how to maintain the notes or what else
    2875              :      to do.  */
    2876        74342 :   basic_block bb;
    2877        74342 :   rtx_insn *insn;
    2878       409112 :   FOR_EACH_BB_FN (bb, cfun)
    2879      4318273 :     FOR_BB_INSNS (bb, insn)
    2880      3983503 :       if (NONDEBUG_INSN_P (insn))
    2881              :         {
    2882      2121088 :           rtx *pnote = &REG_NOTES (insn);
    2883      3933459 :           while (*pnote != 0)
    2884              :             {
    2885      1812371 :               if (REG_NOTE_KIND (*pnote) == REG_DEAD
    2886       829627 :                   || REG_NOTE_KIND (*pnote) == REG_UNUSED)
    2887      1299935 :                 *pnote = XEXP (*pnote, 1);
    2888              :               else
    2889       512436 :                 pnote = &XEXP (*pnote, 1);
    2890              :             }
    2891              :         }
    2892              : 
    2893        74342 :   df_remove_problem (df_note);
    2894        74342 :   df_analyze ();
    2895        74342 :   return 0;
    2896              : }
    2897              : 
    2898              : namespace {
    2899              : 
    2900              : const pass_data pass_data_insert_vzeroupper =
    2901              : {
    2902              :   RTL_PASS, /* type */
    2903              :   "vzeroupper", /* name */
    2904              :   OPTGROUP_NONE, /* optinfo_flags */
    2905              :   TV_MACH_DEP, /* tv_id */
    2906              :   0, /* properties_required */
    2907              :   0, /* properties_provided */
    2908              :   0, /* properties_destroyed */
    2909              :   0, /* todo_flags_start */
    2910              :   TODO_df_finish, /* todo_flags_finish */
    2911              : };
    2912              : 
    2913              : class pass_insert_vzeroupper : public rtl_opt_pass
    2914              : {
    2915              : public:
    2916       287872 :   pass_insert_vzeroupper(gcc::context *ctxt)
    2917       575744 :     : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
    2918              :   {}
    2919              : 
    2920              :   /* opt_pass methods: */
    2921      1480955 :   bool gate (function *) final override
    2922              :     {
    2923      1480955 :       return TARGET_AVX && TARGET_VZEROUPPER;
    2924              :     }
    2925              : 
    2926        74342 :   unsigned int execute (function *) final override
    2927              :     {
    2928        74342 :       return rest_of_handle_insert_vzeroupper ();
    2929              :     }
    2930              : 
    2931              : }; // class pass_insert_vzeroupper
    2932              : 
    2933              : const pass_data pass_data_stv =
    2934              : {
                          /* Descriptor that pass_stv hands to its rtl_opt_pass base class.
                             Each initializer is labelled with the pass_data field it fills
                             in.  */
    2935              :   RTL_PASS, /* type */
    2936              :   "stv", /* name */
    2937              :   OPTGROUP_NONE, /* optinfo_flags */
    2938              :   TV_MACH_DEP, /* tv_id */
    2939              :   0, /* properties_required */
    2940              :   0, /* properties_provided */
    2941              :   0, /* properties_destroyed */
    2942              :   0, /* todo_flags_start */
    2943              :   TODO_df_finish, /* todo_flags_finish */
    2944              : };
    2945              : 
    2946              : class pass_stv : public rtl_opt_pass
    2947              : {
                          /* RTL pass object for "stv".  It runs convert_scalars_to_vector
                             with a single boolean parameter, TIMODE_P, which starts out
                             false and is set through set_pass_param.  The pass can be
                             cloned.  */
    2948              : public:
                          /* Pass pass_data_stv and CTXT on to the rtl_opt_pass base and
                             start with TIMODE_P cleared.  */
    2949       575744 :   pass_stv (gcc::context *ctxt)
    2950       575744 :     : rtl_opt_pass (pass_data_stv, ctxt),
    2951      1151488 :       timode_p (false)
    2952              :   {}
    2953              : 
    2954              :   /* opt_pass methods: */
                          /* Gate: requires TARGET_STV, TARGET_SSE2 and optimize > 1.  When
                             TIMODE_P is set, TARGET_64BIT is required as well.  The function
                             argument is unused.  */
    2955      2961910 :   bool gate (function *) final override
    2956              :     {
    2957      1480955 :       return ((!timode_p || TARGET_64BIT)
    2958      4316582 :               && TARGET_STV && TARGET_SSE2 && optimize > 1);
    2959              :     }
    2960              : 
                          /* Execute: return the result of convert_scalars_to_vector called
                             with TIMODE_P.  The function argument is unused.  */
    2961      1783308 :   unsigned int execute (function *) final override
    2962              :     {
    2963      1783308 :       return convert_scalars_to_vector (timode_p);
    2964              :     }
    2965              : 
                          /* Clone: return a new pass_stv for the same context.  The copy
                             comes from the constructor, so its TIMODE_P is false until
                             set_pass_param is called on it.  */
    2966       287872 :   opt_pass *clone () final override
    2967              :     {
    2968       287872 :       return new pass_stv (m_ctxt);
    2969              :     }
    2970              : 
                          /* Store PARAM into TIMODE_P.  Only parameter index N == 0 is
                             accepted; any other index trips the assertion.  */
    2971       575744 :   void set_pass_param (unsigned int n, bool param) final override
    2972              :     {
    2973       575744 :       gcc_assert (n == 0);
    2974       575744 :       timode_p = param;
    2975       575744 :     }
    2976              : 
    2977              : private:
                          /* Forwarded to convert_scalars_to_vector; when true the gate
                             additionally requires TARGET_64BIT.  */
    2978              :   bool timode_p;
    2979              : }; // class pass_stv
    2980              : 
    2981              : } // anon namespace
    2982              : 
    2983              : rtl_opt_pass *
    2984       287872 : make_pass_insert_vzeroupper (gcc::context *ctxt)
    2985              : {
                          /* Factory: allocate a pass_insert_vzeroupper for CTXT with new and
                             return it as an rtl_opt_pass pointer.  */
    2986       287872 :   return new pass_insert_vzeroupper (ctxt);
    2987              : }
    2988              : 
    2989              : rtl_opt_pass *
    2990       287872 : make_pass_stv (gcc::context *ctxt)
    2991              : {
                          /* Factory: allocate a pass_stv for CTXT with new and return it as
                             an rtl_opt_pass pointer.  Its TIMODE_P parameter starts out
                             false.  */
    2992       287872 :   return new pass_stv (ctxt);
    2993              : }
    2994              : 
    2995              : /* Inserting ENDBR and pseudo patchable-area instructions.
                           NEED_ENDBR requests ENDBR insertion for the current function
                           (cfun).  A nonzero PATCHABLE_AREA_SIZE requests a patchable_area
                           insn at the function entrance, with that size as its first
                           operand.  When NEED_ENDBR is false only the patchable-area part
                           is performed.  */
    2996              : 
    2997              : static void
    2998       194176 : rest_of_insert_endbr_and_patchable_area (bool need_endbr,
    2999              :                                          unsigned int patchable_area_size)
    3000              : {
    3001       194176 :   rtx endbr;
    3002       194176 :   rtx_insn *insn;
                          /* The ENDBR emitted at the function entrance, if any; a patchable
                             area is placed right after it.  */
    3003       194176 :   rtx_insn *endbr_insn = NULL;
    3004       194176 :   basic_block bb;
    3005              : 
                          /* Step 1: ENDBR at the function entrance.  */
    3006       194176 :   if (need_endbr)
    3007              :     {
    3008              :       /* Currently emit ENDBR if it's a tracking function, i.e. 'nocf_check'
    3009              :          is absent among function attributes.  Later an optimization will
    3010              :          be introduced to analyze whether the address of a static function
    3011              :          is taken.  A static function whose address is not taken will get
    3012              :          a nocf_check attribute.  This will allow reducing the number of
    3013              :          ENDBRs.  */
                              /* All three conditions below must hold:
                                 - the function type has no "nocf_check" attribute;
                                 - flag_manual_endbr is off, or the decl has "cf_check";
                                 - the function is not known to be called only directly, or
                                   the code model is CM_LARGE/CM_LARGE_PIC, or
                                   flag_force_indirect_call is set, or the decl is
                                   dllimport'ed.  */
    3014       194131 :       if (!lookup_attribute ("nocf_check",
    3015       194131 :                              TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
    3016       194113 :           && (!flag_manual_endbr
    3017            8 :               || lookup_attribute ("cf_check",
    3018            8 :                                    DECL_ATTRIBUTES (cfun->decl)))
    3019       388243 :           && (!cgraph_node::get (cfun->decl)->only_called_directly_p ()
    3020        26492 :               || ix86_cmodel == CM_LARGE
    3021        26491 :               || ix86_cmodel == CM_LARGE_PIC
    3022        26490 :               || flag_force_indirect_call
    3023        26490 :               || (TARGET_DLLIMPORT_DECL_ATTRIBUTES
    3024              :                   && DECL_DLLIMPORT_P (cfun->decl))))
    3025              :         {
    3026       167623 :           if (crtl->profile && flag_fentry)
    3027              :             {
    3028              :               /* Queue ENDBR insertion to x86_function_profiler.
    3029              :                  NB: Any patchable-area insn will be inserted after
    3030              :                  ENDBR.  */
    3031            6 :               cfun->machine->insn_queued_at_entrance = TYPE_ENDBR;
    3032              :             }
    3033              :           else
    3034              :             {
                                      /* Emit the ENDBR before the head insn of the block that
                                         follows the entry block.  NB: the INSN declared here
                                         is a new local that shadows the function-level
                                         INSN.  */
    3035       167617 :               endbr = gen_nop_endbr ();
    3036       167617 :               bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
    3037       167617 :               rtx_insn *insn = BB_HEAD (bb);
    3038       167617 :               endbr_insn = emit_insn_before (endbr, insn);
    3039              :             }
    3040              :         }
    3041              :     }
    3042              : 
                          /* Step 2: patchable area at the function entrance.  */
    3043       194176 :   if (patchable_area_size)
    3044              :     {
    3045           51 :       if (crtl->profile && flag_fentry)
    3046              :         {
    3047              :           /* Queue patchable-area insertion to x86_function_profiler.
    3048              :              NB: If there is a queued ENDBR, x86_function_profiler
    3049              :              will also handle patchable-area.  */
    3050            2 :           if (!cfun->machine->insn_queued_at_entrance)
    3051            1 :             cfun->machine->insn_queued_at_entrance = TYPE_PATCHABLE_AREA;
    3052              :         }
    3053              :       else
    3054              :         {
                                  /* The second operand is 1 when crtl->patch_area_entry is
                                     zero and 0 otherwise.  */
    3055           49 :           rtx patchable_area
    3056           49 :             = gen_patchable_area (GEN_INT (patchable_area_size),
    3057           49 :                                   GEN_INT (crtl->patch_area_entry == 0));
                                  /* Keep the patchable area directly behind an ENDBR emitted
                                     above; otherwise put it before the head insn of the block
                                     that follows the entry block.  */
    3058           49 :           if (endbr_insn)
    3059            3 :             emit_insn_after (patchable_area, endbr_insn);
    3060              :           else
    3061              :             {
    3062           46 :               bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
    3063           46 :               insn = BB_HEAD (bb);
    3064           46 :               emit_insn_before (patchable_area, insn);
    3065              :             }
    3066              :         }
    3067              :     }
    3068              : 
                          /* Everything below only inserts ENDBRs.  */
    3069       194176 :   if (!need_endbr)
    3070              :     return;
    3071              : 
                          /* Step 3: scan every insn of every basic block and add ENDBRs
                             after selected calls, at switch-table targets and after
                             preserved labels.  */
    3072       194131 :   bb = 0;
    3073      4062745 :   FOR_EACH_BB_FN (bb, cfun)
    3074              :     {
    3075     73394830 :       for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb));
    3076     69526216 :            insn = NEXT_INSN (insn))
    3077              :         {
    3078     69526216 :           if (CALL_P (insn))
    3079              :             {
                                      /* From here on NEED_ENDBR is reused as a per-call flag;
                                         it starts out true for calls that carry a REG_SETJMP
                                         note.  */
    3080      1360600 :               need_endbr = find_reg_note (insn, REG_SETJMP, NULL) != NULL;
    3081      1360600 :               if (!need_endbr && !SIBLING_CALL_P (insn))
    3082              :                 {
    3083      1310772 :                   rtx call = get_call_rtx_from (insn);
    3084      1310772 :                   rtx fnaddr = XEXP (call, 0);
    3085      1310772 :                   tree fndecl = NULL_TREE;
    3086              : 
    3087              :                   /* Also generate ENDBRANCH for non-tail call which
    3088              :                      may return via indirect branch.  */
                                          /* Find a tree for the callee: the decl of a
                                             SYMBOL_REF address, else the MEM_EXPR of the call
                                             address.  It is dropped again unless its type is
                                             a FUNCTION_TYPE or METHOD_TYPE.  */
    3089      1310772 :                   if (SYMBOL_REF_P (XEXP (fnaddr, 0)))
    3090      1251480 :                     fndecl = SYMBOL_REF_DECL (XEXP (fnaddr, 0));
    3091      1251480 :                   if (fndecl == NULL_TREE)
    3092        59660 :                     fndecl = MEM_EXPR (fnaddr);
    3093        59660 :                   if (fndecl
    3094      1308325 :                       && TREE_CODE (TREE_TYPE (fndecl)) != FUNCTION_TYPE
    3095       553873 :                       && TREE_CODE (TREE_TYPE (fndecl)) != METHOD_TYPE)
    3096              :                     fndecl = NULL_TREE;
                                          /* The "indirect_return" attribute is only looked up
                                             when the callee type has a non-null
                                             TYPE_ARG_TYPES.  */
    3097      1310772 :                   if (fndecl && TYPE_ARG_TYPES (TREE_TYPE (fndecl)))
    3098              :                     {
    3099      1269399 :                       tree fntype = TREE_TYPE (fndecl);
    3100      1269399 :                       if (lookup_attribute ("indirect_return",
    3101      1269399 :                                             TYPE_ATTRIBUTES (fntype)))
    3102              :                         need_endbr = true;
    3103              :                     }
    3104              :                 }
    3105      1360588 :               if (!need_endbr)
    3106      1360580 :                 continue;
    3107              :               /* Generate ENDBRANCH after a CALL to a setjmp-like function,
    3108              :                  which can return more than once, or to an indirect_return one.  */
    3109              : 
                                      /* The ENDBR inherits the location of the call insn.  */
    3110           20 :               endbr = gen_nop_endbr ();
    3111           20 :               emit_insn_after_setloc (endbr, insn, INSN_LOCATION (insn));
    3112           20 :               continue;
    3113           20 :             }
    3114              : 
                                  /* Jumps are only examined when flag_cet_switch is set.  */
    3115     68165616 :           if (JUMP_P (insn) && flag_cet_switch)
    3116              :             {
    3117            9 :               rtx target = JUMP_LABEL (insn);
    3118            9 :               if (target == NULL_RTX || ANY_RETURN_P (target))
    3119            5 :                 continue;
    3120              : 
    3121              :               /* Check the jump is a switch table.  */
    3122            4 :               rtx_insn *label = as_a<rtx_insn *> (target);
    3123            4 :               rtx_insn *table = next_insn (label);
    3124            4 :               if (table == NULL_RTX || !JUMP_TABLE_DATA_P (table))
    3125            2 :                 continue;
    3126              : 
    3127              :               /* For the indirect jump find out all places it jumps and insert
    3128              :                  ENDBRANCH there.  It should be done under a special flag to
    3129              :                  control ENDBRANCH generation for switch stmts.  */
    3130            2 :               edge_iterator ei;
    3131            2 :               edge e;
    3132            2 :               basic_block dest_blk;
    3133              : 
                                      /* Every successor block of BB must start with a label;
                                         the ENDBR goes right after that label.  NB: the INSN
                                         declared inside the loop shadows the INSN that drives
                                         the enclosing scan.  */
    3134           24 :               FOR_EACH_EDGE (e, ei, bb->succs)
    3135              :                 {
    3136           22 :                   rtx_insn *insn;
    3137              : 
    3138           22 :                   dest_blk = e->dest;
    3139           22 :                   insn = BB_HEAD (dest_blk);
    3140           22 :                   gcc_assert (LABEL_P (insn));
    3141           22 :                   endbr = gen_nop_endbr ();
    3142           22 :                   emit_insn_after (endbr, insn);
    3143              :                 }
    3144            2 :               continue;
    3145            2 :             }
    3146              : 
                                  /* A label marked LABEL_PRESERVE_P gets an ENDBR right after
                                     it.  NOTE(review): presumably because such a label can be
                                     reached indirectly -- confirm.  */
    3147     68165607 :           if (LABEL_P (insn) && LABEL_PRESERVE_P (insn))
    3148              :             {
    3149       135905 :               endbr = gen_nop_endbr ();
    3150       135905 :               emit_insn_after (endbr, insn);
    3151       135905 :               continue;
    3152              :             }
    3153              :         }
    3154              :     }
    3155              : 
    3156              :   return;
    3157              : }
    3158              : 
    3159              : namespace {
    3160              : 
    3161              : const pass_data pass_data_insert_endbr_and_patchable_area =
    3162              : {
                          /* Descriptor that pass_insert_endbr_and_patchable_area hands to its
                             rtl_opt_pass base class.  Each initializer is labelled with the
                             pass_data field it fills in; no TODO flags are requested.  */
    3163              :   RTL_PASS, /* type.  */
    3164              :   "endbr_and_patchable_area", /* name.  */
    3165              :   OPTGROUP_NONE, /* optinfo_flags.  */
    3166              :   TV_MACH_DEP, /* tv_id.  */
    3167              :   0, /* properties_required.  */
    3168              :   0, /* properties_provided.  */
    3169              :   0, /* properties_destroyed.  */
    3170              :   0, /* todo_flags_start.  */
    3171              :   0, /* todo_flags_finish.  */
    3172              : };
    3173              : 
    3174              : class pass_insert_endbr_and_patchable_area : public rtl_opt_pass
    3175              : {
                          /* RTL pass object for "endbr_and_patchable_area".  The gate
                             computes the two inputs of
                             rest_of_insert_endbr_and_patchable_area and caches them in
                             members; execute then passes the cached values on.  */
    3176              : public:
                          /* Pass pass_data_insert_endbr_and_patchable_area and CTXT on to the
                             rtl_opt_pass base.  NOTE(review): NEED_ENDBR and
                             PATCHABLE_AREA_SIZE are not initialized here; execute relies on
                             gate having been called first.  */
    3177       287872 :   pass_insert_endbr_and_patchable_area (gcc::context *ctxt)
    3178       575744 :     : rtl_opt_pass (pass_data_insert_endbr_and_patchable_area, ctxt)
    3179              :   {}
    3180              : 
    3181              :   /* opt_pass methods: */
                          /* Gate: NEED_ENDBR is set when the CF_BRANCH bit of
                             flag_cf_protection is on; PATCHABLE_AREA_SIZE is
                             crtl->patch_area_size minus crtl->patch_area_entry.  The pass
                             runs when either is nonzero.  Both values are stored in the
                             object as a side effect.  */
    3182      1480955 :   bool gate (function *) final override
    3183              :     {
    3184      1480955 :       need_endbr = (flag_cf_protection & CF_BRANCH) != 0;
    3185      1480955 :       patchable_area_size = crtl->patch_area_size - crtl->patch_area_entry;
    3186      1480955 :       return need_endbr || patchable_area_size;
    3187              :     }
    3188              : 
                          /* Execute: do the insertion with the values cached by gate, inside
                             a TV_MACH_DEP timevar push/pop pair.  Always returns 0.  */
    3189       194176 :   unsigned int execute (function *) final override
    3190              :     {
    3191       194176 :       timevar_push (TV_MACH_DEP);
    3192       194176 :       rest_of_insert_endbr_and_patchable_area (need_endbr,
    3193              :                                                patchable_area_size);
    3194       194176 :       timevar_pop (TV_MACH_DEP);
    3195       194176 :       return 0;
    3196              :     }
    3197              : 
    3198              : private:
                          /* Values computed by the most recent gate call.  */
    3199              :   bool need_endbr;
    3200              :   unsigned int patchable_area_size;
    3201              : }; // class pass_insert_endbr_and_patchable_area
    3202              : 
    3203              : } // anon namespace
    3204              : 
    3205              : rtl_opt_pass *
    3206       287872 : make_pass_insert_endbr_and_patchable_area (gcc::context *ctxt)
    3207              : {
                          /* Factory: allocate a pass_insert_endbr_and_patchable_area for CTXT
                             with new and return it as an rtl_opt_pass pointer.  */
    3208       287872 :   return new pass_insert_endbr_and_patchable_area (ctxt);
    3209              : }
    3210              : 
    3211              : bool
    3212      6148285 : ix86_rpad_gate ()
    3213              : {
                          /* Return true only when all of the following hold: TARGET_AVX,
                             TARGET_SSE_PARTIAL_REG_DEPENDENCY, TARGET_SSE_MATH, a nonzero
                             optimize level, and the current function (cfun) being optimized
                             for speed.  NOTE(review): judging by its name this is the gate of
                             the "rpad" pass -- confirm against its callers.  */
    3214      6148285 :   return (TARGET_AVX
    3215       403835 :           && TARGET_SSE_PARTIAL_REG_DEPENDENCY
    3216       308940 :           && TARGET_SSE_MATH
    3217       308710 :           && optimize
    3218      6451827 :           && optimize_function_for_speed_p (cfun));
    3219              : }
    3220              : 
    3221              : enum x86_cse_kind
    3222              : {
                          /* Tag stored in redundant_pattern::kind.  Of these enumerators only
                             X86_CSE_VEC_DUP is tested in the code visible nearby
                             (ix86_place_single_vector_set keeps such a set inside its loop and
                             adds a scalar set in front of it).  NOTE(review): the meaning of
                             the remaining enumerators is only suggested by their names
                             (all-zeros vector, all-ones vector, TLS GD / LD base / TLSDESC
                             sequences) -- confirm against their users.  */
    3223              :   X86_CSE_CONST0_VECTOR,
    3224              :   X86_CSE_CONSTM1_VECTOR,
    3225              :   X86_CSE_VEC_DUP,
    3226              :   X86_CSE_TLS_GD,
    3227              :   X86_CSE_TLS_LD_BASE,
    3228              :   X86_CSE_TLSDESC
    3229              : };
    3230              : 
    3231       123323 : struct redundant_pattern
    3232              : {
                          /* Bookkeeping for one group of redundant instructions that share
                             the same source value.  The field comments speak of "broadcast"
                             instructions, but KIND can also name non-broadcast patterns (see
                             x86_cse_kind and TLSDESC_VAL).  The two bitmaps are auto_bitmap
                             members, so the struct manages their storage itself.  */
    3233              :   /* Bitmap of basic blocks with broadcast instructions.  */
    3234              :   auto_bitmap bbs;
    3235              :   /* Bitmap of broadcast instructions.  */
    3236              :   auto_bitmap insns;
    3237              :   /* The broadcast inner scalar.  */
    3238              :   rtx val;
    3239              :   /* The actual redundant source value for UNSPEC_TLSDESC.  */
    3240              :   rtx tlsdesc_val;
    3241              :   /* The inner scalar mode.  */
    3242              :   machine_mode mode;
    3243              :   /* The instruction which sets the inner scalar.  Nullptr if the inner
    3244              :      scalar is applied to the whole function, instead of within the same
    3245              :      block.  */
    3246              :   rtx_insn *def_insn;
    3247              :   /* The widest broadcast source.  */
    3248              :   rtx broadcast_source;
    3249              :   /* The widest broadcast register.  */
    3250              :   rtx broadcast_reg;
    3251              :   /* The basic block of the broadcast instruction.  */
    3252              :   basic_block bb;
    3253              :   /* The number of broadcast instructions with the same inner scalar.  */
    3254              :   unsigned HOST_WIDE_INT count;
    3255              :   /* The threshold of broadcast instructions with the same inner
    3256              :      scalar.  */
    3257              :   unsigned int threshold;
    3258              :   /* The widest broadcast size in bytes.  */
    3259              :   unsigned int size;
    3260              :   /* Load kind.  */
    3261              :   x86_cse_kind kind;
    3262              : };
    3263              : 
    3264              : /* Generate a vector set, DEST = SRC, at entry of the nearest dominator
    3265              :    for basic block map BBS, which is in the fake loop that contains the
    3266              :    whole function, so that there is only a single vector set in the
    3267              :    whole function.  If not nullptr, LOAD describes the redundant pattern
                           being replaced.  When LOAD->kind is X86_CSE_VEC_DUP the set is not
                           moved out of its loop, and a scalar set of SRC's operand from
                           LOAD->val is emitted in front of the vector set.  Dominance and
                           loop information are used here, so the caller has to have computed
                           them.  */
    3268              : 
    3269              : static void
    3270        32270 : ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs,
    3271              :                               redundant_pattern *load = nullptr)
    3272              : {
    3273        32270 :   basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
    3274              :   /* For X86_CSE_VEC_DUP, don't place the vector set outside of the loop
    3275              :      to avoid extra spills.  */
    3276        32270 :   if (!load || load->kind != X86_CSE_VEC_DUP)
    3277              :     {
                              /* Climb to the immediate dominator of the enclosing loop's
                                 header until BB sits in the loop whose latch is the exit
                                 block, i.e. the fake loop covering the whole function.  */
    3278        23345 :       while (bb->loop_father->latch
    3279        23345 :              != EXIT_BLOCK_PTR_FOR_FN (cfun))
    3280         1351 :         bb = get_immediate_dominator (CDI_DOMINATORS,
    3281              :                                       bb->loop_father->header);
    3282              :     }
    3283              : 
    3284        32270 :   rtx set = gen_rtx_SET (dest, src);
    3285              : 
                          /* Walk forward to the first insn of BB that satisfies
                             NONDEBUG_INSN_P; INSN ends up NULL when BB has none.  */
    3286        32270 :   rtx_insn *insn = BB_HEAD (bb);
    3287       125309 :   while (insn && !NONDEBUG_INSN_P (insn))
    3288              :     {
    3289        93043 :       if (insn == BB_END (bb))
    3290              :         {
    3291              :           insn = NULL;
    3292              :           break;
    3293              :         }
    3294        93039 :       insn = NEXT_INSN (insn);
    3295              :     }
    3296              : 
    3297        32270 :   rtx_insn *set_insn;
                          /* If the block head itself is that insn, emit the set before it.
                             Otherwise emit it after the insn preceding INSN, or after the
                             end of BB when no such insn was found.  */
    3298        32270 :   if (insn == BB_HEAD (bb))
    3299              :     {
    3300            0 :       set_insn = emit_insn_before (set, insn);
    3301            0 :       if (dump_file)
    3302              :         {
    3303            0 :           fprintf (dump_file, "\nPlace:\n\n");
    3304            0 :           print_rtl_single (dump_file, set_insn);
    3305            0 :           fprintf (dump_file, "\nbefore:\n\n");
    3306            0 :           print_rtl_single (dump_file, insn);
    3307            0 :           fprintf (dump_file, "\n");
    3308              :         }
    3309              :     }
    3310              :   else
    3311              :     {
    3312        32270 :       rtx_insn *after = insn ? PREV_INSN (insn) : BB_END (bb);
    3313        32270 :       set_insn = emit_insn_after (set, after);
    3314        32270 :       if (dump_file)
    3315              :         {
    3316            1 :           fprintf (dump_file, "\nPlace:\n\n");
    3317            1 :           print_rtl_single (dump_file, set_insn);
    3318            1 :           fprintf (dump_file, "\nafter:\n\n");
    3319            1 :           print_rtl_single (dump_file, after);
    3320            1 :           fprintf (dump_file, "\n");
    3321              :         }
    3322              :     }
    3323              : 
    3324        32270 :   if (load && load->kind == X86_CSE_VEC_DUP)
    3325              :     {
    3326              :       /* Get the source from LOAD as (reg:SI 99) in
    3327              : 
    3328              :          (vec_duplicate:V4SI (reg:SI 99))
    3329              : 
    3330              :        */
    3331        10276 :       rtx inner_scalar = load->val;
    3332              :       /* Set the source in (vec_duplicate:V4SI (reg:SI 99)).  */
    3333        10276 :       rtx reg = XEXP (src, 0);
    3334        10276 :       machine_mode reg_mode = GET_MODE (reg);
                              /* Reconcile differing modes: a REG or MEM scalar is wrapped in
                                 a SUBREG of REG_MODE; otherwise, when REG_MODE is not a
                                 scalar integer mode, the destination is viewed through an
                                 integer-mode SUBREG instead.  */
    3335        10276 :       if (reg_mode != GET_MODE (inner_scalar))
    3336              :         {
    3337         9994 :           if (REG_P (inner_scalar) || MEM_P (inner_scalar))
    3338            0 :             inner_scalar = gen_rtx_SUBREG (reg_mode, inner_scalar, 0);
    3339         9994 :           else if (!SCALAR_INT_MODE_P (reg_mode))
    3340              :             {
    3341              :               /* For non-int load with integer constant, generate
    3342              : 
    3343              :                  (set (subreg:SI (reg/v:SF 105 [ f ]) 0)
    3344              :                       (const_int 1313486336 [0x4e4a3600]))
    3345              : 
    3346              :                */
    3347            1 :               gcc_assert (CONST_INT_P (inner_scalar));
    3348            1 :               unsigned int bits = GET_MODE_BITSIZE (reg_mode);
    3349            1 :               machine_mode mode = int_mode_for_size (bits, 0).require ();
    3350            1 :               reg = gen_rtx_SUBREG (mode, reg, 0);
    3351              :             }
    3352              :         }
                              /* Emit REG = INNER_SCALAR right before the vector set.  NB:
                                 this SET shadows the function-level SET declared above.  */
    3353        10276 :       rtx set = gen_rtx_SET (reg, inner_scalar);
    3354        10276 :       insn = emit_insn_before (set, set_insn);
    3355        10276 :       if (dump_file)
    3356              :         {
    3357            1 :           fprintf (dump_file, "\nAdd:\n\n");
    3358            1 :           print_rtl_single (dump_file, insn);
    3359            1 :           fprintf (dump_file, "\nbefore:\n\n");
    3360            1 :           print_rtl_single (dump_file, set_insn);
    3361            1 :           fprintf (dump_file, "\n");
    3362              :         }
    3363              :     }
    3364        32270 : }
    3365              : 
    3366              : /* At entry of the nearest common dominator for basic blocks with
    3367              :    conversions/rcp/sqrt/rsqrt/round, generate a single
    3368              :         vxorps %xmmN, %xmmN, %xmmN
    3369              :    for all
    3370              :         vcvtss2sd  op, %xmmN, %xmmX
    3371              :         vcvtsd2ss  op, %xmmN, %xmmX
    3372              :         vcvtsi2ss  op, %xmmN, %xmmX
    3373              :         vcvtsi2sd  op, %xmmN, %xmmX
    3374              : 
    3375              :    NB: We want to generate only a single vxorps to cover the whole
    3376              :    function.  The LCM algorithm isn't appropriate here since it may
    3377              :    place a vxorps inside the loop.
                        
                           The function works on cfun, takes no arguments and always
                           returns 0.  */
    3378              : 
    3379              : static unsigned int
    3380        33177 : remove_partial_avx_dependency (void)
    3381              : {
    3382        33177 :   timevar_push (TV_MACH_DEP);
    3383              : 
    3384        33177 :   bitmap_obstack_initialize (NULL);
                          /* Indices of the basic blocks in which an insn was converted.  */
    3385        33177 :   bitmap convert_bbs = BITMAP_ALLOC (NULL);
    3386              : 
    3387        33177 :   basic_block bb;
    3388        33177 :   rtx_insn *insn, *set_insn;
    3389        33177 :   rtx set;
                          /* The shared zero register, created lazily on the first
                             conversion; it stays NULL_RTX when nothing is converted.  */
    3390        33177 :   rtx v4sf_const0 = NULL_RTX;
    3391              : 
                          /* New vector sets that received a REG_EH_REGION note and may
                             therefore have to end their basic block.  */
    3392        33177 :   auto_vec<rtx_insn *> control_flow_insns;
    3393              : 
    3394              :   /* We create invalid RTL initially so defer rescans.  */
    3395        33177 :   df_set_flags (DF_DEFER_INSN_RESCAN);
    3396              : 
    3397       312626 :   FOR_EACH_BB_FN (bb, cfun)
    3398              :     {
    3399      3552205 :       FOR_BB_INSNS (bb, insn)
    3400              :         {
                                  /* Only single-set, non-debug insns whose
                                     avx_partial_xmm_update attribute is true are
                                     candidates.  */
    3401      3272756 :           if (!NONDEBUG_INSN_P (insn))
    3402      1465244 :             continue;
    3403              : 
    3404      1807512 :           set = single_set (insn);
    3405      1807512 :           if (!set)
    3406        70179 :             continue;
    3407              : 
    3408      1737333 :           if (get_attr_avx_partial_xmm_update (insn)
    3409              :               != AVX_PARTIAL_XMM_UPDATE_TRUE)
    3410      1734104 :             continue;
    3411              : 
    3412              :           /* Convert PARTIAL_XMM_UPDATE_TRUE insns, DF -> SF, SF -> DF,
    3413              :              SI -> SF, SI -> DF, DI -> SF, DI -> DF, sqrt, rsqrt, rcp,
    3414              :              round, to vec_dup and vec_merge with subreg.  */
    3415         3229 :           rtx src = SET_SRC (set);
    3416         3229 :           rtx dest = SET_DEST (set);
    3417         3229 :           machine_mode dest_mode = GET_MODE (dest);
    3418         3229 :           bool convert_p = false;
    3419         3229 :           switch (GET_CODE (src))
    3420              :             {
    3421         3124 :             case FLOAT:
    3422         3124 :             case FLOAT_EXTEND:
    3423         3124 :             case FLOAT_TRUNCATE:
    3424         3124 :             case UNSIGNED_FLOAT:
    3425         3124 :               convert_p = true;
    3426         3124 :               break;
    3427              :             default:
    3428              :               break;
    3429              :             }
    3430              : 
    3431              :           /* Only handle conversion here.  */
                                  /* For a conversion, SRC_MODE is the mode of its operand;
                                     otherwise it is VOIDmode.  FP and integer conversions are
                                     skipped when the corresponding tuning flags below say
                                     so; non-conversions are always processed.  */
    3432         3124 :           machine_mode src_mode
    3433         3124 :             = convert_p ? GET_MODE (XEXP (src, 0)) : VOIDmode;
    3434         3124 :           switch (src_mode)
    3435              :             {
    3436          155 :             case E_SFmode:
    3437          155 :             case E_DFmode:
    3438          155 :               if (TARGET_USE_VECTOR_FP_CONVERTS
    3439          149 :                   || !TARGET_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY)
    3440            8 :                 continue;
    3441              :               break;
    3442         2969 :             case E_SImode:
    3443         2969 :             case E_DImode:
    3444         2969 :               if (TARGET_USE_VECTOR_CONVERTS
    3445         2957 :                   || !TARGET_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY)
    3446           14 :                 continue;
    3447              :               break;
    3448          105 :             case E_VOIDmode:
    3449          105 :               gcc_assert (!convert_p);
    3450              :               break;
    3451            0 :             default:
    3452            0 :               gcc_unreachable ();
    3453              :             }
    3454              : 
    3455         3207 :           if (!v4sf_const0)
    3456         1022 :             v4sf_const0 = gen_reg_rtx (V4SFmode);
    3457              : 
                                  /* Pick the vector mode matching DEST_MODE and view the
                                     shared V4SF zero register in that mode.  Only HF, SF and
                                     DF destinations are expected.  */
    3458         3207 :           rtx zero;
    3459         3207 :           machine_mode dest_vecmode;
    3460         3207 :           switch (dest_mode)
    3461              :             {
    3462           90 :             case E_HFmode:
    3463           90 :               dest_vecmode = V8HFmode;
    3464           90 :               zero = gen_rtx_SUBREG (V8HFmode, v4sf_const0, 0);
    3465           90 :               break;
    3466              :             case E_SFmode:
    3467              :               dest_vecmode = V4SFmode;
    3468              :               zero = v4sf_const0;
    3469              :               break;
    3470         1175 :             case E_DFmode:
    3471         1175 :               dest_vecmode = V2DFmode;
    3472         1175 :               zero = gen_rtx_SUBREG (V2DFmode, v4sf_const0, 0);
    3473         1175 :               break;
    3474            0 :             default:
    3475            0 :               gcc_unreachable ();
    3476              :             }
    3477              : 
    3478              :           /* Change source to vector mode.  */
                                  /* I.e. (vec_merge (vec_duplicate SRC) ZERO (const_int 1)).  */
    3479         3207 :           src = gen_rtx_VEC_DUPLICATE (dest_vecmode, src);
    3480         3207 :           src = gen_rtx_VEC_MERGE (dest_vecmode, src, zero,
    3481              :                                    GEN_INT (HOST_WIDE_INT_1U));
    3482              :           /* Change destination to vector mode.  */
    3483         3207 :           rtx vec = gen_reg_rtx (dest_vecmode);
    3484              :           /* Generate an XMM vector SET.  */
    3485         3207 :           set = gen_rtx_SET (vec, src);
    3486         3207 :           set_insn = emit_insn_before (set, insn);
    3487              : 
    3488         3207 :           if (cfun->can_throw_non_call_exceptions)
    3489              :             {
    3490              :               /* Handle REG_EH_REGION note.  */
                                      /* Copy the note of the original insn to the new vector
                                         set and remember the set for the block-splitting step
                                         at the end.  */
    3491            0 :               rtx note = find_reg_note (insn, REG_EH_REGION, NULL_RTX);
    3492            0 :               if (note)
    3493              :                 {
    3494            0 :                   control_flow_insns.safe_push (set_insn);
    3495            0 :                   add_reg_note (set_insn, REG_EH_REGION, XEXP (note, 0));
    3496              :                 }
    3497              :             }
    3498              : 
                                  /* The original insn becomes a plain copy of the low
                                     DEST_MODE part of the new vector register into DEST.  */
    3499         3207 :           src = gen_rtx_SUBREG (dest_mode, vec, 0);
    3500         3207 :           set = gen_rtx_SET (dest, src);
    3501              : 
    3502              :           /* Drop possible dead definitions.  */
    3503         3207 :           PATTERN (insn) = set;
    3504              : 
                                  /* Force re-recognition of the rewritten insn and record
                                     its block for the placement of the zero set.  */
    3505         3207 :           INSN_CODE (insn) = -1;
    3506         3207 :           recog_memoized (insn);
    3507         3207 :           df_insn_rescan (insn);
    3508         3207 :           bitmap_set_bit (convert_bbs, bb->index);
    3509              :         }
    3510              :     }
    3511              : 
                          /* Something was converted: emit the one initialization of the
                             shared zero register.  */
    3512        33177 :   if (v4sf_const0)
    3513              :     {
    3514              :       /* (Re-)discover loops so that bb->loop_father can be used in the
    3515              :          analysis below.  */
    3516         1022 :       calculate_dominance_info (CDI_DOMINATORS);
    3517         1022 :       loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
    3518              : 
    3519         1022 :       ix86_place_single_vector_set (v4sf_const0,
    3520              :                                     CONST0_RTX (V4SFmode),
    3521              :                                     convert_bbs);
    3522              : 
    3523         1022 :       loop_optimizer_finalize ();
    3524              : 
    3525         1022 :       if (!control_flow_insns.is_empty ())
    3526              :         {
                                  /* Blocks are split below, so the dominance information
                                     computed above is discarded first.  */
    3527            0 :           free_dominance_info (CDI_DOMINATORS);
    3528              : 
    3529            0 :           unsigned int i;
    3530            0 :           FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
    3531            0 :             if (control_flow_insn_p (insn))
    3532              :               {
    3533              :                 /* Split the block after insn.  There will be a fallthru
    3534              :                    edge, which is OK so we keep it.  We have to create
    3535              :                    the exception edges ourselves.  */
    3536            0 :                 bb = BLOCK_FOR_INSN (insn);
    3537            0 :                 split_block (bb, insn);
    3538            0 :                 rtl_make_eh_edge (NULL, bb, BB_END (bb));
    3539              :               }
    3540              :         }
    3541              :     }
    3542              : 
                          /* Run the rescans deferred since the start of the function.
                             NOTE(review): the bitmap obstack is released before the bitmap
                             allocated from it is freed -- confirm this ordering is
                             intended.  */
    3543        33177 :   df_process_deferred_rescans ();
    3544        33177 :   df_clear_flags (DF_DEFER_INSN_RESCAN);
    3545        33177 :   bitmap_obstack_release (NULL);
    3546        33177 :   BITMAP_FREE (convert_bbs);
    3547              : 
    3548        33177 :   timevar_pop (TV_MACH_DEP);
    3549        33177 :   return 0;
    3550        33177 : }
    3551              : 
    3552              : namespace {
    3553              : /* Static description of the rpad RTL pass, timed under TV_MACH_DEP.  */
    3554              : const pass_data pass_data_remove_partial_avx_dependency =
    3555              : {
    3556              :   RTL_PASS, /* type */
    3557              :   "rpad", /* name */
    3558              :   OPTGROUP_NONE, /* optinfo_flags */
    3559              :   TV_MACH_DEP, /* tv_id */
    3560              :   0, /* properties_required */
    3561              :   0, /* properties_provided */
    3562              :   0, /* properties_destroyed */
    3563              :   0, /* todo_flags_start */
    3564              :   0, /* todo_flags_finish */
    3565              : };
    3566              : /* RTL pass object: gated by ix86_rpad_gate, executed by remove_partial_avx_dependency.  */
    3567              : class pass_remove_partial_avx_dependency : public rtl_opt_pass
    3568              : {
    3569              : public:
    3570       287872 :   pass_remove_partial_avx_dependency (gcc::context *ctxt)
    3571       575744 :     : rtl_opt_pass (pass_data_remove_partial_avx_dependency, ctxt)
    3572              :   {}
    3573              : 
    3574              :   /* opt_pass methods: the function argument is unused by both overrides.  */
    3575      1480955 :   bool gate (function *) final override
    3576              :     {
    3577      1480955 :       return ix86_rpad_gate ();
    3578              :     }
    3579              : 
    3580        33177 :   unsigned int execute (function *) final override
    3581              :     {
    3582        33177 :       return remove_partial_avx_dependency ();
    3583              :     }
    3584              : }; // class pass_remove_partial_avx_dependency
    3585              : 
    3586              : } // anon namespace
    3587              : /* Return a newly allocated pass_remove_partial_avx_dependency for CTXT.  */
    3588              : rtl_opt_pass *
    3589       287872 : make_pass_remove_partial_avx_dependency (gcc::context *ctxt)
    3590              : {
    3591       287872 :   return new pass_remove_partial_avx_dependency (ctxt);
    3592              : }
    3593              : 
    3594              : /* Return the vector machine mode whose size is SIZE (in GET_MODE_SIZE units) and whose
    3595              :    element mode is SMODE, or the inner mode of SMODE when SMODE is itself a vector mode.  */
    3596              : 
    3597              : static machine_mode
    3598        32537 : ix86_get_vector_cse_mode (unsigned int size, machine_mode smode)
    3599              : {
    3600              :   /* Use the inner scalar mode of vector broadcast source in:
    3601              : 
    3602              :      (set (reg:V8DF 394)
    3603              :           (vec_duplicate:V8DF (reg:V2DF 190 [ alpha ])))
    3604              : 
    3605              :      to compute the vector mode for broadcast from vector source.
    3606              :      The mode_for_vector result is taken with require (), so a matching mode is expected to exist.  */
    3607        32537 :   if (VECTOR_MODE_P (smode))
    3608            1 :     smode = GET_MODE_INNER (smode);
    3609        32537 :   scalar_mode s_mode = as_a <scalar_mode> (smode);
    3610        65074 :   poly_uint64 nunits = size / GET_MODE_SIZE (smode);
    3611        32537 :   machine_mode mode = mode_for_vector (s_mode, nunits).require ();
    3612        32537 :   return mode;
    3613              : }
    3614              : 
    3615              : /* For each instruction whose UID is set in VECTOR_INSNS, replace the source of its single SET
    3616              :    with VECTOR_CONST in VECTOR_MODE, using a SUBREG (built with SCALAR_MODE if needed) when modes differ.  */
    3617              : 
    3618              : static void
    3619        32083 : replace_vector_const (machine_mode vector_mode, rtx vector_const,
    3620              :                       auto_bitmap &vector_insns,
    3621              :                       machine_mode scalar_mode)
    3622              : {
    3623        32083 :   bitmap_iterator bi;
    3624        32083 :   unsigned int id;
    3625              : 
    3626       154827 :   EXECUTE_IF_SET_IN_BITMAP (vector_insns, 0, id, bi)
    3627              :     {
    3628       122744 :       rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
    3629              : 
    3630              :       /* Get the single SET; it is used unchecked, so INSN is assumed to have one.  */
    3631       122744 :       rtx set = single_set (insn);
    3632       122744 :       rtx src = SET_SRC (set);
    3633       122744 :       rtx dest = SET_DEST (set);
    3634       122744 :       machine_mode mode = GET_MODE (dest);
    3635              : 
    3636       122744 :       rtx replace;
    3637              :       /* Use VECTOR_CONST directly if SRC is a SUBREG or DEST is already in VECTOR_MODE.  */
    3638       122744 :       if (SUBREG_P (src) || mode == vector_mode)
    3639              :         replace = vector_const;
    3640              :       else
    3641              :         {
    3642        59346 :           unsigned int size = GET_MODE_SIZE (mode);
    3643        59346 :           if (size < ix86_regmode_natural_size (mode))
    3644              :             {
    3645              :               /* If the mode size is smaller than its natural size,
    3646              :                  first insert an extra move with a SUBREG in the vector mode of the same size
    3647              :                  built from SCALAR_MODE (skipped if that mode is MODE) to avoid validate_subreg failure.  */
    3648          454 :               machine_mode vmode
    3649          454 :                 = ix86_get_vector_cse_mode (size, scalar_mode);
    3650          454 :               rtx vreg;
    3651          454 :               if (mode == vmode)
    3652              :                 vreg = vector_const;
    3653              :               else
    3654              :                 {
    3655           43 :                   vreg = gen_reg_rtx (vmode);
    3656           43 :                   rtx vsubreg = gen_rtx_SUBREG (vmode, vector_const, 0);
    3657           43 :                   rtx pat = gen_rtx_SET (vreg, vsubreg);
    3658           43 :                   rtx_insn *vinsn = emit_insn_before (pat, insn);
    3659           43 :                   if (dump_file)
    3660              :                     {
    3661            0 :                       fprintf (dump_file, "\nInsert an extra move:\n\n");
    3662            0 :                       print_rtl_single (dump_file, vinsn);
    3663            0 :                       fprintf (dump_file, "\nbefore:\n\n");
    3664            0 :                       print_rtl_single (dump_file, insn);
    3665            0 :                       fprintf (dump_file, "\n");
    3666              :                     }
    3667              :                 }
    3668          454 :               replace = gen_rtx_SUBREG (mode, vreg, 0);
    3669              :             }
    3670              :           else
    3671        58892 :             replace = gen_rtx_SUBREG (mode, vector_const, 0);
    3672              :         }
    3673              : 
    3674       122744 :       if (dump_file)
    3675              :         {
    3676            2 :           fprintf (dump_file, "\nReplace:\n\n");
    3677            2 :           print_rtl_single (dump_file, insn);
    3678              :         }
    3679       122744 :       SET_SRC (set) = replace;
    3680              :       /* Make SET the whole pattern, dropping possible dead definitions, then re-recognize INSN.  */
    3681       122744 :       PATTERN (insn) = set;
    3682       122744 :       INSN_CODE (insn) = -1;
    3683       122744 :       recog_memoized (insn);
    3684       122744 :       if (dump_file)
    3685              :         {
    3686            2 :           fprintf (dump_file, "\nwith:\n\n");
    3687            2 :           print_rtl_single (dump_file, insn);
    3688            2 :           fprintf (dump_file, "\n");
    3689              :         }
    3690       122744 :       df_insn_rescan (insn);
    3691              :     }
    3692        32083 : }
    3693              : 
    3694              : /* Return the inner scalar if OP is a broadcast, else return nullptr.  MODE is only used for the standard_sse_constant_p check.  */
    3695              : /* On success *SCALAR_MODE_P is the mode of the scalar, *KIND_P the kind of broadcast and *INSN_P the insn defining the broadcast register, or nullptr when the scalar is a constant or a constant-pool reference.  *KIND_P may also be written when nullptr is returned.  */
    3696              : static rtx
    3697      2200031 : ix86_broadcast_inner (rtx op, machine_mode mode,
    3698              :                       machine_mode *scalar_mode_p,
    3699              :                       x86_cse_kind *kind_p, rtx_insn **insn_p)
    3700              : {
    3701      2200031 :   switch (standard_sse_constant_p (op, mode))
    3702              :     {
    3703       113951 :     case 1:
    3704       113951 :       *scalar_mode_p = QImode;
    3705       113951 :       *kind_p = X86_CSE_CONST0_VECTOR;
    3706       113951 :       *insn_p = nullptr;
    3707       113951 :       return const0_rtx;
    3708        11426 :     case 2:
    3709        11426 :       *scalar_mode_p = QImode;
    3710        11426 :       *kind_p = X86_CSE_CONSTM1_VECTOR;
    3711        11426 :       *insn_p = nullptr;
    3712        11426 :       return constm1_rtx;
    3713      2074654 :     default:
    3714      2074654 :       break;
    3715              :     }
    3716              : 
    3717      2074654 :   mode = GET_MODE (op);
    3718      2074654 :   int nunits = GET_MODE_NUNITS (mode);
    3719      2074654 :   if (nunits < 2)
    3720              :     return nullptr;
    3721              : 
    3722      1597234 :   *kind_p = X86_CSE_VEC_DUP;
    3723              : 
    3724      1597234 :   rtx reg;
    3725      1597234 :   if (GET_CODE (op) == VEC_DUPLICATE)
    3726              :     {
    3727              :       /* Only
    3728              :           (vec_duplicate:V4SI (reg:SI 99))
    3729              :           (vec_duplicate:V2DF (mem/u/c:DF (symbol_ref/u:DI ("*.LC1") [flags 0x2]) [0  S8 A64]))
    3730              :          are supported.  Set OP to the broadcast source by default.  */
    3731        95711 :       op = XEXP (op, 0);
    3732        95711 :       reg = op;
    3733        95711 :       if (SUBREG_P (op)
    3734          402 :           && SUBREG_BYTE (op) == 0
    3735        96113 :           && !paradoxical_subreg_p (op))
    3736          402 :         reg = SUBREG_REG (op);
    3737        95711 :       if (!REG_P (reg))
    3738              :         {
    3739         7848 :           if (MEM_P (op)
    3740         7582 :               && SYMBOL_REF_P (XEXP (op, 0))
    3741        13623 :               && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
    3742              :             {
    3743              :               /* Handle constant broadcast from a constant-pool memory reference.  */
    3744         5552 :               *scalar_mode_p = GET_MODE_INNER (mode);
    3745         5552 :               *insn_p = nullptr;
    3746         5552 :               return op;
    3747              :             }
    3748              :           return nullptr;
    3749              :         }
    3750              :     }
    3751      1501523 :   else if (CONST_VECTOR_P (op))
    3752              :     {
    3753           20 :       rtx first = XVECEXP (op, 0, 0);
    3754           48 :       for (int i = 1; i < nunits; ++i)
    3755              :         {
    3756           48 :           rtx tmp = XVECEXP (op, 0, i);
    3757              :           /* Every element must equal the first one for a vector duplicate value.  */
    3758           48 :           if (!rtx_equal_p (tmp, first))
    3759              :             return nullptr;
    3760              :         }
    3761            0 :       *scalar_mode_p = GET_MODE (first);
    3762            0 :       *insn_p = nullptr;
    3763            0 :       return first;
    3764              :     }
    3765              :   else
    3766              :     return nullptr;
    3767              : /* Only the VEC_DUPLICATE case with a register source reaches here; MODE becomes the mode of that source OP.  */
    3768        87863 :   mode = GET_MODE (op);
    3769              : 
    3770              :   /* Only a register with exactly one, non-artificial definition is supported.  */
    3771        87863 :   df_ref ref = DF_REG_DEF_CHAIN (REGNO (reg));
    3772        87863 :   if (!ref
    3773        87862 :       || DF_REF_IS_ARTIFICIAL (ref)
    3774        87862 :       || DF_REF_NEXT_REG (ref) != nullptr)
    3775              :     return nullptr;
    3776              : 
    3777        82143 :   rtx_insn *insn = DF_REF_INSN (ref);
    3778        82143 :   rtx set = single_set (insn);
    3779        82143 :   if (!set)
    3780              :     return nullptr;
    3781              : 
    3782        82095 :   rtx src = SET_SRC (set);
    3783              : 
    3784        82095 :   if (CONST_INT_P (src))
    3785              :     {
    3786              :       /* Handle sequences like
    3787              : 
    3788              :          (set (subreg:SI (reg/v:SF 105 [ f ]) 0)
    3789              :               (const_int 0 [0]))
    3790              :          (set (reg:V4SF 110)
    3791              :               (vec_duplicate:V4SF (reg/v:SF 105 [ f ])))
    3792              : 
    3793              :          and
    3794              : 
    3795              :          (set (reg:SI 99)
    3796              :                (const_int 34 [0x22]))
    3797              :          (set (reg:V4SI 98)
    3798              :                (vec_duplicate:V4SI (reg:SI 99)))
    3799              : 
    3800              :          Set *INSN_P to nullptr and return SET_SRC, converted to the mode of the
    3801              :          broadcast source when needed, if SET_SRC is an integer constant.  */
    3802        67361 :       op = src;
    3803        67361 :       if (SCALAR_INT_MODE_P (mode))
    3804              :         {
    3805        67351 :           if (mode != GET_MODE (reg))
    3806            0 :             op = gen_int_mode (INTVAL (src), mode);
    3807              :         }
    3808           10 :       else if (op == const0_rtx)
    3809            2 :         op = CONST0_RTX (mode);
    3810        67361 :       *insn_p = nullptr;
    3811              :     }
    3812              :   else
    3813              :     {
    3814              :       /* Handle sequences like
    3815              : 
    3816              :          (set (reg:QI 105 [ c ])
    3817              :               (reg:QI 5 di [ c ]))
    3818              :          (set (reg:V64QI 102 [ _1 ])
    3819              :               (vec_duplicate:V64QI (reg:QI 105 [ c ])))
    3820              : 
    3821              :          (set (reg/v:SI 116 [ argc ])
    3822              :               (mem/c:SI (reg:SI 135) [2 argc+0 S4 A32]))
    3823              :          (set (reg:V4SI 119 [ _45 ])
    3824              :               (vec_duplicate:V4SI (reg/v:SI 116 [ argc ])))
    3825              : 
    3826              :          (set (reg:SI 98 [ _1 ])
    3827              :               (sign_extend:SI (reg:QI 106 [ c ])))
    3828              :          (set (reg:V16SI 103 [ _2 ])
    3829              :                (vec_duplicate:V16SI (reg:SI 98 [ _1 ])))
    3830              : 
    3831              :          (set (reg:SI 102 [ cost ])
    3832              :               (mem/c:SI (symbol_ref:DI ("cost") [flags 0x40])))
    3833              :          (set (reg:V4HI 103 [ _16 ])
    3834              :               (vec_duplicate:V4HI (subreg:HI (reg:SI 102 [ cost ]) 0)))
    3835              : 
    3836              :          (set (subreg:SI (reg/v:HI 107 [ cr_val ]) 0)
    3837              :               (ashift:SI (reg:SI 158)
    3838              :                          (subreg:QI (reg:SI 156 [ _2 ]) 0)))
    3839              :          (set (reg:V16HI 183 [ _61 ])
    3840              :               (vec_duplicate:V16HI (reg/v:HI 107 [ cr_val ])))
    3841              : 
    3842              :          Set *INSN_P to the defining INSN and return the broadcast source otherwise.  */
    3843        14734 :       *insn_p = insn;
    3844              :     }
    3845              : 
    3846        82095 :   *scalar_mode_p = mode;
    3847        82095 :   return op;
    3848              : }
    3849              : 
    3850              : /* Replace each TLS call instruction in TLS_CALL_INSNS with a SET of the same destination
    3851              :    from SRC, delete the original, and record the UID of the new instruction in UPDATED_TLS_INSNS.  */
    3852              : 
    3853              : static void
    3854          310 : replace_tls_call (rtx src, auto_bitmap &tls_call_insns,
    3855              :                   auto_bitmap &updated_tls_insns)
    3856              : {
    3857          310 :   bitmap_iterator bi;
    3858          310 :   unsigned int id;
    3859              : 
    3860         1730 :   EXECUTE_IF_SET_IN_BITMAP (tls_call_insns, 0, id, bi)
    3861              :     {
    3862         1420 :       rtx_insn *insn = DF_INSN_UID_GET (id)->insn;
    3863              : 
    3864              :       /* If this isn't a CALL, only GNU2 TLS implicit CALL patterns
    3865              :          (tls64 attribute TLS64_CALL or TLS64_COMBINE) are allowed.  */
    3866         1420 :       if (!CALL_P (insn))
    3867              :         {
    3868           41 :           attr_tls64 tls64 = get_attr_tls64 (insn);
    3869           41 :           if (tls64 != TLS64_CALL && tls64 != TLS64_COMBINE)
    3870            0 :             gcc_unreachable ();
    3871              :         }
    3872              : 
    3873         1420 :       rtx pat = PATTERN (insn);
    3874         1420 :       gcc_assert (GET_CODE (pat) == PARALLEL);
    3875         1420 :       rtx set = XVECEXP (pat, 0, 0);
    3876         1420 :       gcc_assert (GET_CODE (set) == SET);
    3877         1420 :       rtx dest = SET_DEST (set);
    3878              : /* Emit DEST = SRC right after INSN; the new instruction must be recognizable.  */
    3879         1420 :       set = gen_rtx_SET (dest, src);
    3880         1420 :       rtx_insn *set_insn = emit_insn_after (set, insn);
    3881         1420 :       if (recog_memoized (set_insn) < 0)
    3882            0 :         gcc_unreachable ();
    3883              : 
    3884              :       /* Put the UID of SET_INSN in UPDATED_TLS_INSNS.  */
    3885         1420 :       bitmap_set_bit (updated_tls_insns, INSN_UID (set_insn));
    3886              : 
    3887         1420 :       if (dump_file)
    3888              :         {
    3889            0 :           fprintf (dump_file, "\nReplace:\n\n");
    3890            0 :           print_rtl_single (dump_file, insn);
    3891            0 :           fprintf (dump_file, "\nwith:\n\n");
    3892            0 :           print_rtl_single (dump_file, set_insn);
    3893            0 :           fprintf (dump_file, "\n");
    3894              :         }
    3895              : 
    3896              :       /* Delete the original TLS insn (a CALL or a GNU2 TLS pattern).  */
    3897         1420 :       delete_insn (insn);
    3898              : 
    3899         1420 :       df_insn_rescan (set_insn);
    3900              :     }
    3901          310 : }
    3902              : 
    3903              : /* Return the nearest common dominator of all basic blocks which set hard register REGNO
    3904              :    (clobbers are ignored) and dominate basic block BB, in which REGNO is used.  */
    3905              : 
    3906              : static basic_block
    3907            2 : ix86_get_dominator_for_reg (unsigned int regno, basic_block bb)
    3908              : {
    3909            2 :   basic_block set_bb;
    3910            2 :   auto_bitmap set_bbs;
    3911              : 
    3912              :   /* Get all BBs which set REGNO and dominate the current BB from all
    3913              :      DEFs of REGNO.  Artificial DEFs and may/must-clobber DEFs are skipped.  */
    3914            2 :   for (df_ref def = DF_REG_DEF_CHAIN (regno);
    3915           18 :        def;
    3916           16 :        def = DF_REF_NEXT_REG (def))
    3917           16 :     if (!DF_REF_IS_ARTIFICIAL (def)
    3918           16 :         && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER)
    3919            6 :         && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER))
    3920              :       {
    3921            4 :         set_bb = DF_REF_BB (def);
    3922            4 :         if (dominated_by_p (CDI_DOMINATORS, bb, set_bb))
    3923            2 :           bitmap_set_bit (set_bbs, set_bb->index);
    3924              :       }
    3925              : /* NOTE(review): SET_BBS is not checked for emptiness before the call below; confirm callers guarantee a dominating set.  */
    3926            2 :   bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
    3927            2 :   return bb;
    3928            2 : }
    3929              : 
    3930              : /* Callback for note_stores: mark FLAGS register as live in DATA, a pointer to an auto_bitmap of
    3931              :    live caller-saved registers, if DEST is FLAGS register and the store X is not a CLOBBER.  */
    3932              : 
    3933              : static void
    3934          381 : ix86_check_flags_reg (rtx dest, const_rtx x, void *data)
    3935              : {
    3936          381 :   if (GET_CODE (x) == CLOBBER)
    3937              :     return;
    3938              : 
    3939          374 :   auto_bitmap *live_caller_saved_regs = (auto_bitmap *) data;
    3940          374 :   if (REG_P (dest) && REGNO (dest) == FLAGS_REG)
    3941            0 :     bitmap_set_bit (*live_caller_saved_regs, FLAGS_REG);
    3942              : }
    3943              : 
    3944              : /* Emit a TLS_SET instruction of KIND in basic block BB.   Store the
    3945              :    insertion point in *BEFORE_P for emit_insn_before or in *AFTER_P
    3946              :    for emit_insn_after.  UPDATED_GNU_TLS_INSNS contains instructions
    3947              :    which replace the GNU TLS instructions.  UPDATED_GNU2_TLS_INSNS
    3948              :    contains instructions which replace the GNU2 TLS instructions.  */
    3949              : 
    3950              : static rtx_insn *
    3951          310 : ix86_emit_tls_call (rtx tls_set, x86_cse_kind kind, basic_block bb,
    3952              :                     rtx_insn **before_p, rtx_insn **after_p,
    3953              :                     auto_bitmap &updated_gnu_tls_insns,
    3954              :                     auto_bitmap &updated_gnu2_tls_insns)
    3955              : {
    3956          312 :   rtx_insn *tls_insn;
    3957              : 
    3958          312 :   do
    3959              :     {
    3960          312 :       rtx_insn *insn = BB_HEAD (bb);
    3961         1288 :       while (insn && !NONDEBUG_INSN_P (insn))
    3962              :         {
    3963          980 :           if (insn == BB_END (bb))
    3964              :             {
    3965              :               /* This must be the beginning basic block:
    3966              : 
    3967              :                  (note 4 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
    3968              :                  (note 2 4 26 2 NOTE_INSN_FUNCTION_BEG)
    3969              : 
    3970              :                  or a basic block with only a label:
    3971              : 
    3972              :                  (code_label 78 11 77 3 14 (nil) [1 uses])
    3973              :                  (note 77 78 54 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
    3974              : 
    3975              :                  or a basic block with only a debug marker:
    3976              : 
    3977              :                  (note 3 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
    3978              :                  (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
    3979              :                  (debug_insn 5 2 16 2 (debug_marker) "x.c":6:3 -1 (nil))
    3980              : 
    3981              :                  or a basic block with only deleted instructions:
    3982              : 
    3983              :                  (code_label 348 23 349 45 3 (nil) [0 uses])
    3984              :                  (note 349 348 436 45 [bb 45] NOTE_INSN_BASIC_BLOCK)
    3985              :                  (note 436 349 362 45 NOTE_INSN_DELETED)
    3986              : 
    3987              :                */
    3988            4 :               gcc_assert (DEBUG_INSN_P (insn)
    3989              :                           || (NOTE_P (insn)
    3990              :                               && ((NOTE_KIND (insn)
    3991              :                                    == NOTE_INSN_FUNCTION_BEG)
    3992              :                                   || (NOTE_KIND (insn)
    3993              :                                       == NOTE_INSN_DELETED)
    3994              :                                   || (NOTE_KIND (insn)
    3995              :                                       == NOTE_INSN_BASIC_BLOCK))));
    3996              :               insn = NULL;
    3997              :               break;
    3998              :             }
    3999          976 :           insn = NEXT_INSN (insn);
    4000              :         }
    4001              : 
    4002              :       /* TLS_GD and TLS_LD_BASE instructions are normal functions which
    4003              :          clobber caller-saved registers.  TLSDESC instructions only
    4004              :          clobber FLAGS.  If any registers clobbered by TLS instructions
    4005              :          are live in this basic block, we must insert TLS instructions
    4006              :          after all live registers clobbered are dead.  */
    4007              : 
    4008          312 :       auto_bitmap live_caller_saved_regs;
    4009          624 :       bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb);
    4010              : 
    4011          312 :       if (bitmap_bit_p (in, FLAGS_REG))
    4012            4 :         bitmap_set_bit (live_caller_saved_regs, FLAGS_REG);
    4013              : 
    4014          312 :       unsigned int i;
    4015              : 
    4016              :       /* Get all live caller-saved registers for TLS_GD and TLS_LD_BASE
    4017              :          instructions.  */
    4018          312 :       if (kind != X86_CSE_TLSDESC)
    4019        27249 :         for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    4020        26956 :           if (call_used_regs[i]
    4021        25198 :               && !fixed_regs[i]
    4022        38993 :               && bitmap_bit_p (in, i))
    4023          344 :             bitmap_set_bit (live_caller_saved_regs, i);
    4024              : 
    4025          312 :       if (bitmap_empty_p (live_caller_saved_regs))
    4026              :         {
    4027           79 :           if (insn == BB_HEAD (bb))
    4028              :             {
    4029            0 :               *before_p = insn;
    4030            0 :               tls_insn = emit_insn_before (tls_set, insn);
    4031              :             }
    4032              :           else
    4033              :             {
    4034              :               /* Emit the TLS call after NOTE_INSN_FUNCTION_BEG in the
    4035              :                  beginning basic block:
    4036              : 
    4037              :                  (note 4 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
    4038              :                  (note 2 4 26 2 NOTE_INSN_FUNCTION_BEG)
    4039              : 
    4040              :                  or after NOTE_INSN_BASIC_BLOCK in a basic block with
    4041              :                  only a label:
    4042              : 
    4043              :                  (code_label 78 11 77 3 14 (nil) [1 uses])
    4044              :                  (note 77 78 54 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
    4045              : 
    4046              :                  or after debug marker in a basic block with only a
    4047              :                  debug marker:
    4048              : 
    4049              :                  (note 3 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
    4050              :                  (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG)
    4051              :                  (debug_insn 5 2 16 2 (debug_marker) "x.c":6:3 -1 (nil))
    4052              : 
    4053              :                */
    4054           79 :               insn = insn ? PREV_INSN (insn) : BB_END (bb);
    4055           79 :               *after_p = insn;
    4056           79 :               tls_insn = emit_insn_after (tls_set, insn);
    4057              :             }
    4058           79 :           return tls_insn;
    4059              :         }
    4060              : 
    4061          233 :       bool repeat = false;
    4062              : 
    4063              :       /* Search for REG_DEAD notes in this basic block.  */
    4064          661 :       FOR_BB_INSNS (bb, insn)
    4065              :         {
    4066          661 :           if (!NONDEBUG_INSN_P (insn))
    4067          283 :             continue;
    4068              : 
    4069              :           /* NB: Conditional jump is the only instruction which reads
    4070              :              flags register and changes control flow.  We can never
    4071              :              place the TLS call after unconditional jump.  */
    4072          378 :           if (JUMP_P (insn))
    4073              :             {
    4074              :               /* This must be a conditional jump.  */
    4075            2 :               rtx label = JUMP_LABEL (insn);
    4076            2 :               if (label == nullptr
    4077            2 :                   || ANY_RETURN_P (label)
    4078            2 :                   || !(LABEL_P (label) || SYMBOL_REF_P (label)))
    4079            0 :                 gcc_unreachable ();
    4080              : 
    4081              :               /* Place the call before all FLAGS_REG setting BBs since
    4082              :                  we can't place a call before nor after a conditional
    4083              :                  jump.  */
    4084            2 :               bb = ix86_get_dominator_for_reg (FLAGS_REG, bb);
    4085              : 
    4086              :               /* Start over again.  */
    4087            2 :               repeat = true;
    4088            2 :               break;
    4089              :             }
    4090              : 
    4091          376 :           if (bitmap_bit_p (updated_gnu_tls_insns, INSN_UID (insn)))
    4092              :             {
    4093              :               /* Insert the __tls_get_addr call before INSN which
    4094              :                  replaces a __tls_get_addr call.  */
    4095            1 :               *before_p = insn;
    4096            1 :               tls_insn = emit_insn_before (tls_set, insn);
    4097            1 :               return tls_insn;
    4098              :             }
    4099              : 
    4100          375 :           if (bitmap_bit_p (updated_gnu2_tls_insns, INSN_UID (insn)))
    4101              :             {
    4102              :               /* Mark FLAGS register as dead since FLAGS register
    4103              :                  would be clobbered by the GNU2 TLS instruction.  */
    4104            1 :               bitmap_clear_bit (live_caller_saved_regs, FLAGS_REG);
    4105            1 :               continue;
    4106              :             }
    4107              : 
    4108              :           /* Check if FLAGS register is live.  */
    4109          374 :           note_stores (insn, ix86_check_flags_reg,
    4110              :                        &live_caller_saved_regs);
    4111              : 
    4112          374 :           rtx link;
    4113          515 :           for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
    4114          371 :             if ((REG_NOTE_KIND (link) == REG_DEAD
    4115            9 :                  || (REG_NOTE_KIND (link) == REG_UNUSED
    4116            7 :                      && REGNO (XEXP (link, 0)) == FLAGS_REG))
    4117          378 :                 && REG_P (XEXP (link, 0)))
    4118              :               {
    4119              :                 /* Mark the live caller-saved register as dead.  */
    4120          743 :                 for (i = REGNO (XEXP (link, 0));
    4121          743 :                      i < END_REGNO (XEXP (link, 0));
    4122              :                      i++)
    4123          374 :                   if (i < FIRST_PSEUDO_REGISTER)
    4124          351 :                     bitmap_clear_bit (live_caller_saved_regs, i);
    4125              : 
    4126          369 :                 if (bitmap_empty_p (live_caller_saved_regs))
    4127              :                   {
    4128          230 :                     *after_p = insn;
    4129          230 :                     tls_insn = emit_insn_after (tls_set, insn);
    4130          230 :                     return tls_insn;
    4131              :                   }
    4132              :               }
    4133              :         }
    4134              : 
    4135              :       /* NB: Start over again for conditional jump.  */
    4136            2 :       if (repeat)
    4137            2 :         continue;
    4138              : 
    4139            0 :       gcc_assert (!bitmap_empty_p (live_caller_saved_regs));
    4140              : 
    4141              :       /* If any live caller-saved registers aren't dead at the end of
    4142              :          this basic block, get the basic block which dominates all
    4143              :          basic blocks which set the remaining live registers.  */
    4144            0 :       auto_bitmap set_bbs;
    4145            0 :       bitmap_iterator bi;
    4146            0 :       unsigned int id;
    4147            0 :       EXECUTE_IF_SET_IN_BITMAP (live_caller_saved_regs, 0, id, bi)
    4148              :         {
    4149            0 :           basic_block set_bb = ix86_get_dominator_for_reg (id, bb);
    4150            0 :           bitmap_set_bit (set_bbs, set_bb->index);
    4151              :         }
    4152            0 :       bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs);
    4153            2 :     }
    4154              :   while (true);
    4155              : }
    4156              : 
    4157              : /* Generate a TLS call of KIND with VAL and copy the call result to DEST,
    4158              :    at entry of the nearest dominator for basic block map BBS, which is in
    4159              :    the fake loop that contains the whole function, so that there is only
    4160              :    a single TLS CALL of KIND with VAL in the whole function.
    4161              :    UPDATED_GNU_TLS_INSNS contains instructions which replace the GNU TLS
    4162              :    instructions.  UPDATED_GNU2_TLS_INSNS contains instructions which
    4163              :    replace the GNU2 TLS instructions.  If TLSDESC_SET isn't nullptr,
    4164              :    insert it before the TLS call.  */
    4165              : 
    4166              : static void
    4167          310 : ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind,
    4168              :                             auto_bitmap &bbs,
    4169              :                             auto_bitmap &updated_gnu_tls_insns,
    4170              :                             auto_bitmap &updated_gnu2_tls_insns,
    4171              :                             rtx tlsdesc_set = nullptr)
    4172              : {
    4173          310 :   basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs);
    4174          310 :   while (bb->loop_father->latch
    4175          319 :          != EXIT_BLOCK_PTR_FOR_FN (cfun))
    4176            9 :     bb = get_immediate_dominator (CDI_DOMINATORS,
    4177              :                                   bb->loop_father->header);
    4178              : 
    4179          310 :   rtx rax = nullptr, rdi;
    4180          310 :   rtx eqv = nullptr;
    4181          310 :   rtx caddr;
    4182          310 :   rtx set;
    4183          310 :   rtx clob;
    4184          310 :   rtx symbol;
    4185          310 :   rtx tls;
    4186              : 
    4187          310 :   switch (kind)
    4188              :     {
    4189          262 :     case X86_CSE_TLS_GD:
    4190          262 :       rax = gen_rtx_REG (Pmode, AX_REG);
    4191          262 :       rdi = gen_rtx_REG (Pmode, DI_REG);
    4192          262 :       caddr = ix86_tls_get_addr ();
    4193              : 
    4194          262 :       symbol = XVECEXP (val, 0, 0);
    4195          262 :       tls = gen_tls_global_dynamic_64 (Pmode, rax, symbol, caddr, rdi);
    4196              : 
    4197          262 :       if (GET_MODE (symbol) != Pmode)
    4198            0 :         symbol = gen_rtx_ZERO_EXTEND (Pmode, symbol);
    4199              :       eqv = symbol;
    4200              :       break;
    4201              : 
    4202           30 :     case X86_CSE_TLS_LD_BASE:
    4203           30 :       rax = gen_rtx_REG (Pmode, AX_REG);
    4204           30 :       rdi = gen_rtx_REG (Pmode, DI_REG);
    4205           30 :       caddr = ix86_tls_get_addr ();
    4206              : 
    4207           30 :       tls = gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi);
    4208              : 
    4209              :       /* Attach a unique REG_EQUAL to DEST, to allow the RTL optimizers
    4210              :          to share the LD_BASE result with other LD model accesses.  */
    4211           30 :       eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
    4212              :                             UNSPEC_TLS_LD_BASE);
    4213              : 
    4214           30 :       break;
    4215              : 
    4216           18 :     case X86_CSE_TLSDESC:
    4217           18 :       set = gen_rtx_SET (dest, val);
    4218           18 :       clob = gen_rtx_CLOBBER (VOIDmode,
    4219              :                               gen_rtx_REG (CCmode, FLAGS_REG));
    4220           18 :       tls = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clob));
    4221           18 :       break;
    4222              : 
    4223            0 :     default:
    4224            0 :       gcc_unreachable ();
    4225              :     }
    4226              : 
    4227              :   /* Emit the TLS CALL insn.  */
    4228          310 :   rtx_insn *before = nullptr;
    4229          310 :   rtx_insn *after = nullptr;
    4230          310 :   rtx_insn *tls_insn = ix86_emit_tls_call (tls, kind, bb, &before,
    4231              :                                            &after,
    4232              :                                            updated_gnu_tls_insns,
    4233              :                                            updated_gnu2_tls_insns);
    4234              : 
    4235          310 :   rtx_insn *tlsdesc_insn = nullptr;
    4236          310 :   if (tlsdesc_set)
    4237              :     {
    4238           14 :       rtx dest = copy_rtx (SET_DEST (tlsdesc_set));
    4239           14 :       rtx src = copy_rtx (SET_SRC (tlsdesc_set));
    4240           14 :       tlsdesc_set = gen_rtx_SET (dest, src);
    4241           14 :       tlsdesc_insn = emit_insn_before (tlsdesc_set, tls_insn);
    4242              :     }
    4243              : 
    4244          310 :   if (kind != X86_CSE_TLSDESC)
    4245              :     {
    4246          292 :       RTL_CONST_CALL_P (tls_insn) = 1;
    4247              : 
    4248              :       /* Indicate that this function can't jump to non-local gotos.  */
    4249          292 :       make_reg_eh_region_note_nothrow_nononlocal (tls_insn);
    4250              :     }
    4251              : 
    4252          310 :   if (recog_memoized (tls_insn) < 0)
    4253            0 :     gcc_unreachable ();
    4254              : 
    4255          310 :   if (dump_file)
    4256              :     {
    4257            0 :       if (after)
    4258              :         {
    4259            0 :           fprintf (dump_file, "\nPlace:\n\n");
    4260            0 :           if (tlsdesc_insn)
    4261            0 :             print_rtl_single (dump_file, tlsdesc_insn);
    4262            0 :           print_rtl_single (dump_file, tls_insn);
    4263            0 :           fprintf (dump_file, "\nafter:\n\n");
    4264            0 :           print_rtl_single (dump_file, after);
    4265            0 :           fprintf (dump_file, "\n");
    4266              :         }
    4267              :       else
    4268              :         {
    4269            0 :           fprintf (dump_file, "\nPlace:\n\n");
    4270            0 :           if (tlsdesc_insn)
    4271            0 :             print_rtl_single (dump_file, tlsdesc_insn);
    4272            0 :           print_rtl_single (dump_file, tls_insn);
    4273            0 :           fprintf (dump_file, "\nbefore:\n\n");
    4274            0 :           print_rtl_single (dump_file, before);
    4275            0 :           fprintf (dump_file, "\n");
    4276              :         }
    4277              :     }
    4278              : 
    4279          310 :   if (kind != X86_CSE_TLSDESC)
    4280              :     {
    4281              :       /* Copy RAX to DEST.  */
    4282          292 :       set = gen_rtx_SET (dest, rax);
    4283          292 :       rtx_insn *set_insn = emit_insn_after (set, tls_insn);
    4284          292 :       set_dst_reg_note (set_insn, REG_EQUAL, copy_rtx (eqv), dest);
    4285          292 :       if (dump_file)
    4286              :         {
    4287            0 :           fprintf (dump_file, "\nPlace:\n\n");
    4288            0 :           print_rtl_single (dump_file, set_insn);
    4289            0 :           fprintf (dump_file, "\nafter:\n\n");
    4290            0 :           print_rtl_single (dump_file, tls_insn);
    4291            0 :           fprintf (dump_file, "\n");
    4292              :         }
    4293              :     }
    4294          310 : }
    4295              : 
    4296              : namespace {
    4297              : 
    4298              : const pass_data pass_data_x86_cse =
    4299              : {
    4300              :   RTL_PASS, /* type */
    4301              :   "x86_cse", /* name */
    4302              :   OPTGROUP_NONE, /* optinfo_flags */
    4303              :   TV_MACH_DEP, /* tv_id */
    4304              :   0, /* properties_required */
    4305              :   0, /* properties_provided */
    4306              :   0, /* properties_destroyed */
    4307              :   0, /* todo_flags_start */
    4308              :   0, /* todo_flags_finish */
    4309              : };
    4310              : 
    4311              : class pass_x86_cse : public rtl_opt_pass
    4312              : {
    4313              : public:
    4314       287872 :   pass_x86_cse (gcc::context *ctxt)
    4315       575744 :     : rtl_opt_pass (pass_data_x86_cse, ctxt)
    4316              :   {}
    4317              : 
    4318              :   /* opt_pass methods: */
    4319      1480955 :   bool gate (function *fun) final override
    4320              :     {
    4321      1480955 :       return (TARGET_SSE2
    4322      1476734 :               && optimize
    4323      2518835 :               && optimize_function_for_speed_p (fun));
    4324              :     }
    4325              : 
    4326       973304 :   unsigned int execute (function *) final override
    4327              :     {
    4328       973304 :       return x86_cse ();
    4329              :     }
    4330              : 
    4331              : private:
    4332              :   /* The redundant source value.  */
    4333              :   rtx val;
    4334              :   /* The actual redundant source value for UNSPEC_TLSDESC.  */
    4335              :   rtx tlsdesc_val;
    4336              :   /* The instruction which defines the redundant value.  */
    4337              :   rtx_insn *def_insn;
    4338              :   /* Mode of the destination of the candidate redundant instruction.  */
    4339              :   machine_mode mode;
    4340              :   /* Mode of the source of the candidate redundant instruction.  */
    4341              :   machine_mode scalar_mode;
    4342              :   /* The classification of the candidate redundant instruction.  */
    4343              :   x86_cse_kind kind;
    4344              : 
    4345              :   unsigned int x86_cse (void);
    4346              :   bool candidate_gnu_tls_p (rtx_insn *, attr_tls64);
    4347              :   bool candidate_gnu2_tls_p (rtx, attr_tls64);
    4348              :   bool candidate_vector_p (rtx);
    4349              :   rtx_insn *tls_set_insn_from_symbol (const_rtx, const_rtx);
    4350              : }; // class pass_x86_cse
    4351              : 
    4352              : /* Return the instruction which sets REG from TLS_SYMBOL.  */
    4353              : 
    4354              : rtx_insn *
    4355           38 : pass_x86_cse::tls_set_insn_from_symbol (const_rtx reg,
    4356              :                                         const_rtx tls_symbol)
    4357              : {
    4358           38 :   rtx_insn *set_insn = nullptr;
    4359           38 :   for (df_ref ref = DF_REG_DEF_CHAIN (REGNO (reg));
    4360          103 :        ref;
    4361           65 :        ref = DF_REF_NEXT_REG (ref))
    4362              :     {
    4363           65 :       if (DF_REF_IS_ARTIFICIAL (ref))
    4364              :         return nullptr;
    4365              : 
    4366           65 :       set_insn = DF_REF_INSN (ref);
    4367           65 :       if (get_attr_tls64 (set_insn) != TLS64_LEA)
    4368              :         return nullptr;
    4369              : 
    4370           65 :       rtx tls_set = PATTERN (set_insn);
    4371           65 :       rtx tls_src = XVECEXP (SET_SRC (tls_set), 0, 0);
    4372           65 :       if (!rtx_equal_p (tls_symbol, tls_src))
    4373              :         return nullptr;
    4374              :     }
    4375              : 
    4376              :   return set_insn;
    4377              : }
    4378              : 
    4379              : /* Return true and output def_insn, val, mode, scalar_mode and kind if
    4380              :    INSN is UNSPEC_TLS_GD or UNSPEC_TLS_LD_BASE.  */
    4381              : 
    4382              : bool
    4383         2185 : pass_x86_cse::candidate_gnu_tls_p (rtx_insn *insn, attr_tls64 tls64)
    4384              : {
    4385         2185 :   if (!TARGET_64BIT || !cfun->machine->tls_descriptor_call_multiple_p)
    4386              :     return false;
    4387              : 
    4388              :   /* Record the redundant TLS CALLs for 64-bit:
    4389              : 
    4390              :      (parallel [
    4391              :         (set (reg:DI 0 ax)
    4392              :              (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
    4393              :                       (const_int 0 [0])))
    4394              :         (unspec:DI [(symbol_ref:DI ("foo") [flags 0x50])
    4395              :                     (reg/f:DI 7 sp)] UNSPEC_TLS_GD)
    4396              :         (clobber (reg:DI 5 di))])
    4397              : 
    4398              : 
    4399              :      and
    4400              : 
    4401              :      (parallel [
    4402              :         (set (reg:DI 0 ax)
    4403              :              (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr")))
    4404              :                       (const_int 0 [0])))
    4405              :         (unspec:DI [(reg/f:DI 7 sp)] UNSPEC_TLS_LD_BASE)])
    4406              : 
    4407              :    */
    4408              : 
    4409         2022 :   rtx pat = PATTERN (insn);
    4410         2022 :   rtx set = XVECEXP (pat, 0, 0);
    4411         2022 :   gcc_assert (GET_CODE (set) == SET);
    4412         2022 :   rtx dest = SET_DEST (set);
    4413         2022 :   scalar_mode = mode = GET_MODE (dest);
    4414         2022 :   val = XVECEXP (pat, 0, 1);
    4415         2022 :   gcc_assert (GET_CODE (val) == UNSPEC);
    4416              : 
    4417         2022 :   if (tls64 == TLS64_GD)
    4418         1921 :     kind = X86_CSE_TLS_GD;
    4419              :   else
    4420          101 :     kind = X86_CSE_TLS_LD_BASE;
    4421              : 
    4422         2022 :   def_insn = nullptr;
    4423         2022 :   return true;
    4424              : }
    4425              : 
    4426              : /* Return true and output def_insn, val, mode, scalar_mode and kind if
    4427              :    SET is UNSPEC_TLSDESC.  */
    4428              : 
    4429              : bool
    4430           50 : pass_x86_cse::candidate_gnu2_tls_p (rtx set, attr_tls64 tls64)
    4431              : {
    4432           50 :   if (!TARGET_64BIT || !cfun->machine->tls_descriptor_call_multiple_p)
    4433              :     return false;
    4434              : 
    4435           48 :   rtx tls_symbol;
    4436           48 :   rtx_insn *set_insn;
    4437           48 :   rtx src = SET_SRC (set);
    4438           48 :   val = src;
    4439           48 :   tlsdesc_val = src;
    4440           48 :   kind = X86_CSE_TLSDESC;
    4441              : 
    4442           48 :   if (tls64 == TLS64_COMBINE)
    4443              :     {
    4444              :       /* Record 64-bit TLS64_COMBINE:
    4445              : 
    4446              :          (set (reg/f:DI 104)
    4447              :               (plus:DI (unspec:DI [
    4448              :                           (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
    4449              :                           (reg:DI 114)
    4450              :                           (reg/f:DI 7 sp)] UNSPEC_TLSDESC)
    4451              :                        (const:DI (unspec:DI [
    4452              :                                     (symbol_ref:DI ("e") [flags 0x1a])
    4453              :                                   ] UNSPEC_DTPOFF))))
    4454              : 
    4455              :          (set (reg/f:DI 104)
    4456              :               (plus:DI (unspec:DI [
    4457              :                           (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
    4458              :                           (unspec:DI [
    4459              :                              (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10])
    4460              :                           ] UNSPEC_TLSDESC)
    4461              :                           (reg/f:DI 7 sp)] UNSPEC_TLSDESC)
    4462              :                        (const:DI (unspec:DI [
    4463              :                                     (symbol_ref:DI ("e") [flags 0x1a])
    4464              :                                  ] UNSPEC_DTPOFF))))
    4465              :      */
    4466              : 
    4467           10 :       scalar_mode = mode = GET_MODE (src);
    4468              : 
    4469              :       /* Since the first operand of PLUS in the source TLS_COMBINE
    4470              :          pattern is unused, use the second operand of PLUS:
    4471              : 
    4472              :          (const:DI (unspec:DI [
    4473              :                       (symbol_ref:DI ("e") [flags 0x1a])
    4474              :                    ] UNSPEC_DTPOFF))
    4475              : 
    4476              :          as VAL to check if 2 TLS_COMBINE patterns have the same
    4477              :          source.  */
    4478           10 :       val = XEXP (src, 1);
    4479           10 :       gcc_assert (GET_CODE (val) == CONST
    4480              :                   && GET_CODE (XEXP (val, 0)) == UNSPEC
    4481              :                       && XINT (XEXP (val, 0), 1) == UNSPEC_DTPOFF
    4482              :                       && SYMBOL_REF_P (XVECEXP (XEXP (val, 0), 0, 0)));
    4483           10 :       def_insn = nullptr;
    4484           10 :       return true;
    4485              :     }
    4486              : 
    4487              :   /* Record 64-bit TLS_CALL:
    4488              : 
    4489              :      (set (reg:DI 101)
    4490              :           (unspec:DI [(symbol_ref:DI ("foo") [flags 0x50])
    4491              :                       (reg:DI 112)
    4492              :                       (reg/f:DI 7 sp)] UNSPEC_TLSDESC))
    4493              : 
    4494              :    */
    4495              : 
    4496           38 :   gcc_assert (GET_CODE (src) == UNSPEC);
    4497           38 :   tls_symbol = XVECEXP (src, 0, 0);
    4498           38 :   src = XVECEXP (src, 0, 1);
    4499           38 :   scalar_mode = mode = GET_MODE (src);
    4500           38 :   gcc_assert (REG_P (src));
    4501              : 
    4502              :   /* All definitions of reg:DI 129 in
    4503              : 
    4504              :      (set (reg:DI 110)
    4505              :           (unspec:DI [(symbol_ref:DI ("foo"))
    4506              :                       (reg:DI 129)
    4507              :                       (reg/f:DI 7 sp)] UNSPEC_TLSDESC))
    4508              : 
    4509              :      should have the same source as in
    4510              : 
    4511              :      (set (reg:DI 129)
    4512              :           (unspec:DI [(symbol_ref:DI ("foo"))] UNSPEC_TLSDESC))
    4513              : 
    4514              :    */
    4515              : 
    4516           38 :   set_insn = tls_set_insn_from_symbol (src, tls_symbol);
    4517           38 :   if (!set_insn)
    4518              :     return false;
    4519              : 
    4520              :   /* Use TLS_SYMBOL as VAL to check if 2 patterns have the same source.  */
    4521           38 :   val = tls_symbol;
    4522           38 :   def_insn = set_insn;
    4523           38 :   return true;
    4524              : }
    4525              : 
    4526              : /* Return true and output def_insn, val, mode, scalar_mode and kind if
    4527              :    SET is the single set of a vector broadcast instruction.  */
    4528              : 
    4529              : bool
    4530     50151996 : pass_x86_cse::candidate_vector_p (rtx set)
    4531              : {
    4532     50151996 :   rtx src = SET_SRC (set);
    4533     50151996 :   rtx dest = SET_DEST (set);
    4534     50151996 :   mode = GET_MODE (dest);
    4535              :   /* Skip non-vector instruction.  */
    4536     50151996 :   if (!VECTOR_MODE_P (mode))
    4537              :     return false;
    4538              : 
    4539              :   /* Skip if DEST isn't a register or SUBREG, i.e. not a vector load.  */
    4540      3707448 :   if (!REG_P (dest) && !SUBREG_P (dest))
    4541              :     return false;
    4542              : 
    4543      2200031 :   val = ix86_broadcast_inner (src, mode, &scalar_mode, &kind,
    4544              :                               &def_insn);
    4545      2200031 :   return val ? true : false;
    4546              : }
    4547              : 
    4548              : /* At entry of the nearest common dominator for basic blocks with
    4549              : 
    4550              :    1. Vector CONST0_RTX patterns.
    4551              :    2. Vector CONSTM1_RTX patterns.
    4552              :    3. Vector broadcast patterns.
    4553              :    4. UNSPEC_TLS_GD patterns.
    4554              :    5. UNSPEC_TLS_LD_BASE patterns.
    4555              :    6. UNSPEC_TLSDESC patterns.
    4556              : 
    4557              :    generate a single pattern whose destination is used to replace the
    4558              :    source in all identical patterns.
    4559              : 
    4560              :    NB: We want to generate a pattern, which is executed only once, to
    4561              :    cover the whole function.  The LCM algorithm isn't appropriate here
    4562              :    since it may place a pattern inside the loop.  */
    4563              : 
    4564              : unsigned int
    4565       973304 : pass_x86_cse::x86_cse (void)
    4566              : {
    4567       973304 :   timevar_push (TV_MACH_DEP);
    4568              : 
    4569       973304 :   auto_vec<redundant_pattern *> loads;
    4570       973304 :   redundant_pattern *load;
    4571       973304 :   basic_block bb;
    4572       973304 :   rtx_insn *insn;
    4573       973304 :   unsigned int i;
    4574       973304 :   auto_bitmap updated_gnu_tls_insns;
    4575       973304 :   auto_bitmap updated_gnu2_tls_insns;
    4576              : 
    4577       973304 :   df_set_flags (DF_DEFER_INSN_RESCAN);
    4578              : 
    4579       973304 :   bool recursive_call_p = cfun->machine->recursive_function;
    4580              : 
    4581     10970407 :   FOR_EACH_BB_FN (bb, cfun)
    4582              :     {
    4583    131812237 :       FOR_BB_INSNS (bb, insn)
    4584              :         {
    4585    121815134 :           if (!NONDEBUG_INSN_P (insn))
    4586     68020174 :             continue;
    4587              : 
    4588     53794960 :           bool matched = false;
    4589              :           /* Remove redundant patterns if there are at least 2 of
    4590              :              them.  */
    4591     53794960 :           unsigned int threshold = 2;
    4592              : 
    4593     53794960 :           rtx set = single_set (insn);
    4594     53794960 :           if (!set && !CALL_P (insn))
    4595      1100054 :             continue;
    4596              : 
    4597     52694906 :           tlsdesc_val = nullptr;
    4598              : 
    4599     52694906 :           attr_tls64 tls64 = get_attr_tls64 (insn);
    4600     52694906 :           switch (tls64)
    4601              :             {
    4602         2185 :             case TLS64_GD:
    4603         2185 :             case TLS64_LD_BASE:
    4604              :               /* Verify UNSPEC_TLS_GD and UNSPEC_TLS_LD_BASE.  */
    4605         2185 :               if (candidate_gnu_tls_p (insn, tls64))
    4606              :                 break;
    4607          163 :               continue;
    4608              : 
    4609           50 :             case TLS64_CALL:
    4610           50 :             case TLS64_COMBINE:
    4611              :               /* Verify UNSPEC_TLSDESC.  */
    4612           50 :               if (candidate_gnu2_tls_p (set, tls64))
    4613              :                 break;
    4614            2 :               continue;
    4615              : 
    4616           35 :             case TLS64_LEA:
    4617              :               /* Skip TLS64_LEA.  */
    4618           35 :               continue;
    4619              : 
    4620     52692636 :             case TLS64_NONE:
    4621     52692636 :               if (!set)
    4622      2540640 :                 continue;
    4623              : 
    4624              :               /* Check for vector broadcast.  */
    4625     50151996 :               if (candidate_vector_p (set))
    4626              :                 break;
    4627     49938972 :               continue;
    4628              :             }
    4629              : 
    4630              :           /* Check if there is a matching redundant load.   */
    4631       384315 :           FOR_EACH_VEC_ELT (loads, i, load)
    4632       260992 :             if (load->val
    4633       260992 :                 && load->kind == kind
    4634       202295 :                 && load->mode == scalar_mode
    4635       193093 :                 && (load->bb == bb
    4636       156848 :                     || kind != X86_CSE_VEC_DUP
    4637              :                     /* A vector load which isn't all 0s/1s must be in the
    4638              :                        same basic block if it is in a recursive function.  */
    4639        97748 :                     || !recursive_call_p)
    4640       452165 :                 && rtx_equal_p (load->val, val))
    4641              :               {
    4642              :                 /* Record instruction.  */
    4643        91771 :                 bitmap_set_bit (load->insns, INSN_UID (insn));
    4644              : 
    4645              :                 /* Record the maximum vector size.  */
    4646        91771 :                 if (kind <= X86_CSE_VEC_DUP
    4647       182432 :                     && load->size < GET_MODE_SIZE (mode))
    4648          980 :                   load->size = GET_MODE_SIZE (mode);
    4649              : 
    4650              :                 /* Record the basic block.  */
    4651        91771 :                 bitmap_set_bit (load->bbs, bb->index);
    4652              : 
    4653              :                 /* Increment the count.  */
    4654        91771 :                 load->count++;
    4655              : 
    4656        91771 :                 matched = true;
    4657        91771 :                 break;
    4658              :               }
    4659              : 
    4660       215094 :           if (matched)
    4661        91771 :             continue;
    4662              : 
    4663              :           /* We see this instruction for the first time.  Record the
    4664              :              redundant source value, its mode, the destination size,
    4665              :              instruction which defines the redundant source value,
    4666              :              instruction basic block and the instruction kind.  */
    4667       123323 :           load = new redundant_pattern;
    4668              : 
    4669       123323 :           load->val = copy_rtx (val);
    4670       123323 :           if (tlsdesc_val)
    4671           25 :             load->tlsdesc_val = copy_rtx (tlsdesc_val);
    4672              :           else
    4673       123298 :             load->tlsdesc_val = nullptr;
    4674       123323 :           load->mode = scalar_mode;
    4675       123323 :           load->size = GET_MODE_SIZE (mode);
    4676       123323 :           load->def_insn = def_insn;
    4677       123323 :           load->count = 1;
    4678       123323 :           load->threshold = threshold;
    4679       123323 :           load->bb = BLOCK_FOR_INSN (insn);
    4680       123323 :           load->kind = kind;
    4681              : 
    4682       123323 :           bitmap_set_bit (load->insns, INSN_UID (insn));
    4683       123323 :           bitmap_set_bit (load->bbs, bb->index);
    4684              : 
    4685       123323 :           loads.safe_push (load);
    4686              :         }
    4687              :     }
    4688              : 
    4689              :   bool replaced = false;
    4690      1096627 :   FOR_EACH_VEC_ELT (loads, i, load)
    4691       123323 :     if (load->count >= load->threshold)
    4692              :       {
    4693        32393 :         machine_mode mode;
    4694        32393 :         rtx reg, broadcast_source, broadcast_reg;
    4695        32393 :         replaced = true;
    4696        32393 :         switch (load->kind)
    4697              :           {
    4698          310 :           case X86_CSE_TLS_GD:
    4699          310 :           case X86_CSE_TLS_LD_BASE:
    4700          310 :           case X86_CSE_TLSDESC:
    4701          310 :             broadcast_reg = gen_reg_rtx (load->mode);
    4702          310 :             replace_tls_call (broadcast_reg, load->insns,
    4703          310 :                               (load->kind == X86_CSE_TLSDESC
    4704              :                                ? updated_gnu2_tls_insns
    4705              :                                : updated_gnu_tls_insns));
    4706          310 :             load->broadcast_reg = broadcast_reg;
    4707          310 :             break;
    4708              : 
    4709        32083 :           case X86_CSE_CONST0_VECTOR:
    4710        32083 :           case X86_CSE_CONSTM1_VECTOR:
    4711        32083 :           case X86_CSE_VEC_DUP:
    4712        32083 :             mode = ix86_get_vector_cse_mode (load->size, load->mode);
    4713        32083 :             broadcast_reg = gen_reg_rtx (mode);
    4714        32083 :             if (load->def_insn)
    4715              :               {
    4716              :                 /* Replace redundant vector loads with a single vector
    4717              :                    load in the same basic block.  */
    4718          835 :                 reg = load->val;
    4719          835 :                 if (load->mode != GET_MODE (reg))
    4720            0 :                   reg = gen_rtx_SUBREG (load->mode, reg, 0);
    4721          835 :                 broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg);
    4722              :               }
    4723              :             else
    4724              :               /* This is a constant integer/double vector.  If the
    4725              :                  inner scalar is 0 or -1, set vector to CONST0_RTX
    4726              :                  or CONSTM1_RTX directly.  */
    4727        31248 :               switch (load->kind)
    4728              :                 {
    4729        19725 :                 case X86_CSE_CONST0_VECTOR:
    4730        19725 :                   broadcast_source = CONST0_RTX (mode);
    4731        19725 :                   break;
    4732         1247 :                 case X86_CSE_CONSTM1_VECTOR:
    4733         1247 :                   broadcast_source = CONSTM1_RTX (mode);
    4734         1247 :                   break;
    4735        10276 :                 case X86_CSE_VEC_DUP:
    4736        10276 :                   reg = gen_reg_rtx (load->mode);
    4737        10276 :                   broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg);
    4738        10276 :                   break;
    4739            0 :                 default:
    4740            0 :                   gcc_unreachable ();
    4741              :                 }
    4742        32083 :             replace_vector_const (mode, broadcast_reg, load->insns,
    4743              :                                   load->mode);
    4744        32083 :             load->broadcast_source = broadcast_source;
    4745        32083 :             load->broadcast_reg = broadcast_reg;
    4746        32083 :             break;
    4747              :           }
    4748              :       }
    4749              : 
    4750       973304 :   if (replaced)
    4751              :     {
    4752        26251 :       auto_vec<rtx_insn *> control_flow_insns;
    4753              : 
    4754              :       /* (Re-)discover loops so that bb->loop_father can be used in the
    4755              :          analysis below.  */
    4756        26251 :       calculate_dominance_info (CDI_DOMINATORS);
    4757        26251 :       loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
    4758              : 
    4759        72378 :       FOR_EACH_VEC_ELT (loads, i, load)
    4760        46127 :         if (load->count >= load->threshold)
    4761              :           {
    4762        32393 :             rtx set;
    4763        32393 :             if (load->def_insn)
    4764          849 :               switch (load->kind)
    4765              :                 {
    4766           14 :                 case X86_CSE_TLSDESC:
    4767           14 :                   ix86_place_single_tls_call (load->broadcast_reg,
    4768              :                                               load->tlsdesc_val,
    4769              :                                               load->kind,
    4770           14 :                                               load->bbs,
    4771              :                                               updated_gnu_tls_insns,
    4772              :                                               updated_gnu2_tls_insns,
    4773           14 :                                               PATTERN (load->def_insn));
    4774           14 :                   break;
    4775          835 :                 case X86_CSE_VEC_DUP:
    4776              :                   /* Insert a broadcast after the original scalar
    4777              :                      definition.  */
    4778          835 :                   set = gen_rtx_SET (load->broadcast_reg,
    4779              :                                      load->broadcast_source);
    4780          835 :                   insn = emit_insn_after (set, load->def_insn);
    4781              : 
    4782          835 :                   if (cfun->can_throw_non_call_exceptions)
    4783              :                     {
    4784              :                       /* Handle REG_EH_REGION note in DEF_INSN.  */
    4785            5 :                       rtx note = find_reg_note (load->def_insn,
    4786              :                                                 REG_EH_REGION, nullptr);
    4787            5 :                       if (note)
    4788              :                         {
    4789            1 :                           control_flow_insns.safe_push (load->def_insn);
    4790            1 :                           add_reg_note (insn, REG_EH_REGION,
    4791              :                                         XEXP (note, 0));
    4792              :                         }
    4793              :                     }
    4794              : 
    4795          835 :                   if (dump_file)
    4796              :                     {
    4797            0 :                       fprintf (dump_file, "\nAdd:\n\n");
    4798            0 :                       print_rtl_single (dump_file, insn);
    4799            0 :                       fprintf (dump_file, "\nafter:\n\n");
    4800            0 :                       print_rtl_single (dump_file, load->def_insn);
    4801            0 :                       fprintf (dump_file, "\n");
    4802              :                     }
    4803              :                   break;
    4804            0 :                 default:
    4805            0 :                   gcc_unreachable ();
    4806              :                 }
    4807              :             else
    4808        31544 :               switch (load->kind)
    4809              :                 {
    4810          296 :                 case X86_CSE_TLS_GD:
    4811          296 :                 case X86_CSE_TLS_LD_BASE:
    4812          296 :                 case X86_CSE_TLSDESC:
    4813          296 :                   ix86_place_single_tls_call (load->broadcast_reg,
    4814              :                                               (load->kind == X86_CSE_TLSDESC
    4815              :                                                ? load->tlsdesc_val
    4816              :                                                : load->val),
    4817              :                                               load->kind,
    4818          296 :                                               load->bbs,
    4819              :                                               updated_gnu_tls_insns,
    4820              :                                               updated_gnu2_tls_insns);
    4821          296 :                   break;
    4822        31248 :                 case X86_CSE_CONST0_VECTOR:
    4823        31248 :                 case X86_CSE_CONSTM1_VECTOR:
    4824        31248 :                 case X86_CSE_VEC_DUP:
    4825        31248 :                   ix86_place_single_vector_set (load->broadcast_reg,
    4826              :                                                 load->broadcast_source,
    4827              :                                                 load->bbs,
    4828              :                                                 load);
    4829        31248 :                   break;
    4830              :                 }
    4831              :           }
    4832              : 
    4833        26251 :       loop_optimizer_finalize ();
    4834              : 
    4835        26251 :       if (!control_flow_insns.is_empty ())
    4836              :         {
    4837            1 :           free_dominance_info (CDI_DOMINATORS);
    4838              : 
    4839            3 :           FOR_EACH_VEC_ELT (control_flow_insns, i, insn)
    4840            1 :             if (control_flow_insn_p (insn))
    4841              :               {
    4842              :                 /* Split the block after insn.  There will be a fallthru
    4843              :                    edge, which is OK so we keep it.  We have to create
    4844              :                    the exception edges ourselves.  */
    4845            1 :                 bb = BLOCK_FOR_INSN (insn);
    4846            1 :                 split_block (bb, insn);
    4847            1 :                 rtl_make_eh_edge (NULL, bb, BB_END (bb));
    4848              :               }
    4849              :         }
    4850              : 
    4851        26251 :       df_process_deferred_rescans ();
    4852        26251 :     }
    4853              : 
    4854      1096627 :   FOR_EACH_VEC_ELT (loads, i, load)
    4855       246646 :     delete load;
    4856              : 
    4857       973304 :   df_clear_flags (DF_DEFER_INSN_RESCAN);
    4858              : 
    4859       973304 :   timevar_pop (TV_MACH_DEP);
    4860       973304 :   return 0;
    4861       973304 : }
    4862              : 
    4863              : } // anon namespace
    4864              : 
    4865              : rtl_opt_pass *  /* Create and return a new pass_x86_cse object for CTXT.  */
    4866       287872 : make_pass_x86_cse (gcc::context *ctxt)
    4867              : {
    4868       287872 :   return new pass_x86_cse (ctxt);
    4869              : }
    4870              : 
    4871              : /* Convert legacy instructions that clobber EFLAGS to APX_NF
    4872              :    instructions when there is no flag set between a flag
    4873              :    producer and user.  Candidates are collected first and rewritten afterwards; always returns 0.  */
    4874              : 
    4875              : static unsigned int
    4876          367 : ix86_apx_nf_convert (void)
    4877              : {
    4878          367 :   timevar_push (TV_MACH_DEP);
    4879              : 
    4880          367 :   basic_block bb;
    4881          367 :   rtx_insn *insn;
    4882          367 :   hash_map <rtx_insn *, rtx> converting_map;  /* insn -> its original PARALLEL pattern.  */
    4883          367 :   auto_vec <rtx_insn *> current_convert_list;  /* Candidates of the sequence in progress.  */
    4884              : 
    4885          367 :   bool converting_seq = false;
    4886          367 :   rtx cc = gen_rtx_REG (CCmode, FLAGS_REG);
    4887              : 
    4888          786 :   FOR_EACH_BB_FN (bb, cfun)
    4889              :     {
    4890              :       /* Reset conversion for each bb, so a sequence never spans blocks.  */
    4891          419 :       converting_seq = false;
    4892         5031 :       FOR_BB_INSNS (bb, insn)
    4893              :         {
    4894         4612 :           if (!NONDEBUG_INSN_P (insn))
    4895         4945 :             continue;
    4896              : 
    4897         3676 :           if (recog_memoized (insn) < 0)
    4898          335 :             continue;
    4899              : 
    4900              :           /* Convert candidate insns after cstore, which should
    4901              :              satisfy two conditions:
    4902              :              1. Is not a flag user or producer, only clobbers
    4903              :              FLAGS_REG.
    4904              :              2. Has a corresponding nf pattern.  */
    4905              : 
    4906         3341 :           rtx pat = PATTERN (insn);
    4907              : 
    4908              :           /* Start conversion at the first cstorecc: a single set whose source satisfies ix86_comparison_operator and mentions FLAGS_REG, and whose destination does not.  */
    4909         3341 :           rtx set = NULL_RTX;
    4910         3341 :           if (!converting_seq
    4911         2760 :               && (set = single_set (insn))
    4912         2684 :               && ix86_comparison_operator (SET_SRC (set), VOIDmode)
    4913          126 :               && reg_overlap_mentioned_p (cc, SET_SRC (set))
    4914         3464 :               && !reg_overlap_mentioned_p (cc, SET_DEST (set)))
    4915              :             {
    4916          123 :               converting_seq = true;
    4917          123 :               current_convert_list.truncate (0);
    4918              :             }
    4919              :           /* Terminate at the next explicit flag set (a plain clobber does not end the sequence).  */
    4920         3218 :           else if (reg_set_p (cc, pat)
    4921         3218 :                    && GET_CODE (set_of (cc, pat)) != CLOBBER)
    4922              :             converting_seq = false;
    4923              : 
    4924         3122 :           if (!converting_seq)
    4925         2738 :             continue;
    4926              : 
    4927          603 :           if (get_attr_has_nf (insn)
    4928          603 :               && GET_CODE (pat) == PARALLEL)
    4929              :             {
    4930              :               /* Record the insn and its original pattern in the candidate map.  */
    4931           72 :               current_convert_list.safe_push (insn);
    4932           72 :               converting_map.put (insn, pat);
    4933              :             }
    4934              :           /* If the insn clobbers flags but has no nf_attr,
    4935              :              revoke all previous candidates of the current sequence and stop converting.  */
    4936          531 :           else if (!get_attr_has_nf (insn)
    4937          530 :                    && reg_set_p (cc, pat)
    4938          534 :                    && GET_CODE (set_of (cc, pat)) == CLOBBER)
    4939              :             {
    4940            3 :               for (auto item : current_convert_list)
    4941            0 :                 converting_map.remove (item);
    4942            3 :               converting_seq = false;
    4943              :             }
    4944              :         }
    4945              :     }
    4946              : 
    4947          367 :   if (!converting_map.is_empty ())
    4948              :     {
    4949           85 :       for (auto iter = converting_map.begin ();
    4950          170 :            iter != converting_map.end (); ++iter)
    4951              :         {
    4952           72 :           rtx_insn *replace = (*iter).first;
    4953           72 :           rtx pat = (*iter).second;
    4954           72 :           int i, n = 0, len = XVECLEN (pat, 0);
    4955           72 :           rtx *new_elems = XALLOCAVEC (rtx, len);
    4956           72 :           rtx new_pat;
    4957          216 :           for (i = 0; i < len; i++)
    4958              :             {
    4959          144 :               rtx temp = XVECEXP (pat, 0, i);
    4960          216 :               if (! (GET_CODE (temp) == CLOBBER
    4961           72 :                      && reg_overlap_mentioned_p (cc,
    4962           72 :                                                  XEXP (temp, 0))))
    4963              :                 {
    4964           72 :                   new_elems[n] = temp;  /* Keep every element except a clobber of FLAGS_REG.  */
    4965           72 :                   n++;
    4966              :                 }
    4967              :             }
    4968              : 
    4969           72 :           if (n == 1)  /* A single remaining element is used directly, without a PARALLEL.  */
    4970           72 :             new_pat = new_elems[0];
    4971              :           else
    4972            0 :             new_pat =
    4973            0 :               gen_rtx_PARALLEL (VOIDmode,
    4974              :                                 gen_rtvec_v (n,
    4975              :                                              new_elems));
    4976              : 
    4977           72 :           PATTERN (replace) = new_pat;
    4978           72 :           INSN_CODE (replace) = -1;  /* Drop the cached insn code before recog_memoized is called below.  */
    4979           72 :           recog_memoized (replace);
    4980           72 :           df_insn_rescan (replace);
    4981              :         }
    4982              :     }
    4983              : 
    4984          367 :   timevar_pop (TV_MACH_DEP);
    4985          367 :   return 0;
    4986          367 : }
    4987              : 
    4988              : 
    4989              : namespace {
    4990              : 
    4991              : const pass_data pass_data_apx_nf_convert =  /* Descriptor of the "apx_nfcvt" RTL pass.  */
    4992              : {
    4993              :   RTL_PASS, /* type */
    4994              :   "apx_nfcvt", /* name */
    4995              :   OPTGROUP_NONE, /* optinfo_flags */
    4996              :   TV_MACH_DEP, /* tv_id */
    4997              :   0, /* properties_required */
    4998              :   0, /* properties_provided */
    4999              :   0, /* properties_destroyed */
    5000              :   0, /* todo_flags_start */
    5001              :   0, /* todo_flags_finish */
    5002              : };
    5003              : 
    5004              : class pass_apx_nf_convert : public rtl_opt_pass  /* Pass wrapper around ix86_apx_nf_convert.  */
    5005              : {
    5006              : public:
    5007       287872 :   pass_apx_nf_convert (gcc::context *ctxt)
    5008       575744 :     : rtl_opt_pass (pass_data_apx_nf_convert, ctxt)
    5009              :   {}
    5010              : 
    5011              :   /* opt_pass methods: */
    5012      1480955 :   bool gate (function *) final override  /* Run only with TARGET_APX_NF, when optimizing and the function is optimized for speed.  */
    5013              :     {
    5014      1480955 :       return (TARGET_APX_NF
    5015          460 :               && optimize
    5016      1481406 :               && optimize_function_for_speed_p (cfun));
    5017              :     }
    5018              : 
    5019          367 :   unsigned int execute (function *) final override  /* Delegate to ix86_apx_nf_convert and return its result.  */
    5020              :     {
    5021          367 :       return ix86_apx_nf_convert ();
    5022              :     }
    5023              : }; // class pass_apx_nf_convert
    5024              : 
    5025              : } // anon namespace
    5026              : 
    5027              : rtl_opt_pass *  /* Create and return a new pass_apx_nf_convert object for CTXT.  */
    5028       287872 : make_pass_apx_nf_convert (gcc::context *ctxt)
    5029              : {
    5030       287872 :   return new pass_apx_nf_convert (ctxt);
    5031              : }
    5032              : 
    5033              : /* When a hot loop fits into one cache line (ix86_cost->prefetch_block bytes),
    5034              :    force align the loop without considering the max skip.  */
    5035              : static void
    5036       975125 : ix86_align_loops ()
    5037              : {
    5038       975125 :   basic_block bb;
    5039              : 
    5040              :   /* Don't do this when we don't know the cache line size.  */
    5041       975125 :   if (ix86_cost->prefetch_block == 0)
    5042            9 :     return;
    5043              : 
    5044       975116 :   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
    5045       975116 :   profile_count count_threshold = cfun->cfg->count_max / param_align_threshold;
    5046     11431595 :   FOR_EACH_BB_FN (bb, cfun)
    5047              :     {
    5048     10456479 :       rtx_insn *label = BB_HEAD (bb);
    5049     10456479 :       bool has_fallthru = 0;
    5050     10456479 :       edge e;
    5051     10456479 :       edge_iterator ei;
    5052              : 
    5053     10456479 :       if (!LABEL_P (label))  /* Only blocks that start with a label are candidates.  */
    5054      5306450 :         continue;
    5055              : 
    5056      5154842 :       profile_count fallthru_count = profile_count::zero ();
    5057      5154842 :       profile_count branch_count = profile_count::zero ();
    5058              : 
    5059     14986998 :       FOR_EACH_EDGE (e, ei, bb->preds)
    5060              :         {
    5061      9832156 :           if (e->flags & EDGE_FALLTHRU)
    5062      2508083 :             has_fallthru = 1, fallthru_count += e->count ();
    5063              :           else
    5064      7324073 :             branch_count += e->count ();
    5065              :         }
    5066              : 
    5067      5154842 :       if (!fallthru_count.initialized_p () || !branch_count.initialized_p ())
    5068         4813 :         continue;
    5069              : 
    5070      5150029 :       if (bb->loop_father
    5071      5150029 :           && bb->loop_father->latch != EXIT_BLOCK_PTR_FOR_FN (cfun)
    5072      6492781 :           && (has_fallthru
    5073      1342752 :               ? (!(single_succ_p (bb)
    5074       145770 :                    && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun))
    5075       933463 :                  && optimize_bb_for_speed_p (bb)
    5076       851756 :                  && branch_count + fallthru_count > count_threshold
    5077       729312 :                  && (branch_count > fallthru_count * param_align_loop_iterations))
    5078              :               /* In case there's no fallthru for the loop.
    5079              :                  Nops inserted won't be executed.  */
    5080       409289 :               : (branch_count > count_threshold
    5081       140789 :                  || (bb->count > bb->prev_bb->count * 10
    5082        13093 :                      && (bb->prev_bb->count
    5083      4616954 :                          <= ENTRY_BLOCK_PTR_FOR_FN (cfun)->count / 2)))))
    5084              :         {
    5085       546168 :           rtx_insn* insn, *end_insn;
    5086       546168 :           HOST_WIDE_INT size = 0;
    5087       546168 :           bool padding_p = true;
    5088       546168 :           basic_block tbb = bb;
    5089       546168 :           unsigned cond_branch_num = 0;
    5090       546168 :           bool detect_tight_loop_p = false;
    5091              : 
    5092       860655 :           for (unsigned int i = 0; i != bb->loop_father->num_nodes;
    5093       314487 :                i++, tbb = tbb->next_bb)
    5094              :             {
    5095              :               /* Only handle continuous cfg layout, i.e. consecutive blocks of the same loop.  */
    5096       860655 :               if (bb->loop_father != tbb->loop_father)
    5097              :                 {
    5098              :                   padding_p = false;
    5099              :                   break;
    5100              :                 }
    5101              : 
    5102     10150850 :               FOR_BB_INSNS (tbb, insn)
    5103              :                 {
    5104      9487581 :                   if (!NONDEBUG_INSN_P (insn))
    5105      5423779 :                     continue;
    5106      4063802 :                   size += ix86_min_insn_size (insn);
    5107              : 
    5108              :                   /* We don't know the size of inline asm.
    5109              :                      Don't align a loop containing a call either.  */
    5110      4063802 :                   if (asm_noperands (PATTERN (insn)) >= 0
    5111      4063802 :                       || CALL_P (insn))
    5112              :                     {
    5113              :                       size = -1;
    5114              :                       break;
    5115              :                     }
    5116              :                 }
    5117              : 
    5118       820239 :               if (size == -1 || size > ix86_cost->prefetch_block)  /* Unknown size, or larger than one cache line.  */
    5119              :                 {
    5120              :                   padding_p = false;
    5121              :                   break;
    5122              :                 }
    5123              : 
    5124      1460132 :               FOR_EACH_EDGE (e, ei, tbb->succs)
    5125              :                 {
    5126              :                   /* An edge back to BB: the blocks scanned so far could form the loop.  */
    5127      1007625 :                   if (e->dest == bb)
    5128              :                     {
    5129              :                       detect_tight_loop_p = true;
    5130              :                       break;
    5131              :                     }
    5132              :                 }
    5133              : 
    5134       637315 :               if (detect_tight_loop_p)
    5135              :                 break;
    5136              : 
    5137       452507 :               end_insn = BB_END (tbb);
    5138       452507 :               if (JUMP_P (end_insn))
    5139              :                 {
    5140              :                   /* For decoded icache:
    5141              :                      1. Up to two branches are allowed per Way.
    5142              :                      2. A non-conditional branch is the last micro-op in a Way.
    5143              :                   */
    5144       366621 :                   if (onlyjump_p (end_insn)
    5145       366621 :                       && (any_uncondjump_p (end_insn)
    5146       310890 :                           || single_succ_p (tbb)))
    5147              :                     {
    5148              :                       padding_p = false;
    5149              :                       break;
    5150              :                     }
    5151       310890 :                   else if (++cond_branch_num >= 2)
    5152              :                     {
    5153              :                       padding_p = false;
    5154              :                       break;
    5155              :                     }
    5156              :                 }
    5157              : 
    5158              :             }
    5159              : 
    5160       546168 :           if (padding_p && detect_tight_loop_p)
    5161              :             {
    5162       369616 :               emit_insn_before (gen_max_skip_align (GEN_INT (ceil_log2 (size)),
    5163              :                                                     GEN_INT (0)), label);
    5164              :               /* End of function.  */
    5165       184808 :               if (!tbb || tbb == EXIT_BLOCK_PTR_FOR_FN (cfun))
    5166              :                 break;
    5167              :               /* Skip the bbs which already fit into one cacheline.  */
    5168              :               bb = tbb;
    5169              :             }
    5170              :         }
    5171              :     }
    5172              : 
    5173       975116 :   loop_optimizer_finalize ();
    5174       975116 :   free_dominance_info (CDI_DOMINATORS);
    5175              : }
    5176              : 
    5177              : namespace {
    5178              : 
    5179              : const pass_data pass_data_align_tight_loops =  /* Descriptor of the "align_tight_loops" RTL pass.  */
    5180              : {
    5181              :   RTL_PASS, /* type */
    5182              :   "align_tight_loops", /* name */
    5183              :   OPTGROUP_NONE, /* optinfo_flags */
    5184              :   TV_MACH_DEP, /* tv_id */
    5185              :   0, /* properties_required */
    5186              :   0, /* properties_provided */
    5187              :   0, /* properties_destroyed */
    5188              :   0, /* todo_flags_start */
    5189              :   0, /* todo_flags_finish */
    5190              : };
    5191              : 
    5192              : class pass_align_tight_loops : public rtl_opt_pass  /* Pass wrapper around ix86_align_loops.  */
    5193              : {
    5194              : public:
    5195       287872 :   pass_align_tight_loops (gcc::context *ctxt)
    5196       575744 :     : rtl_opt_pass (pass_data_align_tight_loops, ctxt)
    5197              :   {}
    5198              : 
    5199              :   /* opt_pass methods: */
    5200      1480955 :   bool gate (function *) final override  /* Run only with TARGET_ALIGN_TIGHT_LOOPS, when optimizing and the function is optimized for speed.  */
    5201              :     {
    5202      1480955 :       return TARGET_ALIGN_TIGHT_LOOPS
    5203      1480469 :              && optimize
    5204      2520701 :              && optimize_function_for_speed_p (cfun);
    5205              :     }
    5206              : 
    5207       975125 :   unsigned int execute (function *) final override  /* ix86_align_loops is called only when ASM_OUTPUT_MAX_SKIP_ALIGN is defined; always returns 0.  */
    5208              :     {
    5209       975125 :       timevar_push (TV_MACH_DEP);
    5210              : #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
    5211       975125 :       ix86_align_loops ();
    5212              : #endif
    5213       975125 :       timevar_pop (TV_MACH_DEP);
    5214       975125 :       return 0;
    5215              :     }
    5216              : }; // class pass_align_tight_loops
    5217              : 
    5218              : } // anon namespace
    5219              : 
    5220              : rtl_opt_pass *  /* Create and return a new pass_align_tight_loops object for CTXT.  */
    5221       287872 : make_pass_align_tight_loops (gcc::context *ctxt)
    5222              : {
    5223       287872 :   return new pass_align_tight_loops (ctxt);
    5224              : }
    5225              : 
    5226              : /* Compare the priority of target features in functions DECL1
    5227              :    and DECL2, as computed by get_builtin_code_for_version.  Return a positive value if DECL1 has higher priority,
    5228              :    a negative value if DECL2 has higher priority and 0 if they are the
    5229              :    same.  */
    5230              : 
    5231              : int
    5232         5739 : ix86_compare_version_priority (tree decl1, tree decl2)
    5233              : {
    5234         5739 :   unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
    5235         5739 :   unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
    5236              : 
    5237         5739 :   return (int)priority1 - (int)priority2;
    5238              : }
    5239              : 
    5240              : /* V1 and V2 point to function versions with different priorities
    5241              :    based on the target ISA.  Return V2's dispatch_priority minus V1's, so the result is negative when V1 has the higher priority (presumably a qsort-style comparator -- confirm at the call site).  */
    5242              : 
    5243              : static int
    5244         6830 : feature_compare (const void *v1, const void *v2)
    5245              : {
    5246         6830 :   typedef struct _function_version_info  /* NOTE(review): local declaration of the record layout; presumably must match the caller's -- confirm.  */
    5247              :     {
    5248              :       tree version_decl;
    5249              :       tree predicate_chain;
    5250              :       unsigned int dispatch_priority;
    5251              :     } function_version_info;
    5252              : 
    5253         6830 :   const function_version_info c1 = *(const function_version_info *)v1;
    5254         6830 :   const function_version_info c2 = *(const function_version_info *)v2;
    5255         6830 :   return (c2.dispatch_priority - c1.dispatch_priority);
    5256              : }
    5257              : 
    5258              : /* Append to the basic block NEW_BB of function FUNCTION_DECL the code that
    5259              :    returns a pointer to VERSION_DECL when every predicate call described by
    5260              :    PREDICATE_CHAIN yields a value greater than zero.  With a NULL
    5261              :    PREDICATE_CHAIN the return is emitted unconditionally and NEW_BB itself is returned;
    5262              :    otherwise the block reached when the test fails is returned, to which more conditions can be added.  */
    5263              : 
    5264              : static basic_block
    5265          822 : add_condition_to_bb (tree function_decl, tree version_decl,
    5266              :                      tree predicate_chain, basic_block new_bb)
    5267              : {
    5268          822 :   gimple *return_stmt;
    5269          822 :   tree convert_expr, result_var;
    5270          822 :   gimple *convert_stmt;
    5271          822 :   gimple *call_cond_stmt;
    5272          822 :   gimple *if_else_stmt;
    5273              : 
    5274          822 :   basic_block bb1, bb2, bb3;
    5275          822 :   edge e12, e23;
    5276              : 
    5277          822 :   tree cond_var, and_expr_var = NULL_TREE;
    5278          822 :   gimple_seq gseq;
    5279              : 
    5280          822 :   tree predicate_decl, predicate_arg;
    5281              :   /* Switch cfun to FUNCTION_DECL's function; both return paths below pop it again.  */
    5282          822 :   push_cfun (DECL_STRUCT_FUNCTION (function_decl));
    5283              : 
    5284          822 :   gcc_assert (new_bb != NULL);
    5285          822 :   gseq = bb_seq (new_bb);
    5286              : 
    5287              :   /* Build "result_var = (void *) &VERSION_DECL" and "return result_var" up front; both statements are only added to the sequence later.  */
    5288          822 :   convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
    5289              :                          build_fold_addr_expr (version_decl));
    5290          822 :   result_var = create_tmp_var (ptr_type_node);
    5291          822 :   convert_stmt = gimple_build_assign (result_var, convert_expr);
    5292          822 :   return_stmt = gimple_build_return (result_var);
    5293              :   /* No predicates: the return is unconditional and NEW_BB is not split.  */
    5294          822 :   if (predicate_chain == NULL_TREE)
    5295              :     {
    5296          197 :       gimple_seq_add_stmt (&gseq, convert_stmt);
    5297          197 :       gimple_seq_add_stmt (&gseq, return_stmt);
    5298          197 :       set_bb_seq (new_bb, gseq);
    5299          197 :       gimple_set_bb (convert_stmt, new_bb);
    5300          197 :       gimple_set_bb (return_stmt, new_bb);
    5301          197 :       pop_cfun ();
    5302          197 :       return new_bb;
    5303              :     }
    5304              :   /* Emit one call per chain element: TREE_PURPOSE is the callee, TREE_VALUE its single argument, and the result is stored in a fresh int temporary.  */
    5305         1289 :   while (predicate_chain != NULL)
    5306              :     {
    5307          664 :       cond_var = create_tmp_var (integer_type_node);
    5308          664 :       predicate_decl = TREE_PURPOSE (predicate_chain);
    5309          664 :       predicate_arg = TREE_VALUE (predicate_chain);
    5310          664 :       call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
    5311          664 :       gimple_call_set_lhs (call_cond_stmt, cond_var);
    5312              : 
    5313          664 :       gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
    5314          664 :       gimple_set_bb (call_cond_stmt, new_bb);
    5315          664 :       gimple_seq_add_stmt (&gseq, call_cond_stmt);
    5316              : 
    5317          664 :       predicate_chain = TREE_CHAIN (predicate_chain);
    5318              :       /* The first result becomes the accumulator; later results are folded into it.  */
    5319          664 :       if (and_expr_var == NULL)
    5320              :         and_expr_var = cond_var;
    5321              :       else
    5322              :         {
    5323           39 :           gimple *assign_stmt;
    5324              :           /* Fold the results with MIN_EXPR so that the single "> 0" test below fails if any result is zero:
    5325              :              and_expr_var = min_expr <cond_var, and_expr_var>  */
    5326           39 :           assign_stmt = gimple_build_assign (and_expr_var,
    5327              :                           build2 (MIN_EXPR, integer_type_node,
    5328              :                                   cond_var, and_expr_var));
    5329              : 
    5330           39 :           gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
    5331           39 :           gimple_set_bb (assign_stmt, new_bb);
    5332           39 :           gimple_seq_add_stmt (&gseq, assign_stmt);
    5333              :         }
    5334              :     }
    5335              :   /* Branch on and_expr_var > 0; both label operands are passed as NULL_TREE.  */
    5336          625 :   if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
    5337              :                                     integer_zero_node,
    5338              :                                     NULL_TREE, NULL_TREE);
    5339          625 :   gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
    5340          625 :   gimple_set_bb (if_else_stmt, new_bb);
    5341          625 :   gimple_seq_add_stmt (&gseq, if_else_stmt);
    5342              :   /* The conversion and return follow the condition in the same sequence for now; the block is split below.  */
    5343          625 :   gimple_seq_add_stmt (&gseq, convert_stmt);
    5344          625 :   gimple_seq_add_stmt (&gseq, return_stmt);
    5345          625 :   set_bb_seq (new_bb, gseq);
    5346              :   /* Split at the condition.  bb2, the destination of the returned edge, is treated below as the block holding the conversion and return, and the edge into it is turned from a fallthru edge into the true edge.  */
    5347          625 :   bb1 = new_bb;
    5348          625 :   e12 = split_block (bb1, if_else_stmt);
    5349          625 :   bb2 = e12->dest;
    5350          625 :   e12->flags &= ~EDGE_FALLTHRU;
    5351          625 :   e12->flags |= EDGE_TRUE_VALUE;
    5352              :   /* Split again at the return statement.  */
    5353          625 :   e23 = split_block (bb2, return_stmt);
    5354              : 
    5355          625 :   gimple_set_bb (convert_stmt, bb2);
    5356          625 :   gimple_set_bb (return_stmt, bb2);
    5357              :   /* bb3, the destination of the second split's edge, becomes the target of the false edge out of bb1.  */
    5358          625 :   bb3 = e23->dest;
    5359          625 :   make_edge (bb1, bb3, EDGE_FALSE_VALUE);
    5360              :   /* Drop the edge produced by the second split and connect bb2 to the function's exit block instead.  */
    5361          625 :   remove_edge (e23);
    5362          625 :   make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
    5363              : 
    5364          625 :   pop_cfun ();
    5365              : 
    5366          625 :   return bb3;
    5367              : }
    5368              : 
    5369              : /* This function generates the dispatch function for
    5370              :    multi-versioned functions.  DISPATCH_DECL is the function which will
    5371              :    contain the dispatch logic.  FNDECLS_P points to a vec<tree> holding the function choices for
    5372              :    dispatch, with the default version as its first element.  EMPTY_BB is the basic block pointer
    5373              :    in DISPATCH_DECL in which the dispatch code is generated; it is updated as conditions are added.  Always returns 0.  */
    5374              : 
    5375              : static int
    5376          197 : dispatch_function_versions (tree dispatch_decl,
    5377              :                             void *fndecls_p,
    5378              :                             basic_block *empty_bb)
    5379              : {
    5380          197 :   tree default_decl;
    5381          197 :   gimple *ifunc_cpu_init_stmt;
    5382          197 :   gimple_seq gseq;
    5383          197 :   int ix;
    5384          197 :   tree ele;
    5385          197 :   vec<tree> *fndecls;
    5386          197 :   unsigned int num_versions = 0;
    5387          197 :   unsigned int actual_versions = 0;
    5388          197 :   unsigned int i;
    5389              :   /* Per-version record.  feature_compare declares the same layout to sort an array of these.  */
    5390          197 :   struct _function_version_info
    5391              :     {
    5392              :       tree version_decl;
    5393              :       tree predicate_chain;
    5394              :       unsigned int dispatch_priority;
    5395              :     }*function_version_info;
    5396              : 
    5397          197 :   gcc_assert (dispatch_decl != NULL
    5398              :               && fndecls_p != NULL
    5399              :               && empty_bb != NULL);
    5400              : 
    5401              :   /* fndecls_p is actually a pointer to a vec<tree>.  */
    5402          197 :   fndecls = static_cast<vec<tree> *> (fndecls_p);
    5403              : 
    5404              :   /* At least one more version other than the default.  */
    5405          197 :   num_versions = fndecls->length ();
    5406          197 :   gcc_assert (num_versions >= 2);
    5407              :   /* One slot per non-default version; the array is freed at the end of this function.  */
    5408          197 :   function_version_info = (struct _function_version_info *)
    5409          197 :     XNEWVEC (struct _function_version_info, (num_versions - 1));
    5410              : 
    5411              :   /* The first version in the vector is the default decl.  */
    5412          197 :   default_decl = (*fndecls)[0];
    5413              : 
    5414          197 :   push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
    5415              : 
    5416          197 :   gseq = bb_seq (*empty_bb);
    5417              :   /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
    5418              :      constructors, so explicitly call __builtin_cpu_init here.  */
    5419          197 :   ifunc_cpu_init_stmt
    5420          197 :     = gimple_build_call_vec (get_ix86_builtin (IX86_BUILTIN_CPU_INIT), vNULL);
    5421          197 :   gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
    5422          197 :   gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
    5423          197 :   set_bb_seq (*empty_bb, gseq);
    5424              : 
    5425          197 :   pop_cfun ();
    5426              : 
    5427              :   /* Collect the non-default versions; iteration starts at index 1, skipping the default at index 0.  */
    5428          979 :   for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
    5429              :     {
    5430          782 :       tree version_decl = ele;
    5431          782 :       tree predicate_chain = NULL_TREE;
    5432          782 :       unsigned int priority;
    5433              :       /* Get attribute string, parse it and find the right predicate decl.
    5434              :          The predicate function could be a lengthy combination of many
    5435              :          features, like arch-type and various isa-variants.  */
    5436          782 :       priority = get_builtin_code_for_version (version_decl,
    5437              :                                                &predicate_chain);
    5438              :       /* Versions for which no predicate chain was produced are left out of the dispatch.  */
    5439          782 :       if (predicate_chain == NULL_TREE)
    5440          157 :         continue;
    5441              : 
    5442          625 :       function_version_info [actual_versions].version_decl = version_decl;
    5443          625 :       function_version_info [actual_versions].predicate_chain
    5444          625 :          = predicate_chain;
    5445          625 :       function_version_info [actual_versions].dispatch_priority = priority;
    5446          625 :       actual_versions++;
    5447              :     }
    5448              : 
    5449              :   /* Sort the versions according to descending order of dispatch priority.  The
    5450              :      priority is based on the ISA.  This is not a perfect solution.  There
    5451              :      could still be ambiguity.  If more than one function version is suitable
    5452              :      to execute, which one should be dispatched?  In future, allow the user
    5453              :      to specify a dispatch priority next to the version.  */
    5454          197 :   qsort (function_version_info, actual_versions,
    5455              :          sizeof (struct _function_version_info), feature_compare);
    5456              :   /* Add one condition per collected version in sorted order; each call hands back the block that receives the next condition.  */
    5457         1019 :   for  (i = 0; i < actual_versions; ++i)
    5458          625 :     *empty_bb = add_condition_to_bb (dispatch_decl,
    5459              :                                      function_version_info[i].version_decl,
    5460          625 :                                      function_version_info[i].predicate_chain,
    5461              :                                      *empty_bb);
    5462              : 
    5463              :   /* Dispatch the default version at the end, unconditionally (NULL predicate chain).  */
    5464          197 :   *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
    5465              :                                    NULL, *empty_bb);
    5466              : 
    5467          197 :   free (function_version_info);
    5468          197 :   return 0;
    5469              : }
    5470              : 
    5471              : /* This function changes the assembler name for functions that are
    5472              :    versions.  DECL must carry a "target" attribute (asserted below); its sorted attribute
    5473              :    string is passed together with ID to clone_identifier, and the resulting identifier is returned.  */
    5474              : 
    5475              : static tree
    5476         1104 : ix86_mangle_function_version_assembler_name (tree decl, tree id)
    5477              : {
    5478         1104 :   tree version_attr;
    5479         1104 :   char *attr_str;
    5480              :   /* Report an error for versions declared inline that also carry the gnu_inline attribute.  */
    5481         1104 :   if (DECL_DECLARED_INLINE_P (decl)
    5482         1153 :       && lookup_attribute ("gnu_inline",
    5483           49 :                            DECL_ATTRIBUTES (decl)))
    5484            0 :     error_at (DECL_SOURCE_LOCATION (decl),
    5485              :               "function versions cannot be marked as %<gnu_inline%>,"
    5486              :               " bodies have to be generated");
    5487              :   /* Virtual functions are reported as unsupported.  */
    5488         1104 :   if (DECL_VIRTUAL_P (decl)
    5489         2208 :       || DECL_VINDEX (decl))
    5490            0 :     sorry ("virtual function multiversioning not supported");
    5491              : 
    5492         1104 :   version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
    5493              : 
    5494              :   /* target attribute string cannot be NULL.  */
    5495         1104 :   gcc_assert (version_attr != NULL_TREE);
    5496              : 
    5497         1104 :   attr_str = sorted_attr_string (TREE_VALUE (version_attr));
    5498              : 
    5499              :   /* Allow assembler name to be modified if already set, by resetting DECL's RTL to NULL.  */
    5500         1104 :   if (DECL_ASSEMBLER_NAME_SET_P (decl))
    5501         1089 :     SET_DECL_RTL (decl, NULL);
    5502              : 
    5503         1104 :   tree ret = clone_identifier (id, attr_str, true);
    5504              :   /* attr_str is released here, once the new identifier has been built.  */
    5505         1104 :   XDELETEVEC (attr_str);
    5506              : 
    5507         1104 :   return ret;
    5508              : }
    5509              : 
    5510              : tree
    5511    495876008 : ix86_mangle_decl_assembler_name (tree decl, tree id)
    5512              : {
    5513              :   /* Return the assembler name to use for DECL, starting from ID.  For function version, add the target suffix to the assembler name.  */
    5514    495876008 :   if (TREE_CODE (decl) == FUNCTION_DECL)
    5515              :     {
    5516    457464050 :       cgraph_node *node = cgraph_node::get (decl);
    5517              :       /* Mangle all versions when annotated with target_clones, but only
    5518              :          non-default versions when annotated with target attributes.  NOTE(review): NODE is dereferenced here without the null check the later branches perform -- presumably a versioned decl always has a cgraph node; confirm.  */
    5519    457464050 :       if (DECL_FUNCTION_VERSIONED (decl)
    5520    457464050 :           && (node->is_target_clone
    5521         1077 :               || !is_function_default_version (node->decl)))
    5522         1104 :         id = ix86_mangle_function_version_assembler_name (decl, id);
    5523              :       /* Mangle the dispatcher symbol with "ifunc", but only when the node is not a target clone.  NOTE(review): this comment used to read "only in the case of target clones", which the !is_target_clone test contradicts -- confirm the intent.  */
    5524    457462946 :       else if (node && node->dispatcher_function && !node->is_target_clone)
    5525          114 :         id = clone_identifier (id, "ifunc");
    5526     72847012 :       else if (node && node->dispatcher_resolver_function)
    5527          197 :         id = clone_identifier (id, "resolver");
    5528              :     }
    5529              : #ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
    5530              :   id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
    5531              : #endif
    5532              :   /* ID comes back unchanged when none of the cases above applied and no subtarget hook is defined.  */
    5533    495876008 :   return id;
    5534              : }
    5535              : 
    5536              : /* Make a dispatcher declaration for the multi-versioned function DECL.
    5537              :    Calls to DECL function will be replaced with calls to the dispatcher
    5538              :    by the front-end.  Returns the decl of the dispatcher function, or NULL when the head of the version chain is not a default version or when the ifunc error path below is taken.  */
    5539              : 
    5540              : tree
    5541          323 : ix86_get_function_versions_dispatcher (void *decl)
    5542              : {
    5543          323 :   tree fn = (tree) decl;
    5544          323 :   struct cgraph_node *node = NULL;
    5545          323 :   struct cgraph_node *default_node = NULL;
    5546          323 :   struct cgraph_function_version_info *node_v = NULL;
    5547              : 
    5548          323 :   tree dispatch_decl = NULL;
    5549              : 
    5550          323 :   struct cgraph_function_version_info *default_version_info = NULL;
    5551              : 
    5552          646 :   gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
    5553              : 
    5554          323 :   node = cgraph_node::get (fn);
    5555          323 :   gcc_assert (node != NULL);
    5556              : 
    5557          323 :   node_v = node->function_version ();
    5558          323 :   gcc_assert (node_v != NULL);
    5559              :   /* Reuse the dispatcher if one has already been recorded for this version.  */
    5560          323 :   if (node_v->dispatcher_resolver != NULL)
    5561              :     return node_v->dispatcher_resolver;
    5562              : 
    5563              :   /* The default node is always the beginning of the chain.  */
    5564              :   default_version_info = node_v;
    5565          662 :   while (default_version_info->prev != NULL)
    5566              :     default_version_info = default_version_info->prev;
    5567          209 :   default_node = default_version_info->this_node;
    5568              : 
    5569              :   /* If there is no default node, just return NULL.  */
    5570          209 :   if (!is_function_default_version (default_node->decl))
    5571              :     return NULL;
    5572              :   /* When ASM_OUTPUT_TYPE_DIRECTIVE is not defined, only the error block after the #endif is compiled.  */
    5573              : #if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
    5574          200 :   if (targetm.has_ifunc_p ())
    5575              :     {
    5576          200 :       struct cgraph_function_version_info *it_v = NULL;
    5577              : 
    5578              :       /* Right now, the dispatching is done via ifunc.  */
    5579          200 :       dispatch_decl = make_dispatcher_decl (default_node->decl);
    5580              : 
    5581              :       /* Set the dispatcher for all the versions.  */
    5582          200 :       it_v = default_version_info;
    5583         1385 :       while (it_v != NULL)
    5584              :         {
    5585          985 :           it_v->dispatcher_resolver = dispatch_decl;
    5586          985 :           it_v = it_v->next;
    5587              :         }
    5588              :     }
    5589              :   else
    5590              : #endif
    5591              :     {
    5592            0 :       error_at (DECL_SOURCE_LOCATION (default_node->decl),
    5593              :                 "multiversioning needs %<ifunc%> which is not supported "
    5594              :                 "on this target");
    5595              :     }
    5596              :   /* dispatch_decl is still NULL here if the error path above was taken.  */
    5597              :   return dispatch_decl;
    5598              : }
    5599              : 
    5600              : /* Make the resolver function decl to dispatch the versions of
    5601              :    a multi-versioned function, DEFAULT_DECL.  IFUNC_ALIAS_DECL is the
    5602              :    ifunc alias that will point to the created resolver.  Create an
    5603              :    empty basic block in the resolver and store the pointer in
    5604              :    *EMPTY_BB.  Return the decl of the resolver function.  */
    5605              : 
    5606              : static tree
    5607          197 : make_resolver_func (const tree default_decl,
    5608              :                     const tree ifunc_alias_decl,
    5609              :                     basic_block *empty_bb)
    5610              : {
    5611          197 :   tree decl, type, t;
    5612              : 
    5613              :   /* The resolver function should return a (void *). */
    5614          197 :   type = build_function_type_list (ptr_type_node, NULL_TREE);
    5615              : 
    5616          197 :   cgraph_node *node = cgraph_node::get (default_decl);
    5617          197 :   gcc_assert (node && node->function_version ());
    5618              :   /* Start from a function decl built with DEFAULT_DECL's name and the type above.  */
    5619          197 :   decl = build_fn_decl (IDENTIFIER_POINTER (DECL_NAME (default_decl)), type);
    5620              : 
    5621              :   /* Set the assembler name to prevent cgraph_node attempting to mangle.  */
    5622          197 :   SET_DECL_ASSEMBLER_NAME (decl, DECL_ASSEMBLER_NAME (default_decl));
    5623              : 
    5624          197 :   cgraph_node *resolver_node = cgraph_node::get_create (decl);
    5625          197 :   resolver_node->dispatcher_resolver_function = true;
    5626              : 
    5627          197 :   if (node->is_target_clone)
    5628           86 :     resolver_node->is_target_clone = true;
    5629              :   /* Derive the resolver's assembler name from the assembler_name recorded for the default version.  The dispatcher_resolver_function flag set above is one of the conditions ix86_mangle_decl_assembler_name tests when it applies the "resolver" suffix.  */
    5630          197 :   tree id = ix86_mangle_decl_assembler_name
    5631          197 :     (decl, node->function_version ()->assembler_name);
    5632          197 :   symtab->change_decl_assembler_name (decl, id);
    5633              : 
    5634          197 :   DECL_NAME (decl) = DECL_NAME (default_decl);
    5635          197 :   TREE_USED (decl) = 1;
    5636          197 :   DECL_ARTIFICIAL (decl) = 1;
    5637          197 :   DECL_IGNORED_P (decl) = 1;
    5638          197 :   TREE_PUBLIC (decl) = 0;
    5639          197 :   DECL_UNINLINABLE (decl) = 1;
    5640              : 
    5641              :   /* Resolver is not external, body is generated.  */
    5642          197 :   DECL_EXTERNAL (decl) = 0;
    5643          197 :   DECL_EXTERNAL (ifunc_alias_decl) = 0;
    5644              : 
    5645          197 :   DECL_CONTEXT (decl) = NULL_TREE;
    5646          197 :   DECL_INITIAL (decl) = make_node (BLOCK);
    5647          197 :   DECL_STATIC_CONSTRUCTOR (decl) = 0;
    5648              : 
    5649          197 :   if (DECL_COMDAT_GROUP (default_decl)
    5650          197 :       || TREE_PUBLIC (default_decl))
    5651              :     {
    5652              :       /* In this case, each translation unit with a call to this
    5653              :          versioned function will put out a resolver.  Ensure it
    5654              :          is comdat to keep just one copy.  */
    5655          173 :       DECL_COMDAT (decl) = 1;
    5656          173 :       make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
    5657              :     }
    5658              :   else
    5659           24 :     TREE_PUBLIC (ifunc_alias_decl) = 0;
    5660              : 
    5661              :   /* Build result decl and add to function_decl. */
    5662          197 :   t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
    5663          197 :   DECL_CONTEXT (t) = decl;
    5664          197 :   DECL_ARTIFICIAL (t) = 1;
    5665          197 :   DECL_IGNORED_P (t) = 1;
    5666          197 :   DECL_RESULT (decl) = t;
    5667              :   /* Gimplify the decl, then create its lowered empty body; the block that call returns is handed back through EMPTY_BB.  */
    5668          197 :   gimplify_function_tree (decl);
    5669          197 :   push_cfun (DECL_STRUCT_FUNCTION (decl));
    5670          197 :   *empty_bb = init_lowered_empty_function (decl, false,
    5671              :                                            profile_count::uninitialized ());
    5672              : 
    5673          197 :   cgraph_node::add_new_function (decl, true);
    5674          197 :   symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
    5675              : 
    5676          197 :   pop_cfun ();
    5677              :   /* NOTE(review): this assert runs after ifunc_alias_decl has already been accessed above (DECL_EXTERNAL, and TREE_PUBLIC on one path) -- consider checking it at function entry.  */
    5678          197 :   gcc_assert (ifunc_alias_decl != NULL);
    5679              :   /* Mark ifunc_alias_decl as "ifunc" with resolver as resolver_name.  */
    5680          197 :   DECL_ATTRIBUTES (ifunc_alias_decl)
    5681          197 :     = make_attribute ("ifunc", IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)),
    5682          197 :                       DECL_ATTRIBUTES (ifunc_alias_decl));
    5683              : 
    5684              :   /* Create the alias for dispatch to resolver here.  */
    5685          197 :   cgraph_node::create_same_body_alias (ifunc_alias_decl, decl);
    5686          197 :   return decl;
    5687              : }
    5688              : 
    5689              : /* Generate the dispatching code body to dispatch a multi-versioned function.
    5690              :    The target hook is called to process the "target" attributes and
    5691              :    provide the code to dispatch the right function at run-time.  NODE_P points
    5692              :    to the cgraph_node of the dispatcher whose body will be created.  Returns the resolver decl.  */
    5693              : 
    5694              : tree
    5695          197 : ix86_generate_version_dispatcher_body (void *node_p)
    5696              : {
    5697          197 :   tree resolver_decl;
    5698          197 :   basic_block empty_bb;
    5699          197 :   tree default_ver_decl;
    5700          197 :   struct cgraph_node *versn;
    5701          197 :   struct cgraph_node *node;
    5702              : 
    5703          197 :   struct cgraph_function_version_info *node_version_info = NULL;
    5704          197 :   struct cgraph_function_version_info *versn_info = NULL;
    5705              : 
    5706          197 :   node = (cgraph_node *)node_p;
    5707              : 
    5708          197 :   node_version_info = node->function_version ();
    5709          197 :   gcc_assert (node->dispatcher_function
    5710              :               && node_version_info != NULL);
    5711              :   /* A resolver recorded by an earlier call is returned as is.  */
    5712          197 :   if (node_version_info->dispatcher_resolver)
    5713              :     return node_version_info->dispatcher_resolver;
    5714              : 
    5715              :   /* The first version in the chain corresponds to the default version.  NOTE(review): ->next is dereferenced without a NULL check -- presumably a dispatcher always has at least one version chained after it; confirm.  */
    5716          197 :   default_ver_decl = node_version_info->next->this_node->decl;
    5717              : 
    5718              :   /* node is going to be an alias, so remove the finalized bit.  */
    5719          197 :   node->definition = false;
    5720              : 
    5721          197 :   resolver_decl = make_resolver_func (default_ver_decl,
    5722              :                                       node->decl, &empty_bb);
    5723              : 
    5724          197 :   node_version_info->dispatcher_resolver = resolver_decl;
    5725              : 
    5726          197 :   push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
    5727              : 
    5728          197 :   auto_vec<tree, 2> fn_ver_vec;
    5729              :   /* Gather the decl of every version chained after the dispatcher, in chain order, for dispatch_function_versions.  */
    5730         1176 :   for (versn_info = node_version_info->next; versn_info;
    5731          979 :        versn_info = versn_info->next)
    5732              :     {
    5733          979 :       versn = versn_info->this_node;
    5734              :       /* Check for virtual functions here again, as by this time it should
    5735              :          have been determined if this function needs a vtable index or
    5736              :          not.  This happens for methods in derived classes that override
    5737              :          virtual methods in base classes but are not explicitly marked as
    5738              :          virtual.  */
    5739          979 :       if (DECL_VIRTUAL_P (versn->decl))
    5740            0 :         sorry ("virtual function multiversioning not supported");
    5741              : 
    5742          979 :       fn_ver_vec.safe_push (versn->decl);
    5743              :     }
    5744              :   /* Fill in the resolver body, then rebuild the call graph edges while cfun is still the resolver's function.  */
    5745          197 :   dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
    5746          197 :   cgraph_edge::rebuild_edges ();
    5747          197 :   pop_cfun ();
    5748          197 :   return resolver_decl;
    5749          197 : }
    5750              : 
    5751              : 
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.