LCOV - code coverage report
Current view: top level - gcc/config/i386 - x86-tune-sched.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 63.8 % 298 190
Test Date: 2026-05-11 19:44:49 Functions: 70.0 % 10 7
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Scheduler hooks for IA-32 which implement CPU specific logic.
       2              :    Copyright (C) 1988-2026 Free Software Foundation, Inc.
       3              : 
       4              : This file is part of GCC.
       5              : 
       6              : GCC is free software; you can redistribute it and/or modify
       7              : it under the terms of the GNU General Public License as published by
       8              : the Free Software Foundation; either version 3, or (at your option)
       9              : any later version.
      10              : 
      11              : GCC is distributed in the hope that it will be useful,
      12              : but WITHOUT ANY WARRANTY; without even the implied warranty of
      13              : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14              : GNU General Public License for more details.
      15              : 
      16              : You should have received a copy of the GNU General Public License
      17              : along with GCC; see the file COPYING3.  If not see
      18              : <http://www.gnu.org/licenses/>.  */
      19              : 
      20              : #define IN_TARGET_CODE 1
      21              : 
      22              : #include "config.h"
      23              : #include "system.h"
      24              : #include "coretypes.h"
      25              : #include "backend.h"
      26              : #include "rtl.h"
      27              : #include "tree.h"
      28              : #include "cfghooks.h"
      29              : #include "tm_p.h"
      30              : #include "target.h"
      31              : #include "insn-config.h"
      32              : #include "insn-attr.h"
      33              : #include "insn-opinit.h"
      34              : #include "recog.h"
      35              : #include "tm-constrs.h"
      36              : 
      37              : /* Return the maximum number of instructions a cpu can issue.  */
      38              : 
      39              : int
      40     36648759 : ix86_issue_rate (void)
      41              : {
      42     36648759 :   switch (ix86_tune)
      43              :     {
      44              :     case PROCESSOR_PENTIUM:
      45              :     case PROCESSOR_LAKEMONT:
      46              :     case PROCESSOR_BONNELL:
      47              :     case PROCESSOR_SILVERMONT:
      48              :     case PROCESSOR_K6:
      49              :     case PROCESSOR_BTVER2:
      50              :     case PROCESSOR_PENTIUM4:
      51              :     case PROCESSOR_NOCONA:
      52              :       return 2;
      53              : 
      54              :     case PROCESSOR_PENTIUMPRO:
      55              :     case PROCESSOR_ATHLON:
      56              :     case PROCESSOR_K8:
      57              :     case PROCESSOR_AMDFAM10:
      58              :     case PROCESSOR_BTVER1:
      59              :     case PROCESSOR_LUJIAZUI:
      60              :       return 3;
      61              : 
      62              :     case PROCESSOR_BDVER1:
      63              :     case PROCESSOR_BDVER2:
      64              :     case PROCESSOR_BDVER3:
      65              :     case PROCESSOR_BDVER4:
      66              :     case PROCESSOR_ZNVER1:
      67              :     case PROCESSOR_ZNVER2:
      68              :     case PROCESSOR_ZNVER3:
      69              :     case PROCESSOR_ZNVER4:
      70              :     case PROCESSOR_CORE2:
      71              :     case PROCESSOR_NEHALEM:
      72              :     case PROCESSOR_SANDYBRIDGE:
      73              :     case PROCESSOR_HASWELL:
      74              :     case PROCESSOR_TREMONT:
      75              :     case PROCESSOR_SKYLAKE:
      76              :     case PROCESSOR_SKYLAKE_AVX512:
      77              :     case PROCESSOR_CASCADELAKE:
      78              :     case PROCESSOR_CANNONLAKE:
      79              :     case PROCESSOR_ALDERLAKE:
      80              :     case PROCESSOR_YONGFENG:
      81              :     case PROCESSOR_SHIJIDADAO:
      82              :     case PROCESSOR_SIERRAFOREST:
      83              :     case PROCESSOR_INTEL:
      84              :     case PROCESSOR_GENERIC:
      85              :     /* For znver5 decoder can handle 4 or 8 instructions per cycle,
      86              :        op cache 12 instruction/cycle, dispatch 8 instructions
      87              :        integer rename 8 instructions and Fp 6 instructions.
      88              : 
      89              :        The scheduler, without understanding out of order nature of the CPU
      90              :        is not going to be able to use more than 4 instructions since that
      91              :        is limits of the decoders.  */
      92              :     case PROCESSOR_ZNVER5:
      93              :     case PROCESSOR_ZNVER6:
      94              :     case PROCESSOR_C86_4G_M4:
      95              :     case PROCESSOR_C86_4G_M6:
      96              :     case PROCESSOR_C86_4G_M7:
      97              :       return 4;
      98              : 
      99              :     case PROCESSOR_ICELAKE_CLIENT:
     100              :     case PROCESSOR_ICELAKE_SERVER:
     101              :     case PROCESSOR_TIGERLAKE:
     102              :     case PROCESSOR_COOPERLAKE:
     103              :     case PROCESSOR_ROCKETLAKE:
     104              :       return 5;
     105              : 
     106              :     case PROCESSOR_SAPPHIRERAPIDS:
     107              :     case PROCESSOR_GRANITERAPIDS:
     108              :     case PROCESSOR_GRANITERAPIDS_D:
     109              :     case PROCESSOR_DIAMONDRAPIDS:
     110              :     case PROCESSOR_GRANDRIDGE:
     111              :     case PROCESSOR_CLEARWATERFOREST:
     112              :     case PROCESSOR_ARROWLAKE:
     113              :     case PROCESSOR_ARROWLAKE_S:
     114              :     case PROCESSOR_PANTHERLAKE:
     115              :       return 6;
     116              : 
     117              :     case PROCESSOR_NOVALAKE:
     118              :       return 8;
     119              : 
     120              :     default:
     121              :       return 1;
     122              :     }
     123              : }
     124              : 
     125              : /* Return true iff USE_INSN has a memory address with operands set by
     126              :    SET_INSN.  */
     127              : 
     128              : bool
     129      9891802 : ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
     130              : {
     131      9891802 :   int i;
     132      9891802 :   extract_insn_cached (use_insn);
     133     12166799 :   for (i = recog_data.n_operands - 1; i >= 0; --i)
     134     11808088 :     if (MEM_P (recog_data.operand[i]))
     135              :       {
     136      9533091 :         rtx addr = XEXP (recog_data.operand[i], 0);
     137      9533091 :         if (modified_in_p (addr, set_insn) != 0)
     138              :           {
     139              :             /* No AGI stall if SET_INSN is a push or pop and USE_INSN
     140              :                has SP based memory (unless index reg is modified in a pop).  */
     141      4314012 :             rtx set = single_set (set_insn);
     142      4314012 :             if (set
     143      4314012 :                 && (push_operand (SET_DEST (set), GET_MODE (SET_DEST (set)))
     144      3535837 :                     || pop_operand (SET_SRC (set), GET_MODE (SET_SRC (set)))))
     145              :               {
     146       591284 :                 struct ix86_address parts;
     147       591284 :                 if (ix86_decompose_address (addr, &parts)
     148       591284 :                     && parts.base == stack_pointer_rtx
     149      1182396 :                     && (parts.index == NULL_RTX
     150          471 :                         || MEM_P (SET_DEST (set))
     151            2 :                         || !modified_in_p (parts.index, set_insn)))
     152       591111 :                   return false;
     153              :               }
     154      3722901 :             return true;
     155              :           }
     156              :         return false;
     157              :       }
     158              :   return false;
     159              : }
     160              : 
     161              : /* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
     162              :    by DEP_INSN and nothing set by DEP_INSN.  */
     163              : 
     164              : static bool
     165            0 : ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
     166              : {
     167            0 :   rtx set, set2;
     168              : 
     169              :   /* Simplify the test for uninteresting insns.  */
     170            0 :   if (insn_type != TYPE_SETCC
     171            0 :       && insn_type != TYPE_ICMOV
     172            0 :       && insn_type != TYPE_FCMOV
     173            0 :       && insn_type != TYPE_IBR)
     174              :     return false;
     175              : 
     176            0 :   if ((set = single_set (dep_insn)) != 0)
     177              :     {
     178            0 :       set = SET_DEST (set);
     179            0 :       set2 = NULL_RTX;
     180              :     }
     181            0 :   else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
     182            0 :            && XVECLEN (PATTERN (dep_insn), 0) == 2
     183            0 :            && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
     184            0 :            && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
     185              :     {
     186            0 :       set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
     187            0 :       set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
     188              :     }
     189              :   else
     190              :     return false;
     191              : 
     192            0 :   if (!REG_P (set) || REGNO (set) != FLAGS_REG)
     193              :     return false;
     194              : 
     195              :   /* This test is true if the dependent insn reads the flags but
     196              :      not any other potentially set register.  */
     197            0 :   if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
     198              :     return false;
     199              : 
     200            0 :   if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
     201              :     return false;
     202              : 
     203              :   return true;
     204              : }
     205              : 
     206              : /* Helper function for exact_store_load_dependency.
     207              :    Return true if addr is found in insn.  */
     208              : static bool
     209            0 : exact_dependency_1 (rtx addr, rtx insn)
     210              : {
     211            0 :   enum rtx_code code;
     212            0 :   const char *format_ptr;
     213            0 :   int i, j;
     214              : 
     215            0 :   code = GET_CODE (insn);
     216            0 :   switch (code)
     217              :     {
     218            0 :     case MEM:
     219            0 :       if (rtx_equal_p (addr, insn))
     220              :         return true;
     221              :       break;
     222              :     case REG:
     223              :     CASE_CONST_ANY:
     224              :     case SYMBOL_REF:
     225              :     case CODE_LABEL:
     226              :     case PC:
     227              :     case EXPR_LIST:
     228              :       return false;
     229              :     default:
     230              :       break;
     231              :     }
     232              : 
     233            0 :   format_ptr = GET_RTX_FORMAT (code);
     234            0 :   for (i = 0; i < GET_RTX_LENGTH (code); i++)
     235              :     {
     236            0 :       switch (*format_ptr++)
     237              :         {
     238            0 :         case 'e':
     239            0 :           if (exact_dependency_1 (addr, XEXP (insn, i)))
     240              :             return true;
     241              :           break;
     242              :         case 'E':
     243            0 :           for (j = 0; j < XVECLEN (insn, i); j++)
     244            0 :             if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
     245              :               return true;
     246              :           break;
     247              :         }
     248              :     }
     249              :   return false;
     250              : }
     251              : 
     252              : /* Return true if there exists exact dependency for store & load, i.e.
     253              :    the same memory address is used in them.  */
     254              : static bool
     255            0 : exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
     256              : {
     257            0 :   rtx set1, set2;
     258              : 
     259            0 :   set1 = single_set (store);
     260            0 :   if (!set1)
     261              :     return false;
     262            0 :   if (!MEM_P (SET_DEST (set1)))
     263              :     return false;
     264            0 :   set2 = single_set (load);
     265            0 :   if (!set2)
     266              :     return false;
     267            0 :   if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
     268              :     return true;
     269              :   return false;
     270              : }
     271              : 
     272              : 
     273              : /* This function corrects the value of COST (latency) based on the relationship
     274              :    between INSN and DEP_INSN through a dependence of type DEP_TYPE, and strength
     275              :    DW.  It should return the new value.
     276              : 
     277              :    On x86 CPUs this is most commonly used to model the fact that valus of
     278              :    registers used to compute address of memory operand  needs to be ready
     279              :    earlier than values of registers used in the actual operation.  */
     280              : 
     281              : int
     282    151178103 : ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
     283              :                   unsigned int)
     284              : {
     285    151178103 :   enum attr_type insn_type, dep_insn_type;
     286    151178103 :   enum attr_memory memory;
     287    151178103 :   rtx set, set2;
     288    151178103 :   int dep_insn_code_number;
     289              : 
     290              :   /* Anti and output dependencies have zero cost on all CPUs.  */
     291    151178103 :   if (dep_type != 0)
     292              :     return 0;
     293              : 
     294     53877513 :   dep_insn_code_number = recog_memoized (dep_insn);
     295              : 
     296              :   /* If we can't recognize the insns, we can't really do anything.  */
     297     53877513 :   if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
     298       354556 :     return cost;
     299              : 
     300     53522957 :   insn_type = get_attr_type (insn);
     301     53522957 :   dep_insn_type = get_attr_type (dep_insn);
     302              : 
     303     53522957 :   switch (ix86_tune)
     304              :     {
     305            0 :     case PROCESSOR_PENTIUM:
     306            0 :     case PROCESSOR_LAKEMONT:
     307              :       /* Address Generation Interlock adds a cycle of latency.  */
     308            0 :       if (insn_type == TYPE_LEA)
     309              :         {
     310            0 :           rtx addr = PATTERN (insn);
     311              : 
     312            0 :           if (GET_CODE (addr) == PARALLEL)
     313            0 :             addr = XVECEXP (addr, 0, 0);
     314              : 
     315            0 :           gcc_assert (GET_CODE (addr) == SET);
     316              : 
     317            0 :           addr = SET_SRC (addr);
     318            0 :           if (modified_in_p (addr, dep_insn))
     319            0 :             cost += 1;
     320              :         }
     321            0 :       else if (ix86_agi_dependent (dep_insn, insn))
     322            0 :         cost += 1;
     323              : 
     324              :       /* ??? Compares pair with jump/setcc.  */
     325            0 :       if (ix86_flags_dependent (insn, dep_insn, insn_type))
     326            0 :         cost = 0;
     327              : 
     328              :       /* Floating point stores require value to be ready one cycle earlier.  */
     329            0 :       if (insn_type == TYPE_FMOV
     330            0 :           && get_attr_memory (insn) == MEMORY_STORE
     331            0 :           && !ix86_agi_dependent (dep_insn, insn))
     332            0 :         cost += 1;
     333              :       break;
     334              : 
     335            0 :     case PROCESSOR_PENTIUMPRO:
     336              :       /* INT->FP conversion is expensive.  */
     337            0 :       if (get_attr_fp_int_src (dep_insn))
     338            0 :         cost += 5;
     339              : 
     340              :       /* There is one cycle extra latency between an FP op and a store.  */
     341            0 :       if (insn_type == TYPE_FMOV
     342            0 :           && (set = single_set (dep_insn)) != NULL_RTX
     343            0 :           && (set2 = single_set (insn)) != NULL_RTX
     344            0 :           && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
     345            0 :           && MEM_P (SET_DEST (set2)))
     346            0 :         cost += 1;
     347              : 
     348            0 :       memory = get_attr_memory (insn);
     349              : 
     350              :       /* Show ability of reorder buffer to hide latency of load by executing
     351              :          in parallel with previous instruction in case
     352              :          previous instruction is not needed to compute the address.  */
     353            0 :       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
     354            0 :           && !ix86_agi_dependent (dep_insn, insn))
     355              :         {
     356              :           /* Claim moves to take one cycle, as core can issue one load
     357              :              at time and the next load can start cycle later.  */
     358            0 :           if (dep_insn_type == TYPE_IMOV
     359            0 :               || dep_insn_type == TYPE_FMOV)
     360              :             cost = 1;
     361            0 :           else if (cost > 1)
     362            0 :             cost--;
     363              :         }
     364              :       break;
     365              : 
     366            0 :     case PROCESSOR_K6:
     367              :      /* The esp dependency is resolved before
     368              :         the instruction is really finished.  */
     369            0 :       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
     370            0 :           && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
     371              :         return 1;
     372              : 
     373              :       /* INT->FP conversion is expensive.  */
     374            0 :       if (get_attr_fp_int_src (dep_insn))
     375            0 :         cost += 5;
     376              : 
     377            0 :       memory = get_attr_memory (insn);
     378              : 
     379              :       /* Show ability of reorder buffer to hide latency of load by executing
     380              :          in parallel with previous instruction in case
     381              :          previous instruction is not needed to compute the address.  */
     382            0 :       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
     383            0 :           && !ix86_agi_dependent (dep_insn, insn))
     384              :         {
     385              :           /* Claim moves to take one cycle, as core can issue one load
     386              :              at time and the next load can start cycle later.  */
     387            0 :           if (dep_insn_type == TYPE_IMOV
     388            0 :               || dep_insn_type == TYPE_FMOV)
     389              :             cost = 1;
     390            0 :           else if (cost > 2)
     391            0 :             cost -= 2;
     392              :           else
     393              :             cost = 1;
     394              :         }
     395              :       break;
     396              : 
     397         8961 :     case PROCESSOR_AMDFAM10:
     398         8961 :     case PROCESSOR_BDVER1:
     399         8961 :     case PROCESSOR_BDVER2:
     400         8961 :     case PROCESSOR_BDVER3:
     401         8961 :     case PROCESSOR_BDVER4:
     402         8961 :     case PROCESSOR_BTVER1:
     403         8961 :     case PROCESSOR_BTVER2:
     404              :       /* Stack engine allows to execute push&pop instructions in parall.  */
     405         8961 :       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
     406          219 :           && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
     407              :         return 0;
     408              :       /* FALLTHRU */
     409              : 
     410        73826 :     case PROCESSOR_ATHLON:
     411        73826 :     case PROCESSOR_K8:
     412        73826 :       memory = get_attr_memory (insn);
     413              : 
     414              :       /* Show ability of reorder buffer to hide latency of load by executing
     415              :          in parallel with previous instruction in case
     416              :          previous instruction is not needed to compute the address.  */
     417        73826 :       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
     418        73826 :           && !ix86_agi_dependent (dep_insn, insn))
     419              :         {
     420        10371 :           enum attr_unit unit = get_attr_unit (insn);
     421        10371 :           int loadcost = 3;
     422              : 
     423              :           /* Because of the difference between the length of integer and
     424              :              floating unit pipeline preparation stages, the memory operands
     425              :              for floating point are cheaper.
     426              : 
     427              :              ??? For Athlon it the difference is most probably 2.  */
     428        10371 :           if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
     429              :             loadcost = 3;
     430              :           else
     431         5071 :             loadcost = TARGET_CPU_P (ATHLON) ? 2 : 0;
     432              : 
     433        10371 :           if (cost >= loadcost)
     434         5841 :             cost -= loadcost;
     435              :           else
     436              :             cost = 0;
     437              :         }
     438              :       break;
     439              : 
     440         4669 :     case PROCESSOR_ZNVER1:
     441         4669 :     case PROCESSOR_ZNVER2:
     442         4669 :     case PROCESSOR_ZNVER3:
     443         4669 :     case PROCESSOR_ZNVER4:
     444         4669 :     case PROCESSOR_ZNVER5:
     445         4669 :     case PROCESSOR_ZNVER6:
     446         4669 :     case PROCESSOR_C86_4G_M4:
     447         4669 :     case PROCESSOR_C86_4G_M6:
     448         4669 :     case PROCESSOR_C86_4G_M7:
     449              :       /* Stack engine allows to execute push&pop instructions in parall.  */
     450         4669 :       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
     451          559 :           && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
     452              :         return 0;
     453              : 
     454         4410 :       memory = get_attr_memory (insn);
     455              : 
     456              :       /* Show ability of reorder buffer to hide latency of load by executing
     457              :          in parallel with previous instruction in case
     458              :          previous instruction is not needed to compute the address.  */
     459         4410 :       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
     460         4410 :           && !ix86_agi_dependent (dep_insn, insn))
     461              :         {
     462          687 :           enum attr_unit unit = get_attr_unit (insn);
     463          687 :           int loadcost;
     464              : 
     465              :           /* TODO: On znver5 complex addressing modes have
     466              :              greater latency.  */
     467          687 :           if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
     468              :             loadcost = 4;
     469              :           else
     470          449 :             loadcost = 7;
     471              : 
     472          687 :           if (cost >= loadcost)
     473          151 :             cost -= loadcost;
     474              :           else
     475              :             cost = 0;
     476              :         }
     477              :       break;
     478              : 
     479            0 :     case PROCESSOR_YONGFENG:
     480            0 :     case PROCESSOR_SHIJIDADAO:
     481              :       /* Stack engine allows to execute push&pop instructions in parallel.  */
     482            0 :       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
     483            0 :           && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
     484              :         return 0;
     485              :       /* FALLTHRU */
     486              : 
     487            0 :     case PROCESSOR_LUJIAZUI:
     488            0 :       memory = get_attr_memory (insn);
     489              : 
     490              :       /* Show ability of reorder buffer to hide latency of load by executing
     491              :           in parallel with previous instruction in case
     492              :           previous instruction is not needed to compute the address.  */
     493            0 :       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
     494            0 :           && !ix86_agi_dependent (dep_insn, insn))
     495              :           {
     496            0 :             int loadcost = 4;
     497              : 
     498            0 :             if (cost >= loadcost)
     499            0 :               cost -= loadcost;
     500              :             else
     501              :               cost = 0;
     502              :           }
     503              :        break;
     504              : 
     505     53413843 :     case PROCESSOR_CORE2:
     506     53413843 :     case PROCESSOR_NEHALEM:
     507     53413843 :     case PROCESSOR_SANDYBRIDGE:
     508     53413843 :     case PROCESSOR_HASWELL:
     509     53413843 :     case PROCESSOR_TREMONT:
     510     53413843 :     case PROCESSOR_ALDERLAKE:
     511     53413843 :     case PROCESSOR_INTEL:
     512     53413843 :     case PROCESSOR_GENERIC:
     513              :       /* Stack engine allows to execute push&pop instructions in parall.  */
     514     53413843 :       if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
     515      8709739 :           && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
     516              :         return 0;
     517              : 
     518     47881485 :       memory = get_attr_memory (insn);
     519              : 
     520              :       /* Show ability of reorder buffer to hide latency of load by executing
     521              :          in parallel with previous instruction in case
     522              :          previous instruction is not needed to compute the address.  */
     523     47881485 :       if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
     524     47881485 :           && !ix86_agi_dependent (dep_insn, insn))
     525              :         {
     526      6157772 :           if (cost >= 4)
     527       214782 :             cost -= 4;
     528              :           else
     529              :             cost = 0;
     530              :         }
     531              :       break;
     532              : 
     533          939 :     case PROCESSOR_SILVERMONT:
     534          939 :       if (!reload_completed)
     535              :         return cost;
     536              : 
     537              :       /* Increase cost of integer loads.  */
     538          939 :       memory = get_attr_memory (dep_insn);
     539          939 :       if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
     540              :         {
     541          214 :           enum attr_unit unit = get_attr_unit (dep_insn);
     542          214 :           if (unit == UNIT_INTEGER && cost == 1)
     543              :             {
     544          165 :               if (memory == MEMORY_LOAD)
     545              :                 cost = 3;
     546              :               else
     547              :                 {
     548              :                   /* Increase cost of ld/st for short int types only
     549              :                      because of store forwarding issue.  */
     550            0 :                   rtx set = single_set (dep_insn);
     551            0 :                   if (set && (GET_MODE (SET_DEST (set)) == QImode
     552            0 :                               || GET_MODE (SET_DEST (set)) == HImode))
     553              :                     {
     554              :                       /* Increase cost of store/load insn if exact
     555              :                          dependence exists and it is load insn.  */
     556            0 :                       enum attr_memory insn_memory = get_attr_memory (insn);
     557            0 :                       if (insn_memory == MEMORY_LOAD
     558            0 :                           && exact_store_load_dependency (dep_insn, insn))
     559              :                         cost = 3;
     560              :                     }
     561              :                 }
     562              :             }
     563              :         }
     564              : 
     565              :     default:
     566              :       break;
     567              :     }
     568              : 
     569              :   return cost;
     570              : }
     571              : 
     572              : /* How many alternative schedules to try.  This should be as wide as the
     573              :    scheduling freedom in the DFA, but no wider.  Making this value too
     574              :    large results extra work for the scheduler.  */
     575              : 
     576              : int
     577       961988 : ia32_multipass_dfa_lookahead (void)
     578              : {
     579              :   /* Generally, we want haifa-sched:max_issue() to look ahead as far
     580              :      as many instructions can be executed on a cycle, i.e.,
     581              :      issue_rate.  */
     582       961988 :   if (reload_completed)
     583       961549 :     return ix86_issue_rate ();
     584              :   /* Don't use lookahead for pre-reload schedule to save compile time.  */
     585              :   return 0;
     586              : }
     587              : 
     588              : /* Return true if target platform supports macro-fusion.  */
     589              : 
     590              : bool
     591    107481657 : ix86_macro_fusion_p ()
     592              : {
     593    107481657 :   return TARGET_FUSE_CMP_AND_BRANCH;
     594              : }
     595              : 
     596              : /* Check whether MOV is a reg-reg move and ALU is an
     597              :    ALU operation that allows macro-op fusion.  */
     598              : 
     599              : static bool
     600         2069 : ix86_fuse_mov_alu_p (rtx_insn *mov, rtx_insn *alu)
     601              : {
     602              :   /* Validate mov:
     603              :       - It should be reg-reg move with opcode 0x89 or 0x8B.  */
     604         2069 :   rtx set1 = PATTERN (mov);
     605         2069 :   if (GET_CODE (set1) != SET
     606         1854 :       || !GENERAL_REG_P (SET_SRC (set1))
     607         2288 :       || !GENERAL_REG_P (SET_DEST (set1)))
     608              :     return false;
     609           91 :   rtx reg = SET_DEST (set1);
     610              :   /*  - it should have 0x89 or 0x8B opcode.  */
     611           91 :   if (!INTEGRAL_MODE_P (GET_MODE (reg))
     612          182 :       || GET_MODE_SIZE (GET_MODE (reg)) < 2
     613          182 :       || GET_MODE_SIZE (GET_MODE (reg)) > 8)
     614              :     return false;
     615              :   /* Validate ALU.  */
     616           91 :   if (GET_CODE (PATTERN (alu)) != PARALLEL)
     617              :     return false;
     618           24 :   rtx set2 = XVECEXP (PATTERN (alu), 0, 0);
     619           24 :   if (GET_CODE (set2) != SET)
     620              :     return false;
     621              :   /* If this is instruction setting both compare and normal
     622              :      register, the first set always sets flags, while
     623              :      second set writes to the output operan.  Pick
     624              :      the second set.  */
     625           24 :   if (GET_CODE (SET_SRC (set2)) == COMPARE)
     626              :     {
     627            0 :       set2 = XVECEXP (PATTERN (alu), 0, 1);
     628            0 :       if (GET_CODE (set2) != SET)
     629              :         return false;
     630              :     }
     631              :   /* Match one of:
     632              :      ADD ADC AND XOR OR SUB SBB INC DEC NOT SAL SHL SHR SAR
     633              :      We also may add insn attribute to handle some of sporadic
     634              :      case we output those with different RTX expressions.  */
     635              : 
     636           24 :   if (GET_CODE (SET_SRC (set2)) != PLUS
     637           24 :       && GET_CODE (SET_SRC (set2)) != MINUS
     638              :       && GET_CODE (SET_SRC (set2)) != XOR
     639              :       && GET_CODE (SET_SRC (set2)) != AND
     640              :       && GET_CODE (SET_SRC (set2)) != IOR
     641              :       && GET_CODE (SET_SRC (set2)) != NOT
     642              :       && GET_CODE (SET_SRC (set2)) != ASHIFT
     643              :       && GET_CODE (SET_SRC (set2)) != ASHIFTRT
     644              :       && GET_CODE (SET_SRC (set2)) != LSHIFTRT)
     645              :     return false;
     646           24 :   rtx op0 = XEXP (SET_SRC (set2), 0);
     647           24 :   rtx op1 = GET_CODE (SET_SRC (set2)) != NOT ? XEXP (SET_SRC (set2), 1) : NULL;
     648              :   /* One of operands should be register.  */
     649           24 :   if (op1 && (!REG_P (op0) || REGNO (op0) != REGNO (reg)))
     650              :     std::swap (op0, op1);
     651           24 :   if (!REG_P (op0) || REGNO (op0) != REGNO (reg))
     652              :     return false;
     653           23 :   if (op1
     654           23 :       && !REG_P (op1)
     655           39 :       && !x86_64_immediate_operand (op1, VOIDmode))
     656              :     return false;
     657              :   /* Only one of two parameters must be move destination.  */
     658           23 :   if (op1 && REG_P (op1) && REGNO (op1) == REGNO (reg))
     659              :     return false;
     660              :   return true;
     661              : }
     662              : 
     663              : /* Check whether current microarchitecture support macro fusion
     664              :    for insn pair "CONDGEN + CONDJMP". Refer to
     665              :    "Intel Architectures Optimization Reference Manual". */
     666              : 
     667              : bool
     668     87369234 : ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
     669              : {
     670     87369234 :   if (TARGET_FUSE_MOV_AND_ALU
     671     87369234 :       && ix86_fuse_mov_alu_p (condgen, condjmp))
     672              :     return true;
     673     87369211 :   rtx src, imm = NULL_RTX;
     674     87369211 :   enum rtx_code ccode;
     675     87369211 :   rtx compare_set = NULL_RTX, test_if, cond;
     676     87369211 :   rtx alu_set = NULL_RTX, addr = NULL_RTX;
     677     87369211 :   rtx alu_clobber = NULL_RTX;
     678     87369211 :   enum attr_type condgen_type;
     679              : 
     680     87369211 :   if (!any_condjump_p (condjmp))
     681              :     return false;
     682              : 
     683     13058834 :   unsigned int condreg1, condreg2;
     684     13058834 :   rtx cc_reg_1;
     685     13058834 :   targetm.fixed_condition_code_regs (&condreg1, &condreg2);
     686     13058834 :   cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
     687     13058834 :   if (!reg_referenced_p (cc_reg_1, PATTERN (condjmp))
     688     13058759 :       || !condgen
     689     26117593 :       || !modified_in_p (cc_reg_1, condgen))
     690       118811 :     return false;
     691              : 
     692     12940023 :   condgen_type = get_attr_type (condgen);
     693     12940023 :   if (condgen_type == TYPE_MULTI
     694          418 :       && INSN_CODE (condgen) == code_for_stack_protect_test_1 (ptr_mode)
     695     12940441 :       && TARGET_FUSE_ALU_AND_BRANCH)
     696              :     {
     697              :       /* stack_protect_test_<mode> ends with a sub, which subtracts
     698              :          a non-rip special memory operand from a GPR.  */
     699          418 :       src = NULL_RTX;
     700          418 :       alu_set = XVECEXP (PATTERN (condgen), 0, 1);
     701          418 :       goto handle_stack_protect_test;
     702              :     }
     703              :   /* ??? zen5 can fuse cmp, test, sub, add, inc, dec, or, and xor.
     704              :      Cores can not fuse or and xor which will pass the test below
     705              :      since type is ALU.  */
     706     12939605 :   else if (condgen_type != TYPE_TEST
     707     12939605 :            && condgen_type != TYPE_ICMP
     708     12939605 :            && condgen_type != TYPE_INCDEC
     709       581544 :            && condgen_type != TYPE_ALU)
     710              :     return false;
     711              : 
     712     12557461 :   compare_set = single_set (condgen);
     713     12557461 :   if (compare_set == NULL_RTX && !TARGET_FUSE_ALU_AND_BRANCH)
     714              :     return false;
     715              : 
     716        71851 :   if (compare_set == NULL_RTX)
     717              :     {
     718        71851 :       int i;
     719        71851 :       rtx pat = PATTERN (condgen);
     720       215553 :       for (i = 0; i < XVECLEN (pat, 0); i++)
     721       143702 :         if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
     722              :           {
     723       143702 :             rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
     724       143702 :             if (GET_CODE (set_src) == COMPARE)
     725              :               compare_set = XVECEXP (pat, 0, i);
     726              :             else
     727        76238 :               alu_set = XVECEXP (pat, 0, i);
     728              :           }
     729              :         /* We also possibly generated ALU instruction only to set
     730              :            flags.  In this case there will be clobber.  */
     731            0 :         else if (GET_CODE (XVECEXP (pat, 0, i)) == CLOBBER
     732            0 :             && GENERAL_REG_P (XEXP (XVECEXP (pat, 0, i), 0)))
     733              :           alu_clobber = XVECEXP (pat, 0, i);
     734              :     }
     735        71851 :   if (compare_set == NULL_RTX)
     736              :     return false;
     737     12553056 :   src = SET_SRC (compare_set);
     738     12553056 :   if (GET_CODE (src) != COMPARE)
     739              :     return false;
     740              : 
     741              :   /* Check for memory operand.  */
     742     12544417 :   if (MEM_P (XEXP (src, 0)))
     743      1940464 :     addr = XEXP (XEXP (src, 0), 0);
     744     10603953 :   else if (MEM_P (XEXP (src, 1)))
     745       787430 :     addr = XEXP (XEXP (src, 1), 0);
     746              :   /* Some CPUs, i.e. tigerlake and cooperlake does not fuse
     747              :      ALU with memory operand.  */
     748      2727894 :   if (addr && !TARGET_FUSE_ALU_AND_BRANCH_MEM)
     749              :     return false;
     750     12543821 :   if (CONST_INT_P (XEXP (src, 0)))
     751              :     imm = XEXP (src, 0);
     752     12543821 :   else if (CONST_INT_P (XEXP (src, 1)))
     753              :     imm = XEXP (src, 1);
     754              :   /* Check that the instruction really has immediate.
     755              :      In particular compare with 0 is done using test with no immediate.  */
     756      8258784 :   if (imm && !get_attr_length_immediate (condgen))
     757              :     imm = NULL;
     758              :   /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
     759              :      supported.   */
     760     12543821 :   if (addr && imm && !TARGET_FUSE_ALU_AND_BRANCH_MEM_IMM)
     761              :     return false;
     762              : 
     763              :   /* No fusion for RIP-relative address.   */
     764     12543497 :   if (addr && !TARGET_FUSE_ALU_AND_BRANCH_RIP_RELATIVE)
     765              :     {
     766      2726974 :       ix86_address parts;
     767      2726974 :       int ok = ix86_decompose_address (addr, &parts);
     768      2726974 :       gcc_assert (ok);
     769              : 
     770      2726974 :       if (ix86_rip_relative_addr_p (&parts))
     771       402739 :         return false;
     772              :     }
     773              :   /* Znver5 supports fussion fusion with their reg/reg, reg/imm and
     774              :      reg/mem forms. They are also supported when the instruction has an
     775              :      immediate and displacement that meets the criteria of 4 byte displacement
     776              :      and 2 byte immediate or the case of 2 byte displacement and 4 byte
     777              :      immediate.  We do not know the displacement size, so we ignore this
     778              :      limitation.  */
     779              : 
     780      9816523 :  handle_stack_protect_test:
     781     12141176 :   test_if = SET_SRC (pc_set (condjmp));
     782     12141176 :   cond = XEXP (test_if, 0);
     783     12141176 :   ccode = GET_CODE (cond);
     784              :   /* Check whether conditional jump use Sign or Overflow Flags.  */
     785     12141176 :   if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
     786         1460 :       && (ccode == GE || ccode == GT || ccode == LE || ccode == LT))
     787              :     return false;
     788              : 
     789              :   /* Return true for TYPE_TEST and TYPE_ICMP.  */
     790     12141070 :   if (condgen_type == TYPE_TEST || condgen_type == TYPE_ICMP)
     791              :     return true;
     792              : 
     793              :   /* The following is the case that macro-fusion for alu + jmp.  */
     794       191995 :   if (!TARGET_FUSE_ALU_AND_BRANCH || (!alu_set && !alu_clobber))
     795              :     return false;
     796              : 
     797              :   /* No fusion for alu op with memory destination operand.  */
     798        67882 :   if (alu_set && MEM_P (SET_DEST (alu_set)))
     799              :     return false;
     800              : 
     801              : 
     802              :   /* Macro-fusion for inc/dec + unsigned conditional jump is not
     803              :      supported on some CPUs while supported on others (znver5 and core_avx512).
     804              :      We however never generate it, so we do not need a specific tune for it.  */
     805        64394 :   gcc_checking_assert (!(condgen_type == TYPE_INCDEC
     806              :                        && (ccode == GEU || ccode == GTU || ccode == LEU || ccode == LTU)));
     807              : 
     808              :   return true;
     809              : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.