LCOV - code coverage report
Current view: top level - gcc - lra-coalesce.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 90.4 % 167 151
Test Date: 2024-12-21 13:15:12 Functions: 87.5 % 8 7
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: - 0 0

             Branch data     Line data    Source code
       1                 :             : /* Coalesce spilled pseudos.
       2                 :             :    Copyright (C) 2010-2024 Free Software Foundation, Inc.
       3                 :             :    Contributed by Vladimir Makarov <vmakarov@redhat.com>.
       4                 :             : 
       5                 :             : This file is part of GCC.
       6                 :             : 
       7                 :             : GCC is free software; you can redistribute it and/or modify it under
       8                 :             : the terms of the GNU General Public License as published by the Free
       9                 :             : Software Foundation; either version 3, or (at your option) any later
      10                 :             : version.
      11                 :             : 
      12                 :             : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      13                 :             : WARRANTY; without even the implied warranty of MERCHANTABILITY or
      14                 :             : FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      15                 :             : for more details.
      16                 :             : 
      17                 :             : You should have received a copy of the GNU General Public License
      18                 :             : along with GCC; see the file COPYING3.  If not see
      19                 :             : <http://www.gnu.org/licenses/>.    */
      20                 :             : 
      21                 :             : 
      22                 :             : /* This file contains a pass making some simple RTL code
      23                 :             :    transformations by coalescing pseudos to remove some move insns.
      24                 :             : 
      25                 :             :    Spilling pseudos in LRA can create memory-memory moves.  We should
      26                 :             :    remove potential memory-memory moves before the next constraint
      27                 :             :    pass because the constraint pass will generate additional insns for
      28                 :             :    such moves and all these insns will be hard to remove afterwards.
      29                 :             : 
      30                 :             :    Here we coalesce only spilled pseudos.  Coalescing non-spilled
      31                 :             :    pseudos (with different hard regs) might result in spilling
      32                 :             :    additional pseudos because of possible conflicts with other
      33                 :             :    non-spilled pseudos and, as a consequence, in more constraint
      34                 :             :    passes and even LRA infinite cycling.  Trivial the same hard
      35                 :             :    register moves will be removed by subsequent compiler passes.
      36                 :             : 
      37                 :             :    We don't coalesce special reload pseudos.  It complicates LRA code
      38                 :             :    a lot without visible generated code improvement.
      39                 :             : 
      40                 :             :    The pseudo live-ranges are used to find conflicting pseudos during
      41                 :             :    coalescing.
      42                 :             : 
      43                 :             :    Most frequently executed moves is tried to be coalesced first.  */
      44                 :             : 
      45                 :             : #include "config.h"
      46                 :             : #include "system.h"
      47                 :             : #include "coretypes.h"
      48                 :             : #include "backend.h"
      49                 :             : #include "rtl.h"
      50                 :             : #include "predict.h"
      51                 :             : #include "df.h"
      52                 :             : #include "insn-config.h"
      53                 :             : #include "regs.h"
      54                 :             : #include "memmodel.h"
      55                 :             : #include "ira.h"
      56                 :             : #include "recog.h"
      57                 :             : #include "lra-int.h"
      58                 :             : 
      59                 :             : /* Arrays whose elements represent the first and the next pseudo
      60                 :             :    (regno) in the coalesced pseudos group to which given pseudo (its
      61                 :             :    regno is the index) belongs.  The next of the last pseudo in the
      62                 :             :    group refers to the first pseudo in the group, in other words the
      63                 :             :    group is represented by a cyclic list.  */
      64                 :             : static int *first_coalesced_pseudo, *next_coalesced_pseudo;
      65                 :             : 
      66                 :             : /* The function is used to sort moves according to their execution
      67                 :             :    frequencies.  */
      68                 :             : static int
      69                 :       17352 : move_freq_compare_func (const void *v1p, const void *v2p)
      70                 :             : {
      71                 :       17352 :   rtx_insn *mv1 = *(rtx_insn * const *) v1p;
      72                 :       17352 :   rtx_insn *mv2 = *(rtx_insn * const *) v2p;
      73                 :       17352 :   int pri1, pri2;
      74                 :             : 
      75                 :       17352 :   pri1 = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (mv1));
      76                 :       17352 :   pri2 = REG_FREQ_FROM_BB (BLOCK_FOR_INSN (mv2));
      77                 :       17352 :   if (pri2 - pri1)
      78                 :        6708 :     return pri2 - pri1;
      79                 :             : 
      80                 :             :   /* If frequencies are equal, sort by moves, so that the results of
      81                 :             :      qsort leave nothing to chance.  */
      82                 :       10644 :   return (int) INSN_UID (mv1) - (int) INSN_UID (mv2);
      83                 :             : }
      84                 :             : 
      85                 :             : /* Pseudos which go away after coalescing.  */
      86                 :             : static bitmap_head coalesced_pseudos_bitmap;
      87                 :             : 
      88                 :             : /* Merge two sets of coalesced pseudos given correspondingly by
      89                 :             :    pseudos REGNO1 and REGNO2 (more accurately merging REGNO2 group
      90                 :             :    into REGNO1 group).  Set up COALESCED_PSEUDOS_BITMAP.  */
      91                 :             : static void
      92                 :        1140 : merge_pseudos (int regno1, int regno2)
      93                 :             : {
      94                 :        1140 :   int regno, first, first2, last, next;
      95                 :             : 
      96                 :        1140 :   first = first_coalesced_pseudo[regno1];
      97                 :        1140 :   if ((first2 = first_coalesced_pseudo[regno2]) == first)
      98                 :             :     return;
      99                 :        1140 :   for (last = regno2, regno = next_coalesced_pseudo[regno2];;
     100                 :          49 :        regno = next_coalesced_pseudo[regno])
     101                 :             :     {
     102                 :        1189 :       first_coalesced_pseudo[regno] = first;
     103                 :        1189 :       bitmap_set_bit (&coalesced_pseudos_bitmap, regno);
     104                 :        1189 :       if (regno == regno2)
     105                 :             :         break;
     106                 :          49 :       last = regno;
     107                 :             :     }
     108                 :        1140 :   next = next_coalesced_pseudo[first];
     109                 :        1140 :   next_coalesced_pseudo[first] = regno2;
     110                 :        1140 :   next_coalesced_pseudo[last] = next;
     111                 :        1140 :   lra_reg_info[first].live_ranges
     112                 :        1140 :     = (lra_merge_live_ranges
     113                 :        1140 :        (lra_reg_info[first].live_ranges,
     114                 :        1140 :         lra_copy_live_range_list (lra_reg_info[first2].live_ranges)));
     115                 :        1140 :   lra_update_biggest_mode (first, lra_reg_info[first2].biggest_mode);
     116                 :             : }
     117                 :             : 
     118                 :             : /* Change pseudos in *LOC on their coalescing group
     119                 :             :    representatives.  */
     120                 :             : static bool
     121                 :       80378 : substitute (rtx *loc)
     122                 :             : {
     123                 :       80378 :   int i, regno;
     124                 :       80378 :   const char *fmt;
     125                 :       80378 :   enum rtx_code code;
     126                 :       80378 :   bool res;
     127                 :             : 
     128                 :       80378 :   if (*loc == NULL_RTX)
     129                 :             :     return false;
     130                 :       70451 :   code = GET_CODE (*loc);
     131                 :       70451 :   if (code == REG)
     132                 :             :     {
     133                 :       29711 :       regno = REGNO (*loc);
     134                 :       29711 :       if (regno < FIRST_PSEUDO_REGISTER
     135                 :       26400 :           || first_coalesced_pseudo[regno] == regno)
     136                 :             :         return false;
     137                 :        6637 :       *loc = regno_reg_rtx[first_coalesced_pseudo[regno]];
     138                 :        6637 :       return true;
     139                 :             :     }
     140                 :             : 
     141                 :       40740 :   res = false;
     142                 :       40740 :   fmt = GET_RTX_FORMAT (code);
     143                 :      166308 :   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
     144                 :             :     {
     145                 :      125568 :       if (fmt[i] == 'e')
     146                 :             :         {
     147                 :       67674 :           if (substitute (&XEXP (*loc, i)))
     148                 :      125568 :             res = true;
     149                 :             :         }
     150                 :       57894 :       else if (fmt[i] == 'E')
     151                 :             :         {
     152                 :        1394 :           int j;
     153                 :             : 
     154                 :        4171 :           for (j = XVECLEN (*loc, i) - 1; j >= 0; j--)
     155                 :        2777 :             if (substitute (&XVECEXP (*loc, i, j)))
     156                 :         396 :               res = true;
     157                 :             :         }
     158                 :             :     }
     159                 :             :   return res;
     160                 :             : }
     161                 :             : 
     162                 :             : /* Specialize "substitute" for use on an insn.  This can't change
     163                 :             :    the insn ptr, just the contents of the insn.  */
     164                 :             : 
     165                 :             : static bool
     166                 :        9927 : substitute_within_insn (rtx_insn *insn)
     167                 :             : {
     168                 :        9927 :   rtx loc = insn;
     169                 :           0 :   return substitute (&loc);
     170                 :             : }
     171                 :             : 
     172                 :             : /* The current iteration (1, 2, ...) of the coalescing pass.  */
     173                 :             : int lra_coalesce_iter;
     174                 :             : 
     175                 :             : /* Return true if the move involving REGNO1 and REGNO2 is a potential
     176                 :             :    memory-memory move.  */
     177                 :             : static bool
     178                 :     3607861 : mem_move_p (int regno1, int regno2)
     179                 :             : {
     180                 :     3607861 :   return reg_renumber[regno1] < 0 && reg_renumber[regno2] < 0;
     181                 :             : }
     182                 :             : 
     183                 :             : /* Pseudos used instead of the coalesced pseudos.  */
     184                 :             : static bitmap_head used_pseudos_bitmap;
     185                 :             : 
     186                 :             : /* Set up USED_PSEUDOS_BITMAP, and update LR_BITMAP (a BB live info
     187                 :             :    bitmap).  */
     188                 :             : static void
     189                 :     3174392 : update_live_info (bitmap lr_bitmap)
     190                 :             : {
     191                 :     3174392 :   unsigned int j;
     192                 :     3174392 :   bitmap_iterator bi;
     193                 :             : 
     194                 :     3174392 :   bitmap_clear (&used_pseudos_bitmap);
     195                 :     3232057 :   EXECUTE_IF_AND_IN_BITMAP (&coalesced_pseudos_bitmap, lr_bitmap,
     196                 :             :                             FIRST_PSEUDO_REGISTER, j, bi)
     197                 :       57665 :     bitmap_set_bit (&used_pseudos_bitmap, first_coalesced_pseudo[j]);
     198                 :     3174392 :   if (! bitmap_empty_p (&used_pseudos_bitmap))
     199                 :             :     {
     200                 :       43672 :       bitmap_and_compl_into (lr_bitmap, &coalesced_pseudos_bitmap);
     201                 :       43672 :       bitmap_ior_into (lr_bitmap, &used_pseudos_bitmap);
     202                 :             :     }
     203                 :     3174392 : }
     204                 :             : 
     205                 :             : /* Return true if pseudo REGNO can be potentially coalesced.  */
     206                 :             : static bool
     207                 :        6234 : coalescable_pseudo_p (int regno)
     208                 :             : {
     209                 :        6234 :   lra_assert (regno >= FIRST_PSEUDO_REGISTER);
     210                 :        6234 :   return (/* We don't want to coalesce regnos with equivalences, at
     211                 :             :              least without updating this info.  */
     212                 :        6234 :           ira_reg_equiv[regno].constant == NULL_RTX
     213                 :        6234 :           && ira_reg_equiv[regno].memory == NULL_RTX
     214                 :       12466 :           && ira_reg_equiv[regno].invariant == NULL_RTX);
     215                 :             : }
     216                 :             : 
     217                 :             : /* The major function for aggressive pseudo coalescing of moves only
     218                 :             :    if the both pseudos were spilled and not special reload pseudos.  */
     219                 :             : bool
     220                 :       26002 : lra_coalesce (void)
     221                 :             : {
     222                 :       26002 :   basic_block bb;
     223                 :       26002 :   rtx_insn *mv, *insn, *next, **sorted_moves;
     224                 :       26002 :   rtx set;
     225                 :       26002 :   int i, mv_num, sregno, dregno;
     226                 :       26002 :   int coalesced_moves;
     227                 :       26002 :   int max_regno = max_reg_num ();
     228                 :       26002 :   bitmap_head involved_insns_bitmap;
     229                 :             : 
     230                 :       26002 :   timevar_push (TV_LRA_COALESCE);
     231                 :             : 
     232                 :       26002 :   if (lra_dump_file != NULL)
     233                 :           0 :     fprintf (lra_dump_file,
     234                 :             :              "\n********** Pseudos coalescing #%d: **********\n\n",
     235                 :             :              ++lra_coalesce_iter);
     236                 :       26002 :   first_coalesced_pseudo = XNEWVEC (int, max_regno);
     237                 :       26002 :   next_coalesced_pseudo = XNEWVEC (int, max_regno);
     238                 :    13588718 :   for (i = 0; i < max_regno; i++)
     239                 :    13562716 :     first_coalesced_pseudo[i] = next_coalesced_pseudo[i] = i;
     240                 :       26002 :   sorted_moves = XNEWVEC (rtx_insn *, get_max_uid ());
     241                 :       26002 :   mv_num = 0;
     242                 :             :   /* Collect moves.  */
     243                 :       26002 :   coalesced_moves = 0;
     244                 :     1613198 :   FOR_EACH_BB_FN (bb, cfun)
     245                 :             :     {
     246                 :    49385586 :       FOR_BB_INSNS_SAFE (bb, insn, next)
     247                 :    23105597 :         if (INSN_P (insn)
     248                 :    19438457 :             && (set = single_set (insn)) != NULL_RTX
     249                 :    12593152 :             && REG_P (SET_DEST (set)) && REG_P (SET_SRC (set))
     250                 :     4196803 :             && (sregno = REGNO (SET_SRC (set))) >= FIRST_PSEUDO_REGISTER
     251                 :     3989999 :             && (dregno = REGNO (SET_DEST (set))) >= FIRST_PSEUDO_REGISTER
     252                 :     3607861 :             && mem_move_p (sregno, dregno)
     253                 :        3118 :             && coalescable_pseudo_p (sregno) && coalescable_pseudo_p (dregno)
     254                 :        3116 :             && ! side_effects_p (set)
     255                 :    23108713 :             && !(lra_intersected_live_ranges_p
     256                 :        3116 :                  (lra_reg_info[sregno].live_ranges,
     257                 :        3116 :                   lra_reg_info[dregno].live_ranges)))
     258                 :        2085 :           sorted_moves[mv_num++] = insn;
     259                 :             :     }
     260                 :       26002 :   qsort (sorted_moves, mv_num, sizeof (rtx), move_freq_compare_func);
     261                 :             :   /* Coalesced copies, most frequently executed first.  */
     262                 :       26002 :   bitmap_initialize (&coalesced_pseudos_bitmap, &reg_obstack);
     263                 :       26002 :   bitmap_initialize (&involved_insns_bitmap, &reg_obstack);
     264                 :       28087 :   for (i = 0; i < mv_num; i++)
     265                 :             :     {
     266                 :        2085 :       mv = sorted_moves[i];
     267                 :        2085 :       set = single_set (mv);
     268                 :        2085 :       lra_assert (set != NULL && REG_P (SET_SRC (set))
     269                 :             :                   && REG_P (SET_DEST (set)));
     270                 :        2085 :       sregno = REGNO (SET_SRC (set));
     271                 :        2085 :       dregno = REGNO (SET_DEST (set));
     272                 :        2085 :       if (first_coalesced_pseudo[sregno] == first_coalesced_pseudo[dregno])
     273                 :             :         {
     274                 :         945 :           coalesced_moves++;
     275                 :         945 :           if (lra_dump_file != NULL)
     276                 :           0 :             fprintf
     277                 :           0 :               (lra_dump_file, "      Coalescing move %i:r%d-r%d (freq=%d)\n",
     278                 :           0 :                INSN_UID (mv), sregno, dregno,
     279                 :           0 :                REG_FREQ_FROM_BB (BLOCK_FOR_INSN (mv)));
     280                 :             :           /* We updated involved_insns_bitmap when doing the merge.  */
     281                 :             :         }
     282                 :        1140 :       else if (!(lra_intersected_live_ranges_p
     283                 :        1140 :                  (lra_reg_info[first_coalesced_pseudo[sregno]].live_ranges,
     284                 :        1140 :                   lra_reg_info[first_coalesced_pseudo[dregno]].live_ranges)))
     285                 :             :         {
     286                 :        1140 :           coalesced_moves++;
     287                 :        1140 :           if (lra_dump_file != NULL)
     288                 :           0 :             fprintf
     289                 :           0 :               (lra_dump_file,
     290                 :             :                "  Coalescing move %i:r%d(%d)-r%d(%d) (freq=%d)\n",
     291                 :           0 :                INSN_UID (mv), sregno, ORIGINAL_REGNO (SET_SRC (set)),
     292                 :           0 :                dregno, ORIGINAL_REGNO (SET_DEST (set)),
     293                 :           0 :                REG_FREQ_FROM_BB (BLOCK_FOR_INSN (mv)));
     294                 :        1140 :           bitmap_ior_into (&involved_insns_bitmap,
     295                 :        1140 :                            &lra_reg_info[sregno].insn_bitmap);
     296                 :        1140 :           bitmap_ior_into (&involved_insns_bitmap,
     297                 :        1140 :                            &lra_reg_info[dregno].insn_bitmap);
     298                 :        1140 :           merge_pseudos (sregno, dregno);
     299                 :             :         }
     300                 :             :     }
     301                 :       26002 :   bitmap_initialize (&used_pseudos_bitmap, &reg_obstack);
     302                 :     1613198 :   FOR_EACH_BB_FN (bb, cfun)
     303                 :             :     {
     304                 :     1587196 :       update_live_info (df_get_live_in (bb));
     305                 :     1587196 :       update_live_info (df_get_live_out (bb));
     306                 :    49385586 :       FOR_BB_INSNS_SAFE (bb, insn, next)
     307                 :    23105597 :         if (INSN_P (insn)
     308                 :    23105597 :             && bitmap_bit_p (&involved_insns_bitmap, INSN_UID (insn)))
     309                 :             :           {
     310                 :        9927 :             if (! substitute_within_insn (insn))
     311                 :        5176 :               continue;
     312                 :        4751 :             lra_update_insn_regno_info (insn);
     313                 :        4751 :             if ((set = single_set (insn)) != NULL_RTX && set_noop_p (set))
     314                 :             :               {
     315                 :             :                 /* Coalesced move.  */
     316                 :        2085 :                 if (lra_dump_file != NULL)
     317                 :           0 :                   fprintf (lra_dump_file, "         Removing move %i (freq=%d)\n",
     318                 :           0 :                            INSN_UID (insn),
     319                 :           0 :                            REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn)));
     320                 :        2085 :                 lra_set_insn_deleted (insn);
     321                 :             :               }
     322                 :             :           }
     323                 :             :     }
     324                 :             :   /* If we have situation after inheritance pass:
     325                 :             : 
     326                 :             :      r1 <- p1   insn originally setting p1
     327                 :             :      i1 <- r1   setting inheritance i1 from reload r1
     328                 :             :        ...
     329                 :             :      ... <- ... p2 ... dead p2
     330                 :             :      ..
     331                 :             :      p1 <- i1
     332                 :             :      r2 <- i1
     333                 :             :      ...<- ... r2 ...
     334                 :             : 
     335                 :             :      And we are coalescing p1 and p2 using p1.  In this case i1 and p1
     336                 :             :      should have different values, otherwise they can get the same
     337                 :             :      hard reg and this is wrong for insn using p2 before coalescing.
     338                 :             :      The situation even can be more complicated when new reload
     339                 :             :      pseudos occur after the inheriatnce.  So invalidate the result
     340                 :             :      pseudos.  */
     341                 :    13588718 :   for (i = 0; i < max_regno; i++)
     342                 :    13562716 :     if (first_coalesced_pseudo[i] == i
     343                 :    13561576 :         && first_coalesced_pseudo[i] != next_coalesced_pseudo[i])
     344                 :             :       {
     345                 :         914 :         lra_set_regno_unique_value (i);
     346                 :         914 :         if (lra_dump_file != NULL)
     347                 :           0 :           fprintf (lra_dump_file,
     348                 :             :                    "        Make unique value for coalescing result r%d\n", i);
     349                 :             :       }
     350                 :       26002 :   bitmap_clear (&used_pseudos_bitmap);
     351                 :       26002 :   bitmap_clear (&involved_insns_bitmap);
     352                 :       26002 :   bitmap_clear (&coalesced_pseudos_bitmap);
     353                 :       26002 :   if (lra_dump_file != NULL && coalesced_moves != 0)
     354                 :           0 :     fprintf (lra_dump_file, "Coalesced Moves = %d\n", coalesced_moves);
     355                 :       26002 :   free (sorted_moves);
     356                 :       26002 :   free (next_coalesced_pseudo);
     357                 :       26002 :   free (first_coalesced_pseudo);
     358                 :       26002 :   timevar_pop (TV_LRA_COALESCE);
     359                 :       26002 :   return coalesced_moves != 0;
     360                 :             : }
        

Generated by: LCOV version 2.1-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.