LCOV - code coverage report
Current view: top level - gcc - late-combine.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 93.8 % 357 335
Test Date: 2026-02-28 14:20:25 Functions: 100.0 % 25 25
Legend: Lines:     hit not hit

            Line data    Source code
       1              : // Late-stage instruction combination pass.
       2              : // Copyright (C) 2023-2026 Free Software Foundation, Inc.
       3              : //
       4              : // This file is part of GCC.
       5              : //
       6              : // GCC is free software; you can redistribute it and/or modify it under
       7              : // the terms of the GNU General Public License as published by the Free
       8              : // Software Foundation; either version 3, or (at your option) any later
       9              : // version.
      10              : //
      11              : // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
      12              : // WARRANTY; without even the implied warranty of MERCHANTABILITY or
      13              : // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
      14              : // for more details.
      15              : //
      16              : // You should have received a copy of the GNU General Public License
      17              : // along with GCC; see the file COPYING3.  If not see
      18              : // <http://www.gnu.org/licenses/>.
      19              : 
      20              : // This pass currently has two purposes:
      21              : //
      22              : // - to substitute definitions into all uses, so that the definition
      23              : //   can be removed.
      24              : //
      25              : // - to try to parallelise sets of condition-code registers with a
      26              : //   related instruction (see combine_cc_setter for details).
      27              : //
      28              : // However, it could be extended to handle other combination-related
      29              : // optimizations in future.
      30              : //
      31              : // The pass can run before or after register allocation.  When running
      32              : // before register allocation, it tries to avoid cases that are likely
      33              : // to increase register pressure.  For the same reason, it avoids moving
      34              : // instructions around, even if doing so would allow an optimization to
      35              : // succeed.  These limitations are removed when running after register
      36              : // allocation.
      37              : 
      38              : #define INCLUDE_ALGORITHM
      39              : #define INCLUDE_FUNCTIONAL
      40              : #define INCLUDE_ARRAY
      41              : #include "config.h"
      42              : #include "system.h"
      43              : #include "coretypes.h"
      44              : #include "backend.h"
      45              : #include "rtl.h"
      46              : #include "df.h"
      47              : #include "rtl-ssa.h"
      48              : #include "print-rtl.h"
      49              : #include "tree-pass.h"
      50              : #include "cfgcleanup.h"
      51              : #include "target.h"
      52              : #include "dbgcnt.h"
      53              : 
      54              : using namespace rtl_ssa;
      55              : 
      56              : namespace {
      57              : const pass_data pass_data_late_combine =
      58              : {
      59              :   RTL_PASS, // type
      60              :   "late_combine", // name
      61              :   OPTGROUP_NONE, // optinfo_flags
      62              :   TV_LATE_COMBINE, // tv_id
      63              :   0, // properties_required
      64              :   0, // properties_provided
      65              :   0, // properties_destroyed
      66              :   0, // todo_flags_start
      67              :   TODO_df_finish, // todo_flags_finish
      68              : };
      69              : 
      70              : // Represents an attempt to substitute a single-set definition into all
      71              : // uses of the definition.
      72              : class insn_combination
      73              : {
      74              : public:
      75              :   insn_combination (set_info *, rtx, rtx);
      76              :   bool run ();
      77              :   array_slice<insn_change *const> use_changes () const;
      78              : 
      79              : private:
      80              :   use_array get_new_uses (use_info *);
      81              :   bool substitute_nondebug_use (use_info *);
      82              :   bool substitute_nondebug_uses (set_info *);
      83              :   bool try_to_preserve_debug_info (insn_change &, use_info *);
      84              :   void substitute_debug_use (use_info *);
      85              :   bool substitute_note (insn_info *, rtx, bool);
      86              :   void substitute_notes (insn_info *, bool);
      87              :   void substitute_note_uses (use_info *);
      88              :   void substitute_optional_uses (set_info *);
      89              : 
      90              :   // Represents the state of the function's RTL at the start of this
      91              :   // combination attempt.
      92              :   insn_change_watermark m_rtl_watermark;
      93              : 
      94              :   // Represents the rtl-ssa state at the start of this combination attempt.
      95              :   obstack_watermark m_attempt;
      96              : 
      97              :   // The instruction that contains the definition, and that we're trying
      98              :   // to delete.
      99              :   insn_info *m_def_insn;
     100              : 
     101              :   // The definition itself.
     102              :   set_info *m_def;
     103              : 
     104              :   // The destination and source of the single set that defines m_def.
     105              :   // The destination is known to be a plain REG.
     106              :   rtx m_dest;
     107              :   rtx m_src;
     108              : 
     109              :   // Contains the full list of changes that we want to make, in reverse
     110              :   // postorder.
     111              :   auto_vec<insn_change *> m_nondebug_changes;
     112              : };
     113              : 
     114              : // Class that represents one run of the pass.
     115      5783352 : class late_combine
     116              : {
     117              : public:
     118              :   unsigned int execute (function *);
     119              : 
     120              : private:
     121              :   rtx optimizable_set (set_info *);
     122              :   bool check_register_pressure (insn_info *, rtx);
     123              :   bool check_uses (set_info *, rtx);
     124              :   bool combine_into_uses (set_info *, rtx, insn_info *);
     125              :   bool parallelize_insns (set_info *, rtx, set_info *, rtx);
     126              :   bool combine_cc_setter (set_info *, rtx);
     127              :   bool combine_insn (insn_info *, insn_info *);
     128              : 
     129              :   auto_vec<insn_info *> m_worklist;
     130              : 
     131              :   // A spare PARALLEL rtx, or null if none.
     132              :   rtx m_parallel = NULL_RTX;
     133              : };
     134              : 
     135     20797694 : insn_combination::insn_combination (set_info *def, rtx dest, rtx src)
     136     20797694 :   : m_rtl_watermark (),
     137     20797694 :     m_attempt (crtl->ssa->new_change_attempt ()),
     138     20797694 :     m_def_insn (def->insn ()),
     139     20797694 :     m_def (def),
     140     20797694 :     m_dest (dest),
     141     20797694 :     m_src (src),
     142     20797694 :     m_nondebug_changes ()
     143              : {
     144     20797694 : }
     145              : 
     146              : array_slice<insn_change *const>
     147      2155174 : insn_combination::use_changes () const
     148              : {
     149      2155174 :   return { m_nondebug_changes.address () + 1,
     150      2155174 :            m_nondebug_changes.length () - 1 };
     151              : }
     152              : 
     153              : // USE is a direct or indirect use of m_def.  Return the list of uses
     154              : // that would be needed after substituting m_def into the instruction.
     155              : // The returned list is marked as invalid if USE's insn and m_def_insn
     156              : // use different definitions for the same resource (register or memory).
     157              : use_array
     158     26989988 : insn_combination::get_new_uses (use_info *use)
     159              : {
     160     26989988 :   auto *def = use->def ();
     161     26989988 :   auto *use_insn = use->insn ();
     162              : 
     163     26989988 :   use_array new_uses = use_insn->uses ();
     164     26989988 :   new_uses = remove_uses_of_def (m_attempt, new_uses, def);
     165     26989988 :   new_uses = merge_access_arrays (m_attempt, m_def_insn->uses (), new_uses);
     166     53871884 :   if (new_uses.is_valid () && use->ebb () != m_def->ebb ())
     167      4542663 :     new_uses = crtl->ssa->make_uses_available (m_attempt, new_uses, use->bb (),
     168      4542663 :                                                use_insn->is_debug_insn ());
     169     26989988 :   return new_uses;
     170              : }
     171              : 
     172              : // Start the process of trying to replace USE by substitution, given that
     173              : // USE occurs in a non-debug instruction.  Check:
     174              : //
     175              : // - that the substitution can be represented in RTL
     176              : //
     177              : // - that each use of a resource (register or memory) within the new
     178              : //   instruction has a consistent definition
     179              : //
     180              : // - that the new instruction is a recognized pattern
     181              : //
     182              : // - that the instruction can be placed somewhere that makes all definitions
     183              : //   and uses valid, and that permits any new hard-register clobbers added
     184              : //   during the recognition process
     185              : //
     186              : // Return true on success.
     187              : bool
     188     25847451 : insn_combination::substitute_nondebug_use (use_info *use)
     189              : {
     190     25847451 :   insn_info *use_insn = use->insn ();
     191     25847451 :   rtx_insn *use_rtl = use_insn->rtl ();
     192              : 
     193     25847451 :   if (dump_file && (dump_flags & TDF_DETAILS))
     194            0 :     dump_insn_slim (dump_file, use->insn ()->rtl ());
     195              : 
     196              :   // Reject second and subsequent uses if the target does not allow
     197              :   // the defining instruction to be copied.
     198     25847451 :   if (targetm.cannot_copy_insn_p
     199            0 :       && m_nondebug_changes.length () >= 2
     200     25847451 :       && targetm.cannot_copy_insn_p (m_def_insn->rtl ()))
     201              :     {
     202            0 :       if (dump_file && (dump_flags & TDF_DETAILS))
     203            0 :         fprintf (dump_file, "-- The target does not allow multiple"
     204            0 :                  " copies of insn %d\n", m_def_insn->uid ());
     205            0 :       return false;
     206              :     }
     207              : 
     208              :   // Check that we can change the instruction pattern.  Leave recognition
     209              :   // of the result till later.
     210     25847451 :   insn_propagation prop (use_rtl, m_dest, m_src);
     211     25847451 :   if (!prop.apply_to_pattern (&PATTERN (use_rtl))
     212     25847451 :       || prop.num_replacements == 0)
     213              :     {
     214       488940 :       if (dump_file && (dump_flags & TDF_DETAILS))
     215            0 :         fprintf (dump_file, "-- RTL substitution failed\n");
     216       488940 :       return false;
     217              :     }
     218              : 
     219     25358511 :   use_array new_uses = get_new_uses (use);
     220     25358511 :   if (!new_uses.is_valid ())
     221              :     {
     222      1056853 :       if (dump_file && (dump_flags & TDF_DETAILS))
     223            0 :         fprintf (dump_file, "-- could not prove that all sources"
     224              :                  " are available\n");
     225      1056853 :       return false;
     226              :     }
     227              : 
     228              :   // Create a tentative change for the use.
     229     24301658 :   auto *where = XOBNEW (m_attempt, insn_change);
     230     24301658 :   auto *use_change = new (where) insn_change (use_insn);
     231     24301658 :   m_nondebug_changes.safe_push (use_change);
     232     24301658 :   use_change->new_uses = new_uses;
     233              : 
     234     24301658 :   struct local_ignore : ignore_nothing
     235              :   {
     236     24301658 :     local_ignore (const set_info *def, const insn_info *use_insn)
     237     24301658 :       : m_def (def), m_use_insn (use_insn) {}
     238              : 
     239              :     // We don't limit the number of insns per optimization, so ignoring all
     240              :     // insns for all insns would lead to quadratic complexity.  Just ignore
     241              :     // the use and definition, which should be enough for most purposes.
     242              :     bool
     243    138447409 :     should_ignore_insn (const insn_info *insn)
     244              :     {
     245    138447409 :       return insn == m_def->insn () || insn == m_use_insn;
     246              :     }
     247              : 
     248              :     // Ignore the definition that we're removing, and all uses of it.
     249     47578525 :     bool should_ignore_def (const def_info *def) { return def == m_def; }
     250              : 
     251              :     const set_info *m_def;
     252              :     const insn_info *m_use_insn;
     253              :   };
     254              : 
     255     24301658 :   auto ignore = local_ignore (m_def, use_insn);
     256              : 
     257              :   // Moving instructions before register allocation could increase
     258              :   // register pressure.  Only try moving them after RA.
     259     24301658 :   if (reload_completed && can_move_insn_p (use_insn))
     260      8305289 :     use_change->move_range = { use_insn->bb ()->head_insn (),
     261      8305289 :                                use_insn->ebb ()->last_bb ()->end_insn () };
     262     24301658 :   if (!restrict_movement (*use_change, ignore))
     263              :     {
     264       749783 :       if (dump_file && (dump_flags & TDF_DETAILS))
     265            0 :         fprintf (dump_file, "-- cannot satisfy all definitions and uses"
     266            0 :                  " in insn %d\n", INSN_UID (use_insn->rtl ()));
     267       749783 :       return false;
     268              :     }
     269              : 
     270     23551875 :   if (!recog (m_attempt, *use_change, ignore))
     271              :     return false;
     272              : 
     273              :   return true;
     274              : }
     275              : 
     276              : // Apply substitute_nondebug_use to all direct and indirect uses of DEF.
     277              : // There will be at most one level of indirection.
     278              : bool
     279     21432086 : insn_combination::substitute_nondebug_uses (set_info *def)
     280              : {
     281     64468760 :   for (use_info *use : def->nondebug_insn_uses ())
     282     29244807 :     if (!use->is_live_out_use ()
     283     25928150 :         && !use->only_occurs_in_notes ()
     284     55092258 :         && !substitute_nondebug_use (use))
     285     19007724 :       return false;
     286              : 
     287      6048327 :   for (use_info *use : def->phi_uses ())
     288       634392 :     if (!substitute_nondebug_uses (use->phi ()))
     289       565211 :       return false;
     290              : 
     291      2424362 :   return true;
     292              : }
     293              : 
     294              : // USE_CHANGE.insn () is a debug instruction that uses m_def.  Try to
     295              : // substitute the definition into the instruction and try to describe
     296              : // the result in USE_CHANGE.  Return true on success.  Failure means that
     297              : // the instruction must be reset instead.
     298              : bool
     299      1583718 : insn_combination::try_to_preserve_debug_info (insn_change &use_change,
     300              :                                               use_info *use)
     301              : {
     302              :   // Punt on unsimplified subregs of hard registers.  In that case,
     303              :   // propagation can succeed and create a wider reg than the one we
     304              :   // started with.
     305      1583718 :   if (HARD_REGISTER_NUM_P (use->regno ())
     306      1583718 :       && use->includes_subregs ())
     307              :     return false;
     308              : 
     309      1583695 :   insn_info *use_insn = use_change.insn ();
     310      1583695 :   rtx_insn *use_rtl = use_insn->rtl ();
     311              : 
     312      1583695 :   use_change.new_uses = get_new_uses (use);
     313      1583868 :   if (!use_change.new_uses.is_valid ()
     314      1578688 :       || !restrict_movement (use_change))
     315        16344 :     return false;
     316              : 
     317      1567351 :   insn_propagation prop (use_rtl, m_dest, m_src);
     318      1567351 :   return prop.apply_to_pattern (&INSN_VAR_LOCATION_LOC (use_rtl));
     319              : }
     320              : 
     321              : // USE_INSN is a debug instruction that uses m_def.  Update it to reflect
     322              : // the fact that m_def is going to disappear.  Try to preserve the source
     323              : // value if possible, but reset the instruction if not.
     324              : void
     325      1583718 : insn_combination::substitute_debug_use (use_info *use)
     326              : {
     327      1583718 :   auto *use_insn = use->insn ();
     328      1583718 :   rtx_insn *use_rtl = use_insn->rtl ();
     329              : 
     330      1583718 :   auto use_change = insn_change (use_insn);
     331      1583718 :   if (!try_to_preserve_debug_info (use_change, use))
     332              :     {
     333        16431 :       use_change.new_uses = {};
     334        16431 :       use_change.move_range = use_change.insn ();
     335        16431 :       INSN_VAR_LOCATION_LOC (use_rtl) = gen_rtx_UNKNOWN_VAR_LOC ();
     336              :     }
     337      1583718 :   insn_change *changes[] = { &use_change };
     338      1583718 :   crtl->ssa->change_insns (changes);
     339      1583718 : }
     340              : 
     341              : // NOTE is a reg note of USE_INSN, which previously used m_def.  Update
     342              : // the note to reflect the fact that m_def is going to disappear.  Return
     343              : // true on success, or false if the note must be deleted.
     344              : //
     345              : // CAN_PROPAGATE is true if m_dest can be replaced with m_use.
     346              : bool
     347      3401857 : insn_combination::substitute_note (insn_info *use_insn, rtx note,
     348              :                                    bool can_propagate)
     349              : {
     350      3401857 :   if (REG_NOTE_KIND (note) == REG_EQUAL
     351      3401857 :       || REG_NOTE_KIND (note) == REG_EQUIV)
     352              :     {
     353       188739 :       insn_propagation prop (use_insn->rtl (), m_dest, m_src);
     354       188739 :       return (prop.apply_to_note (&XEXP (note, 0))
     355       188739 :               && (can_propagate || prop.num_replacements == 0));
     356              :     }
     357              :   return true;
     358              : }
     359              : 
     360              : // Update USE_INSN's notes after deciding to go ahead with the optimization.
     361              : // CAN_PROPAGATE is true if m_dest can be replaced with m_use.
     362              : void
     363      5782309 : insn_combination::substitute_notes (insn_info *use_insn, bool can_propagate)
     364              : {
     365      5782309 :   rtx_insn *use_rtl = use_insn->rtl ();
     366      5782309 :   rtx *ptr = &REG_NOTES (use_rtl);
     367      9184166 :   while (rtx note = *ptr)
     368              :     {
     369      3401857 :       if (substitute_note (use_insn, note, can_propagate))
     370      3398812 :         ptr = &XEXP (note, 1);
     371              :       else
     372         3045 :         *ptr = XEXP (note, 1);
     373              :     }
     374      5782309 : }
     375              : 
     376              : // We've decided to go ahead with the substitution.  Update all REG_NOTES
     377              : // involving USE.
     378              : void
     379      5782309 : insn_combination::substitute_note_uses (use_info *use)
     380              : {
     381      5782309 :   insn_info *use_insn = use->insn ();
     382              : 
     383      5782309 :   bool can_propagate = true;
     384      5782309 :   if (use->only_occurs_in_notes ())
     385              :     {
     386              :       // The only uses are in notes.  Try to keep the note if we can,
     387              :       // but removing it is better than aborting the optimization.
     388        47782 :       insn_change use_change (use_insn);
     389        47782 :       use_change.new_uses = get_new_uses (use);
     390        47782 :       if (!use_change.new_uses.is_valid ()
     391        47410 :           || !restrict_movement (use_change))
     392              :         {
     393         3042 :           use_change.move_range = use_insn;
     394         3042 :           use_change.new_uses = remove_uses_of_def (m_attempt,
     395              :                                                     use_insn->uses (),
     396         3042 :                                                     use->def ());
     397         3042 :           can_propagate = false;
     398              :         }
     399        47782 :       if (dump_file && (dump_flags & TDF_DETAILS))
     400              :         {
     401            0 :           fprintf (dump_file, "%s notes in:\n",
     402              :                    can_propagate ? "updating" : "removing");
     403            0 :           dump_insn_slim (dump_file, use_insn->rtl ());
     404              :         }
     405        47782 :       substitute_notes (use_insn, can_propagate);
     406        47782 :       insn_change *changes[] = { &use_change };
     407        47782 :       crtl->ssa->change_insns (changes);
     408              :     }
     409              :   else
     410              :     // We've already decided to update the insn's pattern and know that m_src
     411              :     // will be available at the insn's new location.  Now update its notes.
     412      5734527 :     substitute_notes (use_insn, can_propagate);
     413      5782309 : }
     414              : 
     415              : // We've decided to go ahead with the substitution and we've dealt with
     416              : // all uses that occur in the patterns of non-debug insns.  Update all
     417              : // other uses for the fact that m_def is about to disappear.
     418              : void
     419      2181179 : insn_combination::substitute_optional_uses (set_info *def)
     420              : {
     421      4362358 :   if (auto insn_uses = def->all_insn_uses ())
     422              :     {
     423              :       use_info *use = *insn_uses.begin ();
     424     10803499 :       while (use)
     425              :         {
     426      8658357 :           use_info *next_use = use->next_any_insn_use ();
     427      8658357 :           if (use->is_in_debug_insn ())
     428      1583718 :             substitute_debug_use (use);
     429      7074639 :           else if (!use->is_live_out_use ())
     430      5782309 :             substitute_note_uses (use);
     431              :           use = next_use;
     432              :         }
     433              :     }
     434      4388363 :   for (use_info *use : def->phi_uses ())
     435        26005 :     substitute_optional_uses (use->phi ());
     436      2181179 : }
     437              : 
     438              : // Try to perform the substitution.  Return true on success.
     439              : bool
     440     20797694 : insn_combination::run ()
     441              : {
     442     20797694 :   if (dump_file && (dump_flags & TDF_DETAILS))
     443              :     {
     444            0 :       fprintf (dump_file, "\ntrying to combine definition of r%d in:\n",
     445            0 :                m_def->regno ());
     446            0 :       dump_insn_slim (dump_file, m_def_insn->rtl ());
     447            0 :       fprintf (dump_file, "into:\n");
     448              :     }
     449              : 
     450     20797694 :   auto def_change = insn_change::delete_insn (m_def_insn);
     451     20797694 :   m_nondebug_changes.safe_push (&def_change);
     452              : 
     453     20797694 :   if (!substitute_nondebug_uses (m_def)
     454      4710362 :       || !changes_are_worthwhile (m_nondebug_changes)
     455     25116636 :       || !crtl->ssa->verify_insn_changes (m_nondebug_changes))
     456     18642520 :     return false;
     457              : 
     458              :   // We've now decided that the optimization is valid and profitable.
     459              :   // Allow it to be suppressed for bisection purposes.
     460      2155174 :   if (!dbg_cnt (::late_combine))
     461              :     return false;
     462              : 
     463      2155174 :   substitute_optional_uses (m_def);
     464              : 
     465      2155174 :   confirm_change_group ();
     466      4310348 :   crtl->ssa->change_insns (m_nondebug_changes);
     467      2155174 :   return true;
     468              : }
     469              : 
     470              : // DEF is the result of calling single_set_info on its instruction.
     471              : // See whether that instruction is a single_set that we can optimize.
     472              : // Return the set if so, otherwise return null.
     473              : rtx
     474     89910597 : late_combine::optimizable_set (set_info *def)
     475              : {
     476              :   // For simplicity, don't try to handle sets of multiple hard registers.
     477              :   // And for correctness, don't remove any assignments to the stack or
     478              :   // frame pointers, since that would implicitly change the set of valid
     479              :   // memory locations between this assignment and the next.
     480              :   //
     481              :   // Removing assignments to the hard frame pointer would invalidate
     482              :   // backtraces.
     483     89910597 :   if (!def->is_reg ()
     484     71005624 :       || def->regno () == STACK_POINTER_REGNUM
     485     71005624 :       || def->regno () == FRAME_POINTER_REGNUM
     486    160916221 :       || def->regno () == HARD_FRAME_POINTER_REGNUM)
     487              :     return NULL_RTX;
     488              : 
     489     70422015 :   auto *insn = def->insn ();
     490              :   if (!insn->can_be_optimized ()
     491     70422015 :       || insn->is_asm ()
     492     70403307 :       || insn->is_call ()
     493     69382704 :       || insn->has_volatile_refs ()
     494     68666259 :       || insn->has_pre_post_modify ()
     495    139088274 :       || !can_move_insn_p (insn))
     496     10581061 :     return NULL_RTX;
     497              : 
     498     59840954 :   rtx set = single_set (insn->rtl ());
     499     59840954 :   if (!set)
     500              :     return NULL_RTX;
     501              : 
     502              :   // For simplicity, don't try to handle subreg destinations.
     503     59832114 :   rtx dest = SET_DEST (set);
     504     59832114 :   if (!REG_P (dest) || REG_NREGS (dest) != 1 || def->regno () != REGNO (dest))
     505              :     return NULL_RTX;
     506              : 
     507              :   return set;
     508              : }
     509              : 
     510              : // Suppose that we can replace all uses of SET_DEST (SET) with SET_SRC (SET),
     511              : // where SET occurs in INSN.  Return true if doing so is not likely to
     512              : // increase register pressure.
     513              : bool
     514     26971744 : late_combine::check_register_pressure (insn_info *insn, rtx set)
     515              : {
     516              :   // Plain register-to-register moves do not establish a register class
     517              :   // preference and have no well-defined effect on the register allocator.
     518              :   // If changes in register class are needed, the register allocator is
     519              :   // in the best position to place those changes.  If no change in
     520              :   // register class is needed, then the optimization reduces register
     521              :   // pressure if SET_SRC (set) was already live at uses, otherwise the
     522              :   // optimization is pressure-neutral.
     523     26971744 :   rtx src = SET_SRC (set);
     524     26971744 :   if (REG_P (src))
     525              :     return true;
     526              : 
     527              :   // On the same basis, substituting a SET_SRC that contains a single
     528              :   // pseudo register either reduces pressure or is pressure-neutral,
     529              :   // subject to the constraints below.  We would need to do more
     530              :   // analysis for SET_SRCs that use more than one pseudo register.
     531     19722147 :   unsigned int nregs = 0;
     532     36768638 :   for (auto *use : insn->uses ())
     533     20411243 :     if (use->is_reg ()
     534     17084468 :         && !HARD_REGISTER_NUM_P (use->regno ())
     535     34083606 :         && !use->only_occurs_in_notes ())
     536     13589473 :       if (++nregs > 1)
     537      3364752 :         return false;
     538              : 
     539              :   // If there are no pseudo registers in SET_SRC then the optimization
     540              :   // should improve register pressure.
     541     16357395 :   if (nregs == 0)
     542              :     return true;
     543              : 
     544              :   // We'd be substituting (set (reg R1) SRC) where SRC is known to
     545              :   // contain a single pseudo register R2.  Assume for simplicity that
     546              :   // each new use of R2 would need to be in the same class C as the
     547              :   // current use of R2.  If, for a realistic allocation, C is a
     548              :   // non-strict superset of the R1's register class, the effect on
     549              :   // register pressure should be positive or neutral.  If instead
     550              :   // R1 occupies a different register class from R2, or if R1 has
     551              :   // more allocation freedom than R2, then there's a higher risk that
     552              :   // the effect on register pressure could be negative.
     553              :   //
     554              :   // First use constrain_operands to get the most likely choice of
     555              :   // alternative.  For simplicity, just handle the case where the
     556              :   // output operand is operand 0.
     557      6859969 :   extract_insn (insn->rtl ());
     558      6859969 :   rtx dest = SET_DEST (set);
     559      6859969 :   if (recog_data.n_operands == 0
     560      6859969 :       || recog_data.operand[0] != dest)
     561              :     return false;
     562              : 
     563      4459769 :   if (!constrain_operands (0, get_enabled_alternatives (insn->rtl ())))
     564              :     return false;
     565              : 
     566      4459769 :   preprocess_constraints (insn->rtl ());
     567      4459769 :   auto *alt = which_op_alt ();
     568      4459769 :   auto dest_class = alt[0].cl;
     569              : 
     570              :   // Check operands 1 and above.
     571     11920543 :   auto check_src = [&] (unsigned int i)
     572              :     {
     573      7460774 :       if (recog_data.is_operator[i])
     574              :         return true;
     575              : 
     576      7357343 :       rtx op = recog_data.operand[i];
     577      7357343 :       if (CONSTANT_P (op))
     578              :         return true;
     579              : 
     580      4563008 :       if (SUBREG_P (op))
     581       551564 :         op = SUBREG_REG (op);
     582      4563008 :       if (REG_P (op))
     583              :         {
     584              :           // Ignore hard registers.  We've already rejected uses of non-fixed
     585              :           // hard registers in the SET_SRC.
     586      3767802 :           if (HARD_REGISTER_P (op))
     587              :             return true;
     588              : 
     589              :           // Make sure that the source operand's class is at least as
     590              :           // permissive as the destination operand's class.
     591      3765816 :           auto src_class = alternative_class (alt, i);
     592      3765816 :           if (dest_class != src_class)
     593              :             {
     594       340478 :               auto extra_dest_regs = (reg_class_contents[dest_class]
     595       340478 :                                       & ~reg_class_contents[src_class]
     596       340478 :                                       & ~fixed_reg_set);
     597       680956 :               if (!hard_reg_set_empty_p (extra_dest_regs))
     598        92644 :                 return false;
     599              :             }
     600              : 
     601              :           // Make sure that the source operand occupies no more hard
     602              :           // registers than the destination operand.  This mostly matters
     603              :           // for subregs.
     604      7346344 :           if (targetm.class_max_nregs (dest_class, GET_MODE (dest))
     605      3673172 :               < targetm.class_max_nregs (src_class, GET_MODE (op)))
     606              :             return false;
     607              : 
     608              :           return true;
     609              :         }
     610              :       return false;
     611      4459769 :     };
     612     10843837 :   for (int i = 1; i < recog_data.n_operands; ++i)
     613      7465861 :     if (recog_data.operand_type[i] != OP_OUT && !check_src (i))
     614              :       return false;
     615              : 
     616              :   return true;
     617              : }
     618              : 
     619              : // Check uses of DEF to see whether there is anything obvious that
     620              : // prevents the substitution of SET into uses of DEF.
     621              : bool
     622     47838279 : late_combine::check_uses (set_info *def, rtx set)
     623              : {
     624     47838279 :   use_info *prev_use = nullptr;
     625    196548274 :   for (use_info *use : def->nondebug_insn_uses ())
     626              :     {
     627     67546217 :       insn_info *use_insn = use->insn ();
     628              : 
     629     67546217 :       if (use->is_live_out_use ())
     630     14130263 :         continue;
     631     53415954 :       if (use->only_occurs_in_notes ())
     632       196730 :         continue;
     633              : 
     634              :       // We cannot replace all uses if the value is live on exit.
     635     53219224 :       if (use->is_artificial ())
     636     47838279 :         return false;
     637              : 
     638              :       // Avoid increasing the complexity of instructions that
     639              :       // reference allocatable hard registers.
     640     52501393 :       if (!REG_P (SET_SRC (set))
     641     36756531 :           && !reload_completed
     642     66626131 :           && (accesses_include_nonfixed_hard_registers (use_insn->uses ())
     643     10717032 :               || accesses_include_nonfixed_hard_registers (use_insn->defs ())))
     644      4695588 :         return false;
     645              : 
     646              :       // Don't substitute into a non-local goto, since it can then be
     647              :       // treated as a jump to local label, e.g. in shorten_branches.
     648              :       // ??? But this shouldn't be necessary.
     649     47805805 :       if (use_insn->is_jump ()
     650     47805805 :           && find_reg_note (use_insn->rtl (), REG_NON_LOCAL_GOTO, NULL_RTX))
     651              :         return false;
     652              : 
     653              :       // Reject cases where one of the uses is a function argument.
     654              :       // The combine attempt should fail anyway, but this is a common
     655              :       // case that is easy to check early.
     656     47804773 :       if (use_insn->is_call ()
     657     11120919 :           && HARD_REGISTER_P (SET_DEST (set))
     658     58913572 :           && find_reg_fusage (use_insn->rtl (), USE, SET_DEST (set)))
     659              :         return false;
     660              : 
     661              :       // We'll keep the uses in their original order, even if we move
     662              :       // them relative to other instructions.  Make sure that non-final
     663              :       // uses do not change any values that occur in the SET_SRC.
     664     59504864 :       if (prev_use && prev_use->ebb () == use->ebb ())
     665              :         {
     666      8705293 :           def_info *ultimate_def = look_through_degenerate_phi (def);
     667      8705293 :           if (insn_clobbers_resources (prev_use->insn (),
     668      8705293 :                                        ultimate_def->insn ()->uses ()))
     669              :             return false;
     670              :         }
     671              : 
     672              :       prev_use = use;
     673              :     }
     674              : 
     675     62973698 :   for (use_info *use : def->phi_uses ())
     676      9930226 :     if (!use->phi ()->is_degenerate ()
     677      9930226 :         || !check_uses (use->phi (), set))
     678      8412368 :       return false;
     679              : 
     680     22315552 :   return true;
     681              : }
     682              : 
     683              : // Try to remove DEF's instruction by substituting DEF into all uses.
     684              : // SET is the rtx set associated with DEF.  If the optimization moves any
     685              : // instructions before CURSOR, add those instructions to the end of m_worklist.
     686              : bool
     687     55196223 : late_combine::combine_into_uses (set_info *def, rtx set, insn_info *cursor)
     688              : {
     689     55196223 :   auto *insn = def->insn ();
     690              : 
     691              :   // Don't prolong the live ranges of allocatable hard registers, or put
     692              :   // them into more complicated instructions.  Failing to prevent this
     693              :   // could lead to spill failures, or at least to worst register allocation.
     694     55196223 :   if (!reload_completed
     695     55196223 :       && accesses_include_nonfixed_hard_registers (insn->uses ()))
     696      2823332 :     return false;
     697              : 
     698     52372891 :   if (!reload_completed && !check_register_pressure (insn, set))
     699              :     return false;
     700              : 
     701     45526146 :   if (!check_uses (def, set))
     702              :     return false;
     703              : 
     704     20797694 :   insn_combination combination (def, SET_DEST (set), SET_SRC (set));
     705     20797694 :   if (!combination.run ())
     706              :     return false;
     707              : 
     708      2155174 :   for (auto *use_change : combination.use_changes ())
     709      2119045 :     if (*use_change->insn () < *cursor)
     710            0 :       m_worklist.safe_push (use_change->insn ());
     711              :     else
     712              :       break;
     713      2155174 :   return true;
     714     20797694 : }
     715              : 
     716              : // CC_SET is a single_set that sets (only) CC_DEF; OTHER_SET is likewise
     717              : // a single_set that sets (only) OTHER_DEF.  CC_SET is known to set a
     718              : // condition-code register and the instruction that contains CC_SET is
     719              : // known to use OTHER_DEF.  Try to do CC_SET and OTHER_SET in parallel.
     720              : bool
     721      4372510 : late_combine::parallelize_insns (set_info *cc_def, rtx cc_set,
     722              :                                  set_info *other_def, rtx other_set)
     723              : {
     724      4372510 :   auto attempt = crtl->ssa->new_change_attempt ();
     725              : 
     726      4372510 :   insn_info *cc_insn = cc_def->insn ();
     727      4372510 :   insn_info *other_insn = other_def->insn ();
     728      4372510 :   if (dump_file && (dump_flags & TDF_DETAILS))
     729            0 :     fprintf (dump_file, "trying to parallelize insn %d and insn %d\n",
     730              :              other_insn->uid (), cc_insn->uid ());
     731              : 
     732              :   // Try to substitute OTHER_SET into CC_INSN.
     733      4372510 :   insn_change_watermark rtl_watermark;
     734      4372510 :   rtx_insn *cc_rtl = cc_insn->rtl ();
     735      4372510 :   insn_propagation prop (cc_rtl, SET_DEST (other_set), SET_SRC (other_set));
     736      4372510 :   if (!prop.apply_to_pattern (&PATTERN (cc_rtl))
     737      4372510 :       || prop.num_replacements == 0)
     738              :     {
     739        13438 :       if (dump_file && (dump_flags & TDF_DETAILS))
     740            0 :         fprintf (dump_file, "-- failed to substitute all uses of r%d\n",
     741              :                  other_def->regno ());
     742        13438 :       return false;
     743              :     }
     744              : 
     745              :   // Restrict the uses to those outside notes.
     746      4359072 :   use_array cc_uses = remove_note_accesses (attempt, cc_insn->uses ());
     747      4359072 :   use_array other_set_uses = remove_note_accesses (attempt,
     748              :                                                    other_insn->uses ());
     749              : 
     750              :   // Remove the use of the substituted value.
     751      4359072 :   cc_uses = remove_uses_of_def (attempt, cc_uses, other_def);
     752              : 
     753              :   // Get the list of uses for the new instruction.
     754      4359072 :   insn_change cc_change (cc_insn);
     755      4359072 :   cc_change.new_uses = merge_access_arrays (attempt, other_set_uses, cc_uses);
     756      4359072 :   if (!cc_change.new_uses.is_valid ())
     757              :     {
     758        47639 :       if (dump_file && (dump_flags & TDF_DETAILS))
     759            0 :         fprintf (dump_file, "-- cannot merge uses\n");
     760        47639 :       return false;
     761              :     }
     762              : 
     763              :   // The instruction initially defines just two registers.  recog can add
     764              :   // extra clobbers if necessary.
     765      4311433 :   auto_vec<access_info *, 2> new_defs;
     766      4311433 :   new_defs.quick_push (cc_def);
     767      4311433 :   new_defs.quick_push (other_def);
     768      4311433 :   sort_accesses (new_defs);
     769      8622866 :   cc_change.new_defs = def_array (access_array (new_defs));
     770              : 
     771              :   // Make sure there is somewhere that the new instruction could live.
     772      4311433 :   auto other_change = insn_change::delete_insn (other_insn);
     773      4311433 :   insn_change *changes[] = { &other_change, &cc_change };
     774      4311433 :   cc_change.move_range = cc_insn->ebb ()->insn_range ();
     775      4311433 :   if (!restrict_movement (cc_change, ignore_changing_insns (changes)))
     776              :     {
     777       880857 :       if (dump_file && (dump_flags & TDF_DETAILS))
     778            0 :         fprintf (dump_file, "-- cannot satisfy all definitions and uses\n");
     779       880857 :       return false;
     780              :     }
     781              : 
     782              :   // Tentatively install the new pattern.  By convention, the CC set
     783              :   // must be first.
     784      3430576 :   if (m_parallel)
     785              :     {
     786      2734244 :       XVECEXP (m_parallel, 0, 0) = cc_set;
     787      2734244 :       XVECEXP (m_parallel, 0, 1) = other_set;
     788              :     }
     789              :   else
     790              :     {
     791       696332 :       rtvec vec = gen_rtvec (2, cc_set, other_set);
     792       696332 :       m_parallel = gen_rtx_PARALLEL (VOIDmode, vec);
     793              :     }
     794      3430576 :   validate_change (cc_rtl, &PATTERN (cc_rtl), m_parallel, 1);
     795              : 
     796              :   // These routines report failures themselves.
     797      3430576 :   if (!recog (attempt, cc_change, ignore_changing_insns (changes))
     798        12618 :       || !changes_are_worthwhile (changes)
     799      3443194 :       || !crtl->ssa->verify_insn_changes (changes))
     800      3417958 :     return false;
     801              : 
     802        12618 :   remove_reg_equal_equiv_notes (cc_rtl);
     803        12618 :   confirm_change_group ();
     804        12618 :   crtl->ssa->change_insns (changes);
     805        12618 :   m_parallel = NULL_RTX;
     806        12618 :   return true;
     807      4372510 : }
     808              : 
     809              : // CC_SET is a single_set that sets (only) CC_DEF.  See whether CC_DEF
     810              : // is a definition of a condition-code register and try to optimize it
     811              : // with related instructions if so.  Return true if something changed.
     812              : //
     813              : // This function looks for sequences of the form:
     814              : //
     815              : //    A: (set (reg R1) X1)
     816              : //    B: ...instructions that might change the value of X1...
     817              : //    C: (set (reg CC) X2) // X2 uses R1
     818              : //
     819              : // and tries to change them to:
     820              : //
     821              : //    C': [(set (reg CC) X2')
     822              : //         (set (reg R1) X1)]
     823              : //    B: ...instructions that might change the value of X1...
     824              : //
     825              : // where X2' is the result of replacing R1 with X1 in X2.
     826              : //
     827              : // Combine can handle this optimization if B doesn't exist and if A and
     828              : // C are in the same BB.  This pass instead handles cases where B does
     829              : // exist and cases where A and C are in different BBs of the same EBB.
     830              : bool
     831     53041049 : late_combine::combine_cc_setter (set_info *cc_def, rtx cc_set)
     832              : {
     833              :   // Check for a set of a CC register.  This isn't needed for correctness;
     834              :   // it's just a way of narrowing the search space.  It could be relaxed if
     835              :   // there are other situations that would benefit from the same optimization.
     836     53041049 :   if (!HARD_REGISTER_NUM_P (cc_def->regno ())
     837     53041049 :       || GET_MODE_CLASS (cc_def->mode()) != MODE_CC)
     838              :     return false;
     839              : 
     840              :   // Search the registers used by the CC setter for an easily-substitutable
     841              :   // def-use chain.
     842     18718039 :   for (use_info *other_use : cc_def->insn ()->uses ())
     843     10836574 :     if (auto *other_def = other_use->def ())
     844     10830164 :       if (other_use->regno () != cc_def->regno ()
     845     10794984 :           && other_def->ebb () == cc_def->ebb ()
     846     20269774 :           && other_def == single_set_info (other_def->insn ()))
     847      6414442 :         if (rtx other_set = optimizable_set (other_def))
     848      4372510 :           if (parallelize_insns (cc_def, cc_set, other_def, other_set))
     849        12618 :             return true;
     850              : 
     851      7881465 :   return false;
     852              : }
     853              : 
     854              : // Try to optimize INSN in some way.  If the optimization moves any
     855              : // instructions before CURSOR, and if further optimizations might be
     856              : // possible on those instructions, add them to the end of m_worklist.
     857              : bool
     858    107102595 : late_combine::combine_insn (insn_info *insn, insn_info *cursor)
     859              : {
     860    107102595 :   if (set_info *def = single_set_info (insn))
     861     83496155 :     if (rtx set = optimizable_set (def))
     862     55196223 :       return (combine_into_uses (def, set, cursor)
     863     55196223 :               || combine_cc_setter (def, set));
     864              : 
     865              :   return false;
     866              : }
     867              : 
     868              : // Run the pass on function FN.
     869              : unsigned int
     870      1927784 : late_combine::execute (function *fn)
     871              : {
     872              :   // Initialization.
     873      1927784 :   calculate_dominance_info (CDI_DOMINATORS);
     874      1927784 :   df_analyze ();
     875      1927784 :   crtl->ssa = new rtl_ssa::function_info (fn);
     876              :   // Don't allow memory_operand to match volatile MEMs.
     877      1927784 :   init_recog_no_volatile ();
     878              : 
     879      1927784 :   insn_info *insn = *crtl->ssa->nondebug_insns ().begin ();
     880    171281539 :   while (insn)
     881              :     {
     882    169353755 :       if (!insn->is_artificial ())
     883              :         {
     884    107102595 :           insn_info *prev = insn->prev_nondebug_insn ();
     885    107102595 :           if (combine_insn (insn, prev))
     886              :             {
     887              :               // Any instructions that get added to the worklist were
     888              :               // previously after PREV.  Thus if we were able to move
     889              :               // an instruction X before PREV during one combination,
     890              :               // X cannot depend on any instructions that we move before
     891              :               // PREV during subsequent combinations.  This means that
     892              :               // the worklist should be free of backwards dependencies,
     893              :               // even if it isn't necessarily in RPO.
     894      2167792 :               for (unsigned int i = 0; i < m_worklist.length (); ++i)
     895            0 :                 combine_insn (m_worklist[i], prev);
     896      2167792 :               m_worklist.truncate (0);
     897      2167792 :               insn = prev;
     898              :             }
     899              :         }
     900    169353755 :       insn = insn->next_nondebug_insn ();
     901              :     }
     902              : 
     903              :   // Finalization.
     904      1927784 :   if (crtl->ssa->perform_pending_updates ())
     905          517 :     cleanup_cfg (0);
     906              : 
     907      1927784 :   delete crtl->ssa;
     908      1927784 :   crtl->ssa = nullptr;
     909              : 
     910              :   // Make the recognizer allow volatile MEMs again.
     911      1927784 :   init_recog ();
     912      1927784 :   free_dominance_info (CDI_DOMINATORS);
     913      1927784 :   return 0;
     914              : }
     915              : 
     916              : class pass_late_combine : public rtl_opt_pass
     917              : {
     918              : public:
     919       571444 :   pass_late_combine (gcc::context *ctxt)
     920      1142888 :     : rtl_opt_pass (pass_data_late_combine, ctxt)
     921              :   {}
     922              : 
     923              :   // opt_pass methods:
     924       285722 :   opt_pass *clone () override { return new pass_late_combine (m_ctxt); }
     925              :   bool gate (function *) override;
     926              :   unsigned int execute (function *) override;
     927              : };
     928              : 
     929              : bool
     930      2942740 : pass_late_combine::gate (function *)
     931              : {
     932      2942740 :   return optimize > 0 && flag_late_combine_instructions;
     933              : }
     934              : 
     935              : unsigned int
     936      1927784 : pass_late_combine::execute (function *fn)
     937              : {
     938      1927784 :   return late_combine ().execute (fn);
     939              : }
     940              : 
     941              : } // end namespace
     942              : 
     943              : // Create a new CC fusion pass instance.
     944              : 
     945              : rtl_opt_pass *
     946       285722 : make_pass_late_combine (gcc::context *ctxt)
     947              : {
     948       285722 :   return new pass_late_combine (ctxt);
     949              : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.