LCOV - code coverage report
Current view: top level - gcc - tree-ssa-threadupdate.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 96.0 % 1054 1012
Test Date: 2026-02-28 14:20:25 Functions: 93.0 % 57 53
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Thread edges through blocks and update the control flow and SSA graphs.
       2              :    Copyright (C) 2004-2026 Free Software Foundation, Inc.
       3              : 
       4              : This file is part of GCC.
       5              : 
       6              : GCC is free software; you can redistribute it and/or modify
       7              : it under the terms of the GNU General Public License as published by
       8              : the Free Software Foundation; either version 3, or (at your option)
       9              : any later version.
      10              : 
      11              : GCC is distributed in the hope that it will be useful,
      12              : but WITHOUT ANY WARRANTY; without even the implied warranty of
      13              : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14              : GNU General Public License for more details.
      15              : 
      16              : You should have received a copy of the GNU General Public License
      17              : along with GCC; see the file COPYING3.  If not see
      18              : <http://www.gnu.org/licenses/>.  */
      19              : 
      20              : #include "config.h"
      21              : #include "system.h"
      22              : #include "coretypes.h"
      23              : #include "backend.h"
      24              : #include "tree.h"
      25              : #include "gimple.h"
      26              : #include "cfghooks.h"
      27              : #include "tree-pass.h"
      28              : #include "ssa.h"
      29              : #include "fold-const.h"
      30              : #include "cfganal.h"
      31              : #include "gimple-iterator.h"
      32              : #include "tree-ssa.h"
      33              : #include "tree-ssa-threadupdate.h"
      34              : #include "cfgloop.h"
      35              : #include "dbgcnt.h"
      36              : #include "tree-cfg.h"
      37              : #include "tree-vectorizer.h"
      38              : #include "tree-pass.h"
      39              : 
      40              : /* Given a block B, update the CFG and SSA graph to reflect redirecting
      41              :    one or more in-edges to B to instead reach the destination of an
      42              :    out-edge from B while preserving any side effects in B.
      43              : 
      44              :    i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
      45              :    side effects of executing B.
      46              : 
      47              :      1. Make a copy of B (including its outgoing edges and statements).  Call
      48              :         the copy B'.  Note B' has no incoming edges or PHIs at this time.
      49              : 
      50              :      2. Remove the control statement at the end of B' and all outgoing edges
      51              :         except B'->C.
      52              : 
      53              :      3. Add a new argument to each PHI in C with the same value as the existing
      54              :         argument associated with edge B->C.  Associate the new PHI arguments
      55              :         with the edge B'->C.
      56              : 
      57              :      4. For each PHI in B, find or create a PHI in B' with an identical
      58              :         PHI_RESULT.  Add an argument to the PHI in B' which has the same
      59              :         value as the PHI in B associated with the edge A->B.  Associate
      60              :         the new argument in the PHI in B' with the edge A->B.
      61              : 
      62              :      5. Change the edge A->B to A->B'.
      63              : 
      64              :         5a. This automatically deletes any PHI arguments associated with the
      65              :             edge A->B in B.
      66              : 
      67              :         5b. This automatically associates each new argument added in step 4
      68              :             with the edge A->B'.
      69              : 
      70              :      6. Repeat for other incoming edges into B.
      71              : 
      72              :      7. Put the duplicated resources in B and all the B' blocks into SSA form.
      73              : 
      74              :    Note that block duplication can be minimized by first collecting the
      75              :    set of unique destination blocks that the incoming edges should
      76              :    be threaded to.
      77              : 
      78              :    We reduce the number of edges and statements we create by not copying all
      79              :    the outgoing edges and the control statement in step #1.  We instead create
      80              :    a template block without the outgoing edges and duplicate the template.
      81              : 
      82              :    Another case this code handles is threading through a "joiner" block.  In
      83              :    this case, we do not know the destination of the joiner block, but one
      84              :    of the outgoing edges from the joiner block leads to a threadable path.  This
      85              :    case largely works as outlined above, except the duplicate of the joiner
      86              :    block still contains a full set of outgoing edges and its control statement.
      87              :    We just redirect one of its outgoing edges to our jump threading path.  */
      88              : 
      89              : 
      90              : /* Steps #5 and #6 of the above algorithm are best implemented by walking
      91              :    all the incoming edges which thread to the same destination edge at
      92              :    the same time.  That avoids lots of table lookups to get information
      93              :    for the destination edge.
      94              : 
      95              :    To realize that implementation we create a list of incoming edges
      96              :    which thread to the same outgoing edge.  Thus to implement steps
      97              :    #5 and #6 we traverse our hash table of outgoing edge information.
      98              :    For each entry we walk the list of incoming edges which thread to
      99              :    the current outgoing edge.  */
     100              : 
     101              : struct el
     102              : {
     103              :   edge e;
     104              :   struct el *next;
     105              : };
     106              : 
     107              : /* Main data structure recording information regarding B's duplicate
     108              :    blocks.  */
     109              : 
     110              : /* We need to efficiently record the unique thread destinations of this
     111              :    block and specific information associated with those destinations.  We
     112              :    may have many incoming edges threaded to the same outgoing edge.  This
     113              :    can be naturally implemented with a hash table.  */
     114              : 
     115              : struct redirection_data : free_ptr_hash<redirection_data>
     116              : {
     117              :   /* We support wiring up two block duplicates in a jump threading path.
     118              : 
     119              :      One is a normal block copy where we remove the control statement
     120              :      and wire up its single remaining outgoing edge to the thread path.
     121              : 
     122              :      The other is a joiner block where we leave the control statement
     123              :      in place, but wire one of the outgoing edges to a thread path.
     124              : 
     125              :      In theory we could have multiple block duplicates in a jump
     126              :      threading path, but I haven't tried that.
     127              : 
     128              :      The duplicate blocks appear in this array in the same order in
     129              :      which they appear in the jump thread path.  */
     130              :   basic_block dup_blocks[2];
     131              : 
     132              :   vec<jump_thread_edge *> *path;
     133              : 
     134              :   /* A list of incoming edges which we want to thread to the
     135              :      same path.  */
     136              :   struct el *incoming_edges;
     137              : 
     138              :   /* hash_table support.  */
     139              :   static inline hashval_t hash (const redirection_data *);
     140              :   static inline int equal (const redirection_data *, const redirection_data *);
     141              : };
     142              : 
     143      8351647 : jump_thread_path_allocator::jump_thread_path_allocator ()
     144              : {
     145      8351647 :   obstack_init (&m_obstack);
     146      8351647 : }
     147              : 
     148      8351647 : jump_thread_path_allocator::~jump_thread_path_allocator ()
     149              : {
     150      8351647 :   obstack_free (&m_obstack, NULL);
     151      8351647 : }
     152              : 
     153              : jump_thread_edge *
     154     25495055 : jump_thread_path_allocator::allocate_thread_edge (edge e,
     155              :                                                   jump_thread_edge_type type)
     156              : {
     157     25495055 :   void *r = obstack_alloc (&m_obstack, sizeof (jump_thread_edge));
     158     25495055 :   return new (r) jump_thread_edge (e, type);
     159              : }
     160              : 
     161              : vec<jump_thread_edge *> *
     162     18251164 : jump_thread_path_allocator::allocate_thread_path ()
     163              : {
     164              :   // ?? Since the paths live in an obstack, we should be able to remove all
     165              :   // references to path->release() throughout the code.
     166     18251164 :   void *r = obstack_alloc (&m_obstack, sizeof (vec <jump_thread_edge *>));
     167     18251164 :   return new (r) vec<jump_thread_edge *> ();
     168              : }
     169              : 
     170      8351647 : jt_path_registry::jt_path_registry (bool backedge_threads)
     171              : {
     172      8351647 :   m_paths.create (5);
     173      8351647 :   m_num_threaded_edges = 0;
     174      8351647 :   m_backedge_threads = backedge_threads;
     175      8351647 : }
     176              : 
     177      8351647 : jt_path_registry::~jt_path_registry ()
     178              : {
     179      8351647 :   m_paths.release ();
     180      8351647 : }
     181              : 
     182      2083233 : fwd_jt_path_registry::fwd_jt_path_registry ()
     183      2083233 :   : jt_path_registry (/*backedge_threads=*/false)
     184              : {
     185      2083233 :   m_removed_edges = new hash_table<struct removed_edges> (17);
     186      2083233 :   m_redirection_data = NULL;
     187      2083233 : }
     188              : 
     189      4166466 : fwd_jt_path_registry::~fwd_jt_path_registry ()
     190              : {
     191      2083233 :   delete m_removed_edges;
     192      4166466 : }
     193              : 
     194      6268414 : back_jt_path_registry::back_jt_path_registry ()
     195      6268414 :   : jt_path_registry (/*backedge_threads=*/true)
     196              : {
     197      6268414 : }
     198              : 
     199              : void
     200     25495055 : jt_path_registry::push_edge (vec<jump_thread_edge *> *path,
     201              :                              edge e, jump_thread_edge_type type)
     202              : {
     203     25495055 :   jump_thread_edge *x =  m_allocator.allocate_thread_edge (e, type);
     204     25495055 :   path->safe_push (x);
     205     25495055 : }
     206              : 
     207              : vec<jump_thread_edge *> *
     208     18251164 : jt_path_registry::allocate_thread_path ()
     209              : {
     210     18251164 :   return m_allocator.allocate_thread_path ();
     211              : }
     212              : 
     213              : /* Dump a jump threading path, including annotations about each
     214              :    edge in the path.  */
     215              : 
     216              : static void
     217          193 : dump_jump_thread_path (FILE *dump_file,
     218              :                        const vec<jump_thread_edge *> &path,
     219              :                        bool registering)
     220              : {
     221          193 :   if (registering)
     222          252 :     fprintf (dump_file,
     223              :              "  [%u] Registering jump thread: (%d, %d) incoming edge; ",
     224              :              dbg_cnt_counter (registered_jump_thread),
     225          126 :              path[0]->e->src->index, path[0]->e->dest->index);
     226              :   else
     227           67 :     fprintf (dump_file,
     228              :              "  Cancelling jump thread: (%d, %d) incoming edge; ",
     229           67 :              path[0]->e->src->index, path[0]->e->dest->index);
     230              : 
     231          629 :   for (unsigned int i = 1; i < path.length (); i++)
     232              :     {
     233              :       /* We can get paths with a NULL edge when the final destination
     234              :          of a jump thread turns out to be a constant address.  We dump
     235              :          those paths when debugging, so we have to be prepared for that
     236              :          possibility here.  */
     237          436 :       if (path[i]->e == NULL)
     238            0 :         continue;
     239              : 
     240          436 :       fprintf (dump_file, " (%d, %d) ",
     241          436 :                path[i]->e->src->index, path[i]->e->dest->index);
     242          436 :       switch (path[i]->type)
     243              :         {
     244           39 :         case EDGE_COPY_SRC_JOINER_BLOCK:
     245           39 :           fprintf (dump_file, "joiner");
     246           39 :           break;
     247          249 :         case EDGE_COPY_SRC_BLOCK:
     248          249 :           fprintf (dump_file, "normal");
     249          249 :           break;
     250          148 :         case EDGE_NO_COPY_SRC_BLOCK:
     251          148 :           fprintf (dump_file, "nocopy");
     252          148 :           break;
     253            0 :         default:
     254            0 :           gcc_unreachable ();
     255              :         }
     256              : 
     257          436 :       if ((path[i]->e->flags & EDGE_DFS_BACK) != 0)
     258           35 :         fprintf (dump_file, " (back)");
     259              :     }
     260          193 :   fprintf (dump_file, "; \n");
     261          193 : }
     262              : 
     263              : DEBUG_FUNCTION void
     264            0 : debug (const vec<jump_thread_edge *> &path)
     265              : {
     266            0 :   dump_jump_thread_path (stderr, path, true);
     267            0 : }
     268              : 
     269              : DEBUG_FUNCTION void
     270            0 : debug (const vec<jump_thread_edge *> *path)
     271              : {
     272            0 :   debug (*path);
     273            0 : }
     274              : 
     275              : /* Release the memory associated with PATH, and if dumping is enabled,
     276              :    dump out the reason why the thread was canceled.  */
     277              : 
     278              : static void
     279      1068193 : cancel_thread (vec<jump_thread_edge *> *path, const char *reason = NULL)
     280              : {
     281      1068193 :   if (dump_file && (dump_flags & TDF_DETAILS))
     282              :     {
     283           67 :       if (reason)
     284           59 :         fprintf (dump_file, "%s: ", reason);
     285              : 
     286           67 :       dump_jump_thread_path (dump_file, *path, false);
     287           67 :       fprintf (dump_file, "\n");
     288              :     }
     289      1068193 :   path->release ();
     290      1068193 : }
     291              : 
     292              : /* Simple hashing function.  For any given incoming edge E, we're going
     293              :    to be most concerned with the final destination of its jump thread
     294              :    path.  So hash on the block index of the final edge in the path.  */
     295              : 
     296              : inline hashval_t
     297       516147 : redirection_data::hash (const redirection_data *p)
     298              : {
     299       516147 :   vec<jump_thread_edge *> *path = p->path;
     300       516147 :   return path->last ()->e->dest->index;
     301              : }
     302              : 
     303              : /* Given two hash table entries, return true if they have the same
     304              :    jump threading path.  */
     305              : inline int
     306       158351 : redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
     307              : {
     308       158351 :   vec<jump_thread_edge *> *path1 = p1->path;
     309       158351 :   vec<jump_thread_edge *> *path2 = p2->path;
     310              : 
     311       475053 :   if (path1->length () != path2->length ())
     312              :     return false;
     313              : 
     314       260110 :   for (unsigned int i = 1; i < path1->length (); i++)
     315              :     {
     316       189518 :       if ((*path1)[i]->type != (*path2)[i]->type
     317       189518 :           || (*path1)[i]->e != (*path2)[i]->e)
     318              :         return false;
     319              :     }
     320              : 
     321              :   return true;
     322              : }
     323              : 
     324              : /* Data structure of information to pass to hash table traversal routines.  */
     325              : struct ssa_local_info_t
     326              : {
     327              :   /* The current block we are working on.  */
     328              :   basic_block bb;
     329              : 
     330              :   /* We only create a template block for the first duplicated block in a
     331              :      jump threading path as we may need many duplicates of that block.
     332              : 
     333              :      The second duplicate block in a path is specific to that path.  Creating
     334              :      and sharing a template for that block is considerably more difficult.  */
     335              :   basic_block template_block;
     336              : 
     337              :   /* If we append debug stmts to the template block after creating it,
     338              :      this iterator won't be the last one in the block, and further
     339              :      copies of the template block shouldn't get debug stmts after
     340              :      it.  */
     341              :   gimple_stmt_iterator template_last_to_copy;
     342              : 
     343              :   /* Blocks duplicated for the thread.  */
     344              :   bitmap duplicate_blocks;
     345              : 
     346              :   /* TRUE if we thread one or more jumps, FALSE otherwise.  */
     347              :   bool jumps_threaded;
     348              : 
     349              :   /* When we have multiple paths through a joiner which reach different
     350              :      final destinations, then we may need to correct for potential
     351              :      profile insanities.  */
     352              :   bool need_profile_correction;
     353              : 
     354              :   // Jump threading statistics.
     355              :   unsigned long num_threaded_edges;
     356              : };
     357              : 
     358              : /* When we start updating the CFG for threading, data necessary for jump
     359              :    threading is attached to the AUX field for the incoming edge.  Use these
     360              :    macros to access the underlying structure attached to the AUX field.  */
     361              : #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
     362              : 
     363              : /* Remove the last statement in block BB if it is a control statement
     364              :    Also remove all outgoing edges except the edge which reaches DEST_BB.
     365              :    If DEST_BB is NULL, then remove all outgoing edges.  */
     366              : 
     367              : static void
     368      1515269 : remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
     369              : {
     370      1515269 :   gimple_stmt_iterator gsi;
     371      1515269 :   edge e;
     372      1515269 :   edge_iterator ei;
     373              : 
     374      1515269 :   gsi = gsi_last_bb (bb);
     375              : 
     376              :   /* If the duplicate ends with a control statement, then remove it.
     377              : 
     378              :      Note that if we are duplicating the template block rather than the
     379              :      original basic block, then the duplicate might not have any real
     380              :      statements in it.  */
     381      1515269 :   if (!gsi_end_p (gsi)
     382      1515269 :       && gsi_stmt (gsi)
     383      1515269 :       && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
     384              :           || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
     385              :           || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
     386      1515269 :     gsi_remove (&gsi, true);
     387              : 
     388      4564565 :   for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
     389              :     {
     390      3049296 :       if (e->dest != dest_bb)
     391              :         {
     392      1788903 :           free_dom_edge_info (e);
     393      1788903 :           remove_edge (e);
     394              :         }
     395              :       else
     396              :         {
     397      1260393 :           e->probability = profile_probability::always ();
     398      1260393 :           ei_next (&ei);
     399              :         }
     400              :     }
     401              : 
     402              :   /* If the remaining edge is a loop exit, there must have
     403              :      a removed edge that was not a loop exit.
     404              : 
     405              :      In that case BB and possibly other blocks were previously
     406              :      in the loop, but are now outside the loop.  Thus, we need
     407              :      to update the loop structures.  */
     408      1515269 :   if (single_succ_p (bb)
     409      1260393 :       && loop_outer (bb->loop_father)
     410      1881470 :       && loop_exit_edge_p (bb->loop_father, single_succ_edge (bb)))
     411        56015 :     loops_state_set (LOOPS_NEED_FIXUP);
     412      1515269 : }
     413              : 
     414              : /* Create a duplicate of BB.  Record the duplicate block in an array
     415              :    indexed by COUNT stored in RD.  */
     416              : 
     417              : static void
     418       333819 : create_block_for_threading (basic_block bb,
     419              :                             struct redirection_data *rd,
     420              :                             unsigned int count,
     421              :                             bitmap *duplicate_blocks)
     422              : {
     423       333819 :   edge_iterator ei;
     424       333819 :   edge e;
     425              : 
     426              :   /* We can use the generic block duplication code and simply remove
     427              :      the stuff we do not need.  */
     428       333819 :   rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
     429              : 
     430      1021016 :   FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
     431              :     {
     432       687197 :       e->aux = NULL;
     433              : 
     434              :       /* If we duplicate a block with an outgoing edge marked as
     435              :          EDGE_IGNORE, we must clear EDGE_IGNORE so that it doesn't
     436              :          leak out of the current pass.
     437              : 
     438              :          It would be better to simplify switch statements and remove
     439              :          the edges before we get here, but the sequencing is nontrivial.  */
     440       687197 :       e->flags &= ~EDGE_IGNORE;
     441              :     }
     442              : 
     443              :   /* Zero out the profile, since the block is unreachable for now.  */
     444       333819 :   rd->dup_blocks[count]->count = profile_count::uninitialized ();
     445       333819 :   if (duplicate_blocks)
     446       333819 :     bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
     447       333819 : }
     448              : 
     449              : /* Given an outgoing edge E lookup and return its entry in our hash table.
     450              : 
     451              :    If INSERT is true, then we insert the entry into the hash table if
     452              :    it is not already present.  INCOMING_EDGE is added to the list of incoming
     453              :    edges associated with E in the hash table.  */
     454              : 
     455              : redirection_data *
     456       361376 : fwd_jt_path_registry::lookup_redirection_data (edge e, insert_option insert)
     457              : {
     458       361376 :   struct redirection_data **slot;
     459       361376 :   struct redirection_data *elt;
     460       361376 :   vec<jump_thread_edge *> *path = THREAD_PATH (e);
     461              : 
     462              :   /* Build a hash table element so we can see if E is already
     463              :      in the table.  */
     464       361376 :   elt = XNEW (struct redirection_data);
     465       361376 :   elt->path = path;
     466       361376 :   elt->dup_blocks[0] = NULL;
     467       361376 :   elt->dup_blocks[1] = NULL;
     468       361376 :   elt->incoming_edges = NULL;
     469              : 
     470       361376 :   slot = m_redirection_data->find_slot (elt, insert);
     471              : 
     472              :   /* This will only happen if INSERT is false and the entry is not
     473              :      in the hash table.  */
     474       361376 :   if (slot == NULL)
     475              :     {
     476            0 :       free (elt);
     477            0 :       return NULL;
     478              :     }
     479              : 
     480              :   /* This will only happen if E was not in the hash table and
     481              :      INSERT is true.  */
     482       361376 :   if (*slot == NULL)
     483              :     {
     484       290784 :       *slot = elt;
     485       290784 :       elt->incoming_edges = XNEW (struct el);
     486       290784 :       elt->incoming_edges->e = e;
     487       290784 :       elt->incoming_edges->next = NULL;
     488       290784 :       return elt;
     489              :     }
     490              :   /* E was in the hash table.  */
     491              :   else
     492              :     {
     493              :       /* Free ELT as we do not need it anymore, we will extract the
     494              :          relevant entry from the hash table itself.  */
     495        70592 :       free (elt);
     496              : 
     497              :       /* Get the entry stored in the hash table.  */
     498        70592 :       elt = *slot;
     499              : 
     500              :       /* If insertion was requested, then we need to add INCOMING_EDGE
     501              :          to the list of incoming edges associated with E.  */
     502        70592 :       if (insert)
     503              :         {
     504        70592 :           struct el *el = XNEW (struct el);
     505        70592 :           el->next = elt->incoming_edges;
     506        70592 :           el->e = e;
     507        70592 :           elt->incoming_edges = el;
     508              :         }
     509              : 
     510        70592 :       return elt;
     511              :     }
     512              : }
     513              : 
     514              : /* Given ssa_name DEF, backtrack jump threading PATH from node IDX
     515              :    to see if it has constant value in a flow sensitive manner.  Set
     516              :    LOCUS to location of the constant phi arg and return the value.
     517              :    Return DEF directly if either PATH or idx is ZERO.  */
     518              : 
     519              : static tree
     520       129300 : get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
     521              :                          basic_block bb, int idx, location_t *locus)
     522              : {
     523       129300 :   tree arg;
     524       129300 :   gphi *def_phi;
     525       129300 :   basic_block def_bb;
     526              : 
     527       129300 :   if (path == NULL || idx == 0)
     528              :     return def;
     529              : 
     530       179377 :   def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
     531        70072 :   if (!def_phi)
     532              :     return def;
     533              : 
     534        70072 :   def_bb = gimple_bb (def_phi);
     535              :   /* Don't propagate loop invariants into deeper loops.  */
     536        70072 :   if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
     537         1383 :     return def;
     538              : 
     539              :   /* Backtrack jump threading path from IDX to see if def has constant
     540              :      value.  */
     541        88576 :   for (int j = idx - 1; j >= 0; j--)
     542              :     {
     543        72758 :       edge e = (*path)[j]->e;
     544        72758 :       if (e->dest == def_bb)
     545              :         {
     546        52871 :           arg = gimple_phi_arg_def (def_phi, e->dest_idx);
     547        52871 :           if (is_gimple_min_invariant (arg))
     548              :             {
     549        19995 :               *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
     550        19995 :               return arg;
     551              :             }
     552              :           break;
     553              :         }
     554              :     }
     555              : 
     556              :   return def;
     557              : }
     558              : 
     559              : /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
     560              :    Try to backtrack jump threading PATH from node IDX to see if the arg
     561              :    has constant value, copy constant value instead of argument itself
     562              :    if yes.  */
     563              : 
     564              : static void
     565       441791 : copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
     566              :                vec<jump_thread_edge *> *path, int idx)
     567              : {
     568       441791 :   gphi_iterator gsi;
     569       441791 :   int src_indx = src_e->dest_idx;
     570              : 
     571       739848 :   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     572              :     {
     573       298057 :       gphi *phi = gsi.phi ();
     574       298057 :       tree def = gimple_phi_arg_def (phi, src_indx);
     575       298057 :       location_t locus = gimple_phi_arg_location (phi, src_indx);
     576              : 
     577       298057 :       if (TREE_CODE (def) == SSA_NAME
     578       528101 :           && !virtual_operand_p (gimple_phi_result (phi)))
     579       129300 :         def = get_value_locus_in_path (def, path, bb, idx, &locus);
     580              : 
     581       298057 :       add_phi_arg (phi, def, tgt_e, locus);
     582              :     }
     583       441791 : }
     584              : 
     585              : /* We have recently made a copy of ORIG_BB, including its outgoing
     586              :    edges.  The copy is NEW_BB.  Every PHI node in every direct successor of
     587              :    ORIG_BB has a new argument associated with edge from NEW_BB to the
     588              :    successor.  Initialize the PHI argument so that it is equal to the PHI
     589              :    argument associated with the edge from ORIG_BB to the successor.
     590              :    PATH and IDX are used to check if the new PHI argument has constant
     591              :    value in a flow sensitive manner.  */
     592              : 
     593              : static void
     594        78943 : update_destination_phis (basic_block orig_bb, basic_block new_bb,
     595              :                          vec<jump_thread_edge *> *path, int idx)
     596              : {
     597        78943 :   edge_iterator ei;
     598        78943 :   edge e;
     599              : 
     600       249727 :   FOR_EACH_EDGE (e, ei, orig_bb->succs)
     601              :     {
     602       170784 :       edge e2 = find_edge (new_bb, e->dest);
     603       170784 :       copy_phi_args (e->dest, e, e2, path, idx);
     604              :     }
     605        78943 : }
     606              : 
     607              : /* Given a duplicate block and its single destination (both stored
     608              :    in RD).  Create an edge between the duplicate and its single
     609              :    destination.
     610              : 
     611              :    Add an additional argument to any PHI nodes at the single
     612              :    destination.  IDX is the start node in jump threading path
     613              :    we start to check to see if the new PHI argument has constant
     614              :    value along the jump threading path.  */
     615              : 
     616              : static void
     617       254876 : create_edge_and_update_destination_phis (struct redirection_data *rd,
     618              :                                          basic_block bb, int idx)
     619              : {
     620       254876 :   edge e = make_single_succ_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
     621              : 
     622       254876 :   rescan_loop_exit (e, true, false);
     623              : 
     624              :   /* We used to copy the thread path here.  That was added in 2007
     625              :      and dutifully updated through the representation changes in 2013.
     626              : 
     627              :      In 2013 we added code to thread from an interior node through
     628              :      the backedge to another interior node.  That runs after the code
     629              :      to thread through loop headers from outside the loop.
     630              : 
     631              :      The latter may delete edges in the CFG, including those
     632              :      which appeared in the jump threading path we copied here.  Thus
     633              :      we'd end up using a dangling pointer.
     634              : 
     635              :      After reviewing the 2007/2011 code, I can't see how anything
     636              :      depended on copying the AUX field and clearly copying the jump
     637              :      threading path is problematical due to embedded edge pointers.
     638              :      It has been removed.  */
     639       254876 :   e->aux = NULL;
     640              : 
     641              :   /* If there are any PHI nodes at the destination of the outgoing edge
     642              :      from the duplicate block, then we will need to add a new argument
     643              :      to them.  The argument should have the same value as the argument
     644              :      associated with the outgoing edge stored in RD.  */
     645       254876 :   copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
     646       254876 : }
     647              : 
     648              : /* Look through PATH beginning at START and return TRUE if there are
     649              :    any additional blocks that need to be duplicated.  Otherwise,
     650              :    return FALSE.  */
     651              : static bool
     652        78943 : any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
     653              :                                  unsigned int start)
     654              : {
     655       100245 :   for (unsigned int i = start + 1; i < path->length (); i++)
     656              :     {
     657        64337 :       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
     658        64337 :           || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
     659              :         return true;
     660              :     }
     661              :   return false;
     662              : }
     663              : 
     664              : 
     665              : /* Compute the amount of profile count coming into the jump threading
     666              :    path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
     667              :    PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
     668              :    duplicated path, returned in PATH_OUT_COUNT_PTR.  LOCAL_INFO is used to
     669              :    identify blocks duplicated for jump threading, which have duplicated
     670              :    edges that need to be ignored in the analysis.  Return true if path contains
     671              :    a joiner, false otherwise.
     672              : 
     673              :    In the non-joiner case, this is straightforward - all the counts
     674              :    flowing into the jump threading path should flow through the duplicated
     675              :    block and out of the duplicated path.
     676              : 
     677              :    In the joiner case, it is very tricky.  Some of the counts flowing into
     678              :    the original path go offpath at the joiner.  The problem is that while
     679              :    we know how much total count goes off-path in the original control flow,
     680              :    we don't know how many of the counts corresponding to just the jump
     681              :    threading path go offpath at the joiner.
     682              : 
     683              :    For example, assume we have the following control flow and identified
     684              :    jump threading paths:
     685              : 
     686              :                 A     B     C
     687              :                  \    |    /
     688              :                Ea \   |Eb / Ec
     689              :                    \  |  /
     690              :                     v v v
     691              :                       J       <-- Joiner
     692              :                      / \
     693              :                 Eoff/   \Eon
     694              :                    /     \
     695              :                   v       v
     696              :                 Soff     Son  <--- Normal
     697              :                          /\
     698              :                       Ed/  \ Ee
     699              :                        /    \
     700              :                       v     v
     701              :                       D      E
     702              : 
     703              :             Jump threading paths: A -> J -> Son -> D (path 1)
     704              :                                   C -> J -> Son -> E (path 2)
     705              : 
     706              :    Note that the control flow could be more complicated:
     707              :    - Each jump threading path may have more than one incoming edge.  I.e. A and
     708              :    Ea could represent multiple incoming blocks/edges that are included in
     709              :    path 1.
     710              :    - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
     711              :    before or after the "normal" copy block).  These are not duplicated onto
     712              :    the jump threading path, as they are single-successor.
     713              :    - Any of the blocks along the path may have other incoming edges that
     714              :    are not part of any jump threading path, but add profile counts along
     715              :    the path.
     716              : 
     717              :    In the above example, after all jump threading is complete, we will
     718              :    end up with the following control flow:
     719              : 
     720              :                 A          B           C
     721              :                 |          |           |
     722              :               Ea|          |Eb         |Ec
     723              :                 |          |           |
     724              :                 v          v           v
     725              :                Ja          J          Jc
     726              :                / \        / \Eon'     / \
     727              :           Eona/   \   ---/---\--------   \Eonc
     728              :              /     \ /  /     \           \
     729              :             v       v  v       v          v
     730              :            Sona     Soff      Son       Sonc
     731              :              \                 /\         /
     732              :               \___________    /  \  _____/
     733              :                           \  /    \/
     734              :                            vv      v
     735              :                             D      E
     736              : 
     737              :    The main issue to notice here is that when we are processing path 1
     738              :    (A->J->Son->D) we need to figure out the outgoing edge weights to
     739              :    the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
     740              :    sum of the incoming weights to D remain Ed.  The problem with simply
     741              :    assuming that Ja (and Jc when processing path 2) has the same outgoing
     742              :    probabilities to its successors as the original block J, is that after
     743              :    all paths are processed and other edges/counts removed (e.g. none
     744              :    of Ec will reach D after processing path 2), we may end up with not
     745              :    enough count flowing along duplicated edge Sona->D.
     746              : 
     747              :    Therefore, in the case of a joiner, we keep track of all counts
     748              :    coming in along the current path, as well as from predecessors not
     749              :    on any jump threading path (Eb in the above example).  While we
     750              :    first assume that the duplicated Eona for Ja->Sona has the same
     751              :    probability as the original, we later compensate for other jump
     752              :    threading paths that may eliminate edges.  We do that by keep track
     753              :    of all counts coming into the original path that are not in a jump
     754              :    thread (Eb in the above example, but as noted earlier, there could
     755              :    be other predecessors incoming to the path at various points, such
     756              :    as at Son).  Call this cumulative non-path count coming into the path
     757              :    before D as Enonpath.  We then ensure that the count from Sona->D is as at
     758              :    least as big as (Ed - Enonpath), but no bigger than the minimum
     759              :    weight along the jump threading path.  The probabilities of both the
     760              :    original and duplicated joiner block J and Ja will be adjusted
     761              :    accordingly after the updates.  */
     762              : 
     763              : static bool
     764       290784 : compute_path_counts (struct redirection_data *rd,
     765              :                      ssa_local_info_t *local_info,
     766              :                      profile_count *path_in_count_ptr,
     767              :                      profile_count *path_out_count_ptr)
     768              : {
     769       290784 :   edge e = rd->incoming_edges->e;
     770       290784 :   vec<jump_thread_edge *> *path = THREAD_PATH (e);
     771       290784 :   edge elast = path->last ()->e;
     772       290784 :   profile_count nonpath_count = profile_count::zero ();
     773       290784 :   bool has_joiner = false;
     774       290784 :   profile_count path_in_count = profile_count::zero ();
     775              : 
     776              :   /* Start by accumulating incoming edge counts to the path's first bb
     777              :      into a couple buckets:
     778              :         path_in_count: total count of incoming edges that flow into the
     779              :                   current path.
     780              :         nonpath_count: total count of incoming edges that are not
     781              :                   flowing along *any* path.  These are the counts
     782              :                   that will still flow along the original path after
     783              :                   all path duplication is done by potentially multiple
     784              :                   calls to this routine.
     785              :      (any other incoming edge counts are for a different jump threading
     786              :      path that will be handled by a later call to this routine.)
     787              :      To make this easier, start by recording all incoming edges that flow into
     788              :      the current path in a bitmap.  We could add up the path's incoming edge
     789              :      counts here, but we still need to walk all the first bb's incoming edges
     790              :      below to add up the counts of the other edges not included in this jump
     791              :      threading path.  */
     792       290784 :   struct el *next, *el;
     793       290784 :   auto_bitmap in_edge_srcs;
     794       652160 :   for (el = rd->incoming_edges; el; el = next)
     795              :     {
     796       361376 :       next = el->next;
     797       361376 :       bitmap_set_bit (in_edge_srcs, el->e->src->index);
     798              :     }
     799       290784 :   edge ein;
     800       290784 :   edge_iterator ei;
     801      1068276 :   FOR_EACH_EDGE (ein, ei, e->dest->preds)
     802              :     {
     803       777492 :       vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
     804              :       /* Simply check the incoming edge src against the set captured above.  */
     805       777492 :       if (ein_path
     806      1302147 :           && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
     807              :         {
     808              :           /* It is necessary but not sufficient that the last path edges
     809              :              are identical.  There may be different paths that share the
     810              :              same last path edge in the case where the last edge has a nocopy
     811              :              source block.  */
     812       361376 :           gcc_assert (ein_path->last ()->e == elast);
     813       361376 :           path_in_count += ein->count ();
     814              :         }
     815       416116 :       else if (!ein_path)
     816              :         {
     817              :           /* Keep track of the incoming edges that are not on any jump-threading
     818              :              path.  These counts will still flow out of original path after all
     819              :              jump threading is complete.  */
     820       252837 :             nonpath_count += ein->count ();
     821              :         }
     822              :     }
     823              : 
     824              :   /* Now compute the fraction of the total count coming into the first
     825              :      path bb that is from the current threading path.  */
     826       290784 :   profile_count total_count = e->dest->count;
     827              :   /* Handle incoming profile insanities.  */
     828       290784 :   if (total_count < path_in_count)
     829        22205 :     path_in_count = total_count;
     830       290784 :   profile_probability onpath_scale = path_in_count.probability_in (total_count);
     831              : 
     832              :   /* Walk the entire path to do some more computation in order to estimate
     833              :      how much of the path_in_count will flow out of the duplicated threading
     834              :      path.  In the non-joiner case this is straightforward (it should be
     835              :      the same as path_in_count, although we will handle incoming profile
     836              :      insanities by setting it equal to the minimum count along the path).
     837              : 
     838              :      In the joiner case, we need to estimate how much of the path_in_count
     839              :      will stay on the threading path after the joiner's conditional branch.
     840              :      We don't really know for sure how much of the counts
     841              :      associated with this path go to each successor of the joiner, but we'll
     842              :      estimate based on the fraction of the total count coming into the path
     843              :      bb was from the threading paths (computed above in onpath_scale).
     844              :      Afterwards, we will need to do some fixup to account for other threading
     845              :      paths and possible profile insanities.
     846              : 
     847              :      In order to estimate the joiner case's counts we also need to update
     848              :      nonpath_count with any additional counts coming into the path.  Other
     849              :      blocks along the path may have additional predecessors from outside
     850              :      the path.  */
     851       290784 :   profile_count path_out_count = path_in_count;
     852       290784 :   profile_count min_path_count = path_in_count;
     853       665743 :   for (unsigned int i = 1; i < path->length (); i++)
     854              :     {
     855       374959 :       edge epath = (*path)[i]->e;
     856       374959 :       profile_count cur_count = epath->count ();
     857       374959 :       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
     858              :         {
     859        78943 :           has_joiner = true;
     860        78943 :           cur_count = cur_count.apply_probability (onpath_scale);
     861              :         }
     862              :       /* In the joiner case we need to update nonpath_count for any edges
     863              :          coming into the path that will contribute to the count flowing
     864              :          into the path successor.  */
     865       374959 :       if (has_joiner && epath != elast)
     866              :         {
     867              :           /* Look for other incoming edges after joiner.  */
     868       251227 :           FOR_EACH_EDGE (ein, ei, epath->dest->preds)
     869              :             {
     870       184762 :               if (ein != epath
     871              :                   /* Ignore in edges from blocks we have duplicated for a
     872              :                      threading path, which have duplicated edge counts until
     873              :                      they are redirected by an invocation of this routine.  */
     874       303059 :                   && !bitmap_bit_p (local_info->duplicate_blocks,
     875       118297 :                                     ein->src->index))
     876        42113 :                 nonpath_count += ein->count ();
     877              :             }
     878              :         }
     879       374959 :       if (cur_count < path_out_count)
     880       161163 :         path_out_count = cur_count;
     881       374959 :       if (epath->count () < min_path_count)
     882       139113 :         min_path_count = epath->count ();
     883              :     }
     884              : 
     885              :   /* We computed path_out_count above assuming that this path targeted
     886              :      the joiner's on-path successor with the same likelihood as it
     887              :      reached the joiner.  However, other thread paths through the joiner
     888              :      may take a different path through the normal copy source block
     889              :      (i.e. they have a different elast), meaning that they do not
     890              :      contribute any counts to this path's elast.  As a result, it may
     891              :      turn out that this path must have more count flowing to the on-path
     892              :      successor of the joiner.  Essentially, all of this path's elast
     893              :      count must be contributed by this path and any nonpath counts
     894              :      (since any path through the joiner with a different elast will not
     895              :      include a copy of this elast in its duplicated path).
     896              :      So ensure that this path's path_out_count is at least the
     897              :      difference between elast->count () and nonpath_count.  Otherwise the edge
     898              :      counts after threading will not be sane.  */
     899       290784 :   if (local_info->need_profile_correction
     900       319308 :       && has_joiner && path_out_count < elast->count () - nonpath_count)
     901              :     {
     902         9554 :       path_out_count = elast->count () - nonpath_count;
     903              :       /* But neither can we go above the minimum count along the path
     904              :          we are duplicating.  This can be an issue due to profile
     905              :          insanities coming in to this pass.  */
     906         9554 :       if (path_out_count > min_path_count)
     907         6038 :         path_out_count = min_path_count;
     908              :     }
     909              : 
     910       290784 :   *path_in_count_ptr = path_in_count;
     911       290784 :   *path_out_count_ptr = path_out_count;
     912       290784 :   return has_joiner;
     913       290784 : }
     914              : 
     915              : 
     916              : /* Update the counts and frequencies for both an original path
     917              :    edge EPATH and its duplicate EDUP.  The duplicate source block
     918              :    will get a count of PATH_IN_COUNT and PATH_IN_FREQ,
     919              :    and the duplicate edge EDUP will have a count of PATH_OUT_COUNT.  */
     920              : static void
     921       374959 : update_profile (edge epath, edge edup, profile_count path_in_count,
     922              :                 profile_count path_out_count)
     923              : {
     924              : 
     925              :   /* First update the duplicated block's count.  */
     926       374959 :   if (edup)
     927              :     {
     928       333819 :       basic_block dup_block = edup->src;
     929              : 
     930              :       /* Edup's count is reduced by path_out_count.  We need to redistribute
     931              :          probabilities to the remaining edges.  */
     932              : 
     933       333819 :       edge esucc;
     934       333819 :       edge_iterator ei;
     935       333819 :       profile_probability edup_prob
     936       333819 :          = path_out_count.probability_in (path_in_count);
     937              : 
     938              :       /* Either scale up or down the remaining edges.
     939              :          probabilities are always in range <0,1> and thus we can't do
     940              :          both by same loop.  */
     941       333819 :       if (edup->probability > edup_prob)
     942              :         {
     943        26628 :            profile_probability rev_scale
     944        26628 :              = (profile_probability::always () - edup->probability)
     945        53256 :                / (profile_probability::always () - edup_prob);
     946        86440 :            FOR_EACH_EDGE (esucc, ei, dup_block->succs)
     947        59812 :              if (esucc != edup)
     948        33184 :                esucc->probability /= rev_scale;
     949              :         }
     950       307191 :       else if (edup->probability < edup_prob)
     951              :         {
     952        20867 :            profile_probability scale
     953        20867 :              = (profile_probability::always () - edup_prob)
     954        41734 :                / (profile_probability::always () - edup->probability);
     955        65924 :           FOR_EACH_EDGE (esucc, ei, dup_block->succs)
     956        45057 :             if (esucc != edup)
     957        24190 :               esucc->probability *= scale;
     958              :         }
     959       333819 :       if (edup_prob.initialized_p ())
     960       311855 :         edup->probability = edup_prob;
     961              : 
     962       333819 :       gcc_assert (!dup_block->count.initialized_p ());
     963       333819 :       dup_block->count = path_in_count;
     964              :     }
     965              : 
     966       374959 :   if (path_in_count == profile_count::zero ())
     967        10821 :     return;
     968              : 
     969       364138 :   profile_count final_count = epath->count () - path_out_count;
     970              : 
     971              :   /* Now update the original block's count in the
     972              :      opposite manner - remove the counts/freq that will flow
     973              :      into the duplicated block.  Handle underflow due to precision/
     974              :      rounding issues.  */
     975       364138 :   epath->src->count -= path_in_count;
     976              : 
     977              :   /* Next update this path edge's original and duplicated counts.  We know
     978              :      that the duplicated path will have path_out_count flowing
     979              :      out of it (in the joiner case this is the count along the duplicated path
     980              :      out of the duplicated joiner).  This count can then be removed from the
     981              :      original path edge.  */
     982              : 
     983       364138 :   edge esucc;
     984       364138 :   edge_iterator ei;
     985       364138 :   profile_probability epath_prob = final_count.probability_in (epath->src->count);
     986              : 
     987       364138 :   if (epath->probability > epath_prob)
     988              :     {
     989       225571 :        profile_probability rev_scale
     990       225571 :          = (profile_probability::always () - epath->probability)
     991       451142 :            / (profile_probability::always () - epath_prob);
     992       679382 :        FOR_EACH_EDGE (esucc, ei, epath->src->succs)
     993       453811 :          if (esucc != epath)
     994       228240 :            esucc->probability /= rev_scale;
     995              :     }
     996       138567 :   else if (epath->probability < epath_prob)
     997              :     {
     998        20241 :        profile_probability scale
     999        20241 :          = (profile_probability::always () - epath_prob)
    1000        40482 :            / (profile_probability::always () - epath->probability);
    1001        67583 :       FOR_EACH_EDGE (esucc, ei, epath->src->succs)
    1002        47342 :         if (esucc != epath)
    1003        27101 :           esucc->probability *= scale;
    1004              :     }
    1005       364138 :   if (epath_prob.initialized_p ())
    1006       333580 :     epath->probability = epath_prob;
    1007              : }
    1008              : 
    1009              : /* Wire up the outgoing edges from the duplicate blocks and
    1010              :    update any PHIs as needed.  Also update the profile counts
    1011              :    on the original and duplicate blocks and edges.  */
    1012              : void
    1013       290784 : ssa_fix_duplicate_block_edges (struct redirection_data *rd,
    1014              :                                ssa_local_info_t *local_info)
    1015              : {
    1016       290784 :   bool multi_incomings = (rd->incoming_edges->next != NULL);
    1017       290784 :   edge e = rd->incoming_edges->e;
    1018       290784 :   vec<jump_thread_edge *> *path = THREAD_PATH (e);
    1019       290784 :   edge elast = path->last ()->e;
    1020       290784 :   profile_count path_in_count = profile_count::zero ();
    1021       290784 :   profile_count path_out_count = profile_count::zero ();
    1022              : 
    1023              :   /* First determine how much profile count to move from original
    1024              :      path to the duplicate path.  This is tricky in the presence of
    1025              :      a joiner (see comments for compute_path_counts), where some portion
    1026              :      of the path's counts will flow off-path from the joiner.  In the
    1027              :      non-joiner case the path_in_count and path_out_count should be the
    1028              :      same.  */
    1029       290784 :   bool has_joiner = compute_path_counts (rd, local_info,
    1030              :                                          &path_in_count, &path_out_count);
    1031              : 
    1032       665743 :   for (unsigned int count = 0, i = 1; i < path->length (); i++)
    1033              :     {
    1034       374959 :       edge epath = (*path)[i]->e;
    1035              : 
    1036              :       /* If we were threading through an joiner block, then we want
    1037              :          to keep its control statement and redirect an outgoing edge.
    1038              :          Else we want to remove the control statement & edges, then create
    1039              :          a new outgoing edge.  In both cases we may need to update PHIs.  */
    1040       374959 :       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
    1041              :         {
    1042        78943 :           edge victim;
    1043        78943 :           edge e2;
    1044              : 
    1045        78943 :           gcc_assert (has_joiner);
    1046              : 
    1047              :           /* This updates the PHIs at the destination of the duplicate
    1048              :              block.  Pass 0 instead of i if we are threading a path which
    1049              :              has multiple incoming edges.  */
    1050       141458 :           update_destination_phis (local_info->bb, rd->dup_blocks[count],
    1051              :                                    path, multi_incomings ? 0 : i);
    1052              : 
    1053              :           /* Find the edge from the duplicate block to the block we're
    1054              :              threading through.  That's the edge we want to redirect.  */
    1055        78943 :           victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
    1056              : 
    1057              :           /* If there are no remaining blocks on the path to duplicate,
    1058              :              then redirect VICTIM to the final destination of the jump
    1059              :              threading path.  */
    1060        78943 :           if (!any_remaining_duplicated_blocks (path, i))
    1061              :             {
    1062        35908 :               if (victim->dest != elast->dest)
    1063              :                 {
    1064        16361 :                   e2 = redirect_edge_and_branch (victim, elast->dest);
    1065              :                   /* If we redirected the edge, then we need to copy PHI arguments
    1066              :                      at the target.  If the edge already existed (e2 != victim
    1067              :                      case), then the PHIs in the target already have the correct
    1068              :                      arguments.  */
    1069        16361 :                   if (e2 == victim)
    1070        16131 :                     copy_phi_args (e2->dest, elast, e2,
    1071              :                                    path, multi_incomings ? 0 : i);
    1072              :                 }
    1073              :               else
    1074              :                 e2 = victim;
    1075              :             }
    1076              :           else
    1077              :             {
    1078              :               /* Redirect VICTIM to the next duplicated block in the path.  */
    1079        43035 :               e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
    1080              : 
    1081              :               /* We need to update the PHIs in the next duplicated block.  We
    1082              :                  want the new PHI args to have the same value as they had
    1083              :                  in the source of the next duplicate block.
    1084              : 
    1085              :                  Thus, we need to know which edge we traversed into the
    1086              :                  source of the duplicate.  Furthermore, we may have
    1087              :                  traversed many edges to reach the source of the duplicate.
    1088              : 
    1089              :                  Walk through the path starting at element I until we
    1090              :                  hit an edge marked with EDGE_COPY_SRC_BLOCK.  We want
    1091              :                  the edge from the prior element.  */
    1092        44555 :               for (unsigned int j = i + 1; j < path->length (); j++)
    1093              :                 {
    1094        44555 :                   if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
    1095              :                     {
    1096        43035 :                       copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
    1097        43035 :                       break;
    1098              :                     }
    1099              :                 }
    1100              :             }
    1101              : 
    1102              :           /* Update the counts of both the original block
    1103              :              and path edge, and the duplicates.  The path duplicate's
    1104              :              incoming count are the totals for all edges
    1105              :              incoming to this jump threading path computed earlier.
    1106              :              And we know that the duplicated path will have path_out_count
    1107              :              flowing out of it (i.e. along the duplicated path out of the
    1108              :              duplicated joiner).  */
    1109        78943 :           update_profile (epath, e2, path_in_count, path_out_count);
    1110              :         }
    1111       296016 :       else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
    1112              :         {
    1113       254876 :           remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
    1114       471371 :           create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
    1115              :                                                    multi_incomings ? 0 : i);
    1116       254876 :           if (count == 1)
    1117        43035 :             single_succ_edge (rd->dup_blocks[1])->aux = NULL;
    1118              : 
    1119              :           /* Update the counts of both the original block
    1120              :              and path edge, and the duplicates.  Since we are now after
    1121              :              any joiner that may have existed on the path, the count
    1122              :              flowing along the duplicated threaded path is path_out_count.
    1123              :              If we didn't have a joiner, then cur_path_freq was the sum
    1124              :              of the total frequencies along all incoming edges to the
    1125              :              thread path (path_in_freq).  If we had a joiner, it would have
    1126              :              been updated at the end of that handling to the edge frequency
    1127              :              along the duplicated joiner path edge.  */
    1128       254876 :           update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
    1129              :                           path_out_count, path_out_count);
    1130              :         }
    1131              :       else
    1132              :         {
    1133              :           /* No copy case.  In this case we don't have an equivalent block
    1134              :              on the duplicated thread path to update, but we do need
    1135              :              to remove the portion of the counts/freqs that were moved
    1136              :              to the duplicated path from the counts/freqs flowing through
    1137              :              this block on the original path.  Since all the no-copy edges
    1138              :              are after any joiner, the removed count is the same as
    1139              :              path_out_count.
    1140              : 
    1141              :              If we didn't have a joiner, then cur_path_freq was the sum
    1142              :              of the total frequencies along all incoming edges to the
    1143              :              thread path (path_in_freq).  If we had a joiner, it would have
    1144              :              been updated at the end of that handling to the edge frequency
    1145              :              along the duplicated joiner path edge.  */
    1146        41140 :            update_profile (epath, NULL, path_out_count, path_out_count);
    1147              :         }
    1148              : 
    1149              :       /* Increment the index into the duplicated path when we processed
    1150              :          a duplicated block.  */
    1151       374959 :       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
    1152       374959 :           || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
    1153              :         {
    1154       333819 :           count++;
    1155              :         }
    1156              :     }
    1157       290784 : }
    1158              : 
    1159              : /* Hash table traversal callback routine to create duplicate blocks.  */
    1160              : 
    1161              : int
    1162       290784 : ssa_create_duplicates (struct redirection_data **slot,
    1163              :                        ssa_local_info_t *local_info)
    1164              : {
    1165       290784 :   struct redirection_data *rd = *slot;
    1166              : 
    1167              :   /* The second duplicated block in a jump threading path is specific
    1168              :      to the path.  So it gets stored in RD rather than in LOCAL_DATA.
    1169              : 
    1170              :      Each time we're called, we have to look through the path and see
    1171              :      if a second block needs to be duplicated.
    1172              : 
    1173              :      Note the search starts with the third edge on the path.  The first
    1174              :      edge is the incoming edge, the second edge always has its source
    1175              :      duplicated.  Thus we start our search with the third edge.  */
    1176       290784 :   vec<jump_thread_edge *> *path = rd->path;
    1177       329796 :   for (unsigned int i = 2; i < path->length (); i++)
    1178              :     {
    1179        82047 :       if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
    1180        82047 :           || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
    1181              :         {
    1182        43035 :           create_block_for_threading ((*path)[i]->e->src, rd, 1,
    1183              :                                       &local_info->duplicate_blocks);
    1184        43035 :           break;
    1185              :         }
    1186              :     }
    1187              : 
    1188              :   /* Create a template block if we have not done so already.  Otherwise
    1189              :      use the template to create a new block.  */
    1190       290784 :   if (local_info->template_block == NULL)
    1191              :     {
    1192       233098 :       create_block_for_threading ((*path)[1]->e->src, rd, 0,
    1193              :                                   &local_info->duplicate_blocks);
    1194       233098 :       local_info->template_block = rd->dup_blocks[0];
    1195       233098 :       local_info->template_last_to_copy
    1196       466196 :         = gsi_last_bb (local_info->template_block);
    1197              : 
    1198              :       /* We do not create any outgoing edges for the template.  We will
    1199              :          take care of that in a later traversal.  That way we do not
    1200              :          create edges that are going to just be deleted.  */
    1201              :     }
    1202              :   else
    1203              :     {
    1204        57686 :       gimple_seq seq = NULL;
    1205        57686 :       if (gsi_stmt (local_info->template_last_to_copy)
    1206       115372 :           != gsi_stmt (gsi_last_bb (local_info->template_block)))
    1207              :         {
    1208            0 :           if (gsi_end_p (local_info->template_last_to_copy))
    1209              :             {
    1210            0 :               seq = bb_seq (local_info->template_block);
    1211            0 :               set_bb_seq (local_info->template_block, NULL);
    1212              :             }
    1213              :           else
    1214            0 :             seq = gsi_split_seq_after (local_info->template_last_to_copy);
    1215              :         }
    1216        57686 :       create_block_for_threading (local_info->template_block, rd, 0,
    1217              :                                   &local_info->duplicate_blocks);
    1218        57686 :       if (seq)
    1219              :         {
    1220            0 :           if (gsi_end_p (local_info->template_last_to_copy))
    1221            0 :             set_bb_seq (local_info->template_block, seq);
    1222              :           else
    1223            0 :             gsi_insert_seq_after (&local_info->template_last_to_copy,
    1224              :                                   seq, GSI_SAME_STMT);
    1225              :         }
    1226              : 
    1227              :       /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
    1228              :          block.   */
    1229        57686 :       ssa_fix_duplicate_block_edges (rd, local_info);
    1230              :     }
    1231              : 
    1232       290784 :   if (MAY_HAVE_DEBUG_STMTS)
    1233              :     {
    1234              :       /* Copy debug stmts from each NO_COPY src block to the block
    1235              :          that would have been its predecessor, if we can append to it
    1236              :          (we can't add stmts after a block-ending stmt), or prepending
    1237              :          to the duplicate of the successor, if there is one.  If
    1238              :          there's no duplicate successor, we'll mostly drop the blocks
    1239              :          on the floor; propagate_threaded_block_debug_into, called
    1240              :          elsewhere, will consolidate and preserve the effects of the
    1241              :          binds, but none of the markers.  */
    1242       217989 :       gimple_stmt_iterator copy_to = gsi_last_bb (rd->dup_blocks[0]);
    1243       217989 :       if (!gsi_end_p (copy_to))
    1244              :         {
    1245       215936 :           if (stmt_ends_bb_p (gsi_stmt (copy_to)))
    1246              :             {
    1247       193264 :               if (rd->dup_blocks[1])
    1248        35235 :                 copy_to = gsi_after_labels (rd->dup_blocks[1]);
    1249              :               else
    1250       158029 :                 copy_to = gsi_none ();
    1251              :             }
    1252              :           else
    1253        22672 :             gsi_next (&copy_to);
    1254              :         }
    1255       286072 :       for (unsigned int i = 2, j = 0; i < path->length (); i++)
    1256        68083 :         if ((*path)[i]->type == EDGE_NO_COPY_SRC_BLOCK
    1257        68083 :             && gsi_bb (copy_to))
    1258              :           {
    1259         7006 :             for (gimple_stmt_iterator gsi = gsi_start_bb ((*path)[i]->e->src);
    1260         7495 :                  !gsi_end_p (gsi); gsi_next (&gsi))
    1261              :               {
    1262         3992 :                 if (!is_gimple_debug (gsi_stmt (gsi)))
    1263         1534 :                   continue;
    1264         2458 :                 gimple *stmt = gsi_stmt (gsi);
    1265         2458 :                 gimple *copy = gimple_copy (stmt);
    1266         2458 :                 gsi_insert_before (&copy_to, copy, GSI_SAME_STMT);
    1267              :               }
    1268              :           }
    1269        64580 :         else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
    1270        64580 :                  || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
    1271              :           {
    1272        35634 :             j++;
    1273        35634 :             gcc_assert (j < 2);
    1274        35634 :             copy_to = gsi_last_bb (rd->dup_blocks[j]);
    1275        35634 :             if (!gsi_end_p (copy_to))
    1276              :               {
    1277        35539 :                 if (stmt_ends_bb_p (gsi_stmt (copy_to)))
    1278        29785 :                   copy_to = gsi_none ();
    1279              :                 else
    1280         5754 :                   gsi_next (&copy_to);
    1281              :               }
    1282              :           }
    1283              :     }
    1284              : 
    1285              :   /* Keep walking the hash table.  */
    1286       290784 :   return 1;
    1287              : }
    1288              : 
    1289              : /* We did not create any outgoing edges for the template block during
    1290              :    block creation.  This hash table traversal callback creates the
    1291              :    outgoing edge for the template block.  */
    1292              : 
    1293              : inline int
    1294       233098 : ssa_fixup_template_block (struct redirection_data **slot,
    1295              :                           ssa_local_info_t *local_info)
    1296              : {
    1297       233098 :   struct redirection_data *rd = *slot;
    1298              : 
    1299              :   /* If this is the template block halt the traversal after updating
    1300              :      it appropriately.
    1301              : 
    1302              :      If we were threading through an joiner block, then we want
    1303              :      to keep its control statement and redirect an outgoing edge.
    1304              :      Else we want to remove the control statement & edges, then create
    1305              :      a new outgoing edge.  In both cases we may need to update PHIs.  */
    1306       233098 :   if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
    1307              :     {
    1308       233098 :       ssa_fix_duplicate_block_edges (rd, local_info);
    1309       233098 :       return 0;
    1310              :     }
    1311              : 
    1312              :   return 1;
    1313              : }
    1314              : 
    1315              : /* Hash table traversal callback to redirect each incoming edge
    1316              :    associated with this hash table element to its new destination.  */
    1317              : 
    1318              : static int
    1319       290784 : ssa_redirect_edges (struct redirection_data **slot,
    1320              :                     ssa_local_info_t *local_info)
    1321              : {
    1322       290784 :   struct redirection_data *rd = *slot;
    1323       290784 :   struct el *next, *el;
    1324              : 
    1325              :   /* Walk over all the incoming edges associated with this hash table
    1326              :      entry.  */
    1327       652160 :   for (el = rd->incoming_edges; el; el = next)
    1328              :     {
    1329       361376 :       edge e = el->e;
    1330       361376 :       vec<jump_thread_edge *> *path = THREAD_PATH (e);
    1331              : 
    1332              :       /* Go ahead and free this element from the list.  Doing this now
    1333              :          avoids the need for another list walk when we destroy the hash
    1334              :          table.  */
    1335       361376 :       next = el->next;
    1336       361376 :       free (el);
    1337              : 
    1338       361376 :       local_info->num_threaded_edges++;
    1339              : 
    1340       361376 :       if (rd->dup_blocks[0])
    1341              :         {
    1342       361376 :           edge e2;
    1343              : 
    1344       361376 :           if (dump_file && (dump_flags & TDF_DETAILS))
    1345           25 :             fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
    1346           25 :                      e->src->index, e->dest->index, rd->dup_blocks[0]->index);
    1347              : 
    1348              :           /* Redirect the incoming edge (possibly to the joiner block) to the
    1349              :              appropriate duplicate block.  */
    1350       361376 :           e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
    1351       361376 :           gcc_assert (e == e2);
    1352       361376 :           flush_pending_stmts (e2);
    1353              :         }
    1354              : 
    1355              :       /* Go ahead and clear E->aux.  It's not needed anymore and failure
    1356              :          to clear it will cause all kinds of unpleasant problems later.  */
    1357       361376 :       path->release ();
    1358       361376 :       e->aux = NULL;
    1359              : 
    1360              :     }
    1361              : 
    1362              :   /* Indicate that we actually threaded one or more jumps.  */
    1363       290784 :   if (rd->incoming_edges)
    1364       290784 :     local_info->jumps_threaded = true;
    1365              : 
    1366       290784 :   return 1;
    1367              : }
    1368              : 
    1369              : /* Return true if this block has no executable statements other than
    1370              :    a simple ctrl flow instruction.  When the number of outgoing edges
    1371              :    is one, this is equivalent to a "forwarder" block.  */
    1372              : 
    1373              : static bool
    1374       160118 : redirection_block_p (basic_block bb)
    1375              : {
    1376       160118 :   gimple_stmt_iterator gsi;
    1377              : 
    1378              :   /* Advance to the first executable statement.  */
    1379       160118 :   gsi = gsi_start_bb (bb);
    1380       160118 :   while (!gsi_end_p (gsi)
    1381       357556 :          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
    1382              :              || is_gimple_debug (gsi_stmt (gsi))
    1383              :              || gimple_nop_p (gsi_stmt (gsi))
    1384       159437 :              || gimple_clobber_p (gsi_stmt (gsi))))
    1385       197438 :     gsi_next (&gsi);
    1386              : 
    1387              :   /* Check if this is an empty block.  */
    1388       160118 :   if (gsi_end_p (gsi))
    1389              :     return true;
    1390              : 
    1391              :   /* Test that we've reached the terminating control statement.  */
    1392       159128 :   return gsi_stmt (gsi)
    1393       159128 :          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
    1394              :              || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
    1395              :              || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
    1396              : }
    1397              : 
    1398              : /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
    1399              :    is reached via one or more specific incoming edges, we know which
    1400              :    outgoing edge from BB will be traversed.
    1401              : 
    1402              :    We want to redirect those incoming edges to the target of the
    1403              :    appropriate outgoing edge.  Doing so avoids a conditional branch
    1404              :    and may expose new optimization opportunities.  Note that we have
    1405              :    to update dominator tree and SSA graph after such changes.
    1406              : 
    1407              :    The key to keeping the SSA graph update manageable is to duplicate
    1408              :    the side effects occurring in BB so that those side effects still
    1409              :    occur on the paths which bypass BB after redirecting edges.
    1410              : 
    1411              :    We accomplish this by creating duplicates of BB and arranging for
    1412              :    the duplicates to unconditionally pass control to one specific
    1413              :    successor of BB.  We then revector the incoming edges into BB to
    1414              :    the appropriate duplicate of BB.
    1415              : 
    1416              :    If NOLOOP_ONLY is true, we only perform the threading as long as it
    1417              :    does not affect the structure of the loops in a nontrivial way.
    1418              : 
    1419              :    If JOINERS is true, then thread through joiner blocks as well.  */
    1420              : 
    1421              : bool
    1422       737656 : fwd_jt_path_registry::thread_block_1 (basic_block bb,
    1423              :                                       bool noloop_only,
    1424              :                                       bool joiners)
    1425              : {
    1426              :   /* E is an incoming edge into BB that we may or may not want to
    1427              :      redirect to a duplicate of BB.  */
    1428       737656 :   edge e, e2;
    1429       737656 :   edge_iterator ei;
    1430       737656 :   ssa_local_info_t local_info;
    1431              : 
    1432       737656 :   local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
    1433       737656 :   local_info.need_profile_correction = false;
    1434       737656 :   local_info.num_threaded_edges = 0;
    1435              : 
    1436              :   /* To avoid scanning a linear array for the element we need we instead
    1437              :      use a hash table.  For normal code there should be no noticeable
    1438              :      difference.  However, if we have a block with a large number of
    1439              :      incoming and outgoing edges such linear searches can get expensive.  */
    1440       737656 :   m_redirection_data
    1441      1475312 :     = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));
    1442              : 
    1443              :   /* Record each unique threaded destination into a hash table for
    1444              :      efficient lookups.  */
    1445       737656 :   edge last = NULL;
    1446      2273910 :   FOR_EACH_EDGE (e, ei, bb->preds)
    1447              :     {
    1448      1536254 :       if (e->aux == NULL)
    1449       762159 :         continue;
    1450              : 
    1451       774095 :       vec<jump_thread_edge *> *path = THREAD_PATH (e);
    1452              : 
    1453      1111929 :       if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
    1454       943012 :           || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
    1455       258389 :         continue;
    1456              : 
    1457              :       /* When a NO_COPY_SRC block became non-empty cancel the path.  */
    1458       515706 :       if (path->last ()->type == EDGE_NO_COPY_SRC_BLOCK)
    1459              :         {
    1460        55869 :           auto gsi = gsi_start_nondebug_bb (path->last ()->e->src);
    1461        55869 :           if (!gsi_end_p (gsi)
    1462        55869 :               && !is_ctrl_stmt (gsi_stmt (gsi)))
    1463              :             {
    1464            0 :               cancel_thread (path, "Non-empty EDGE_NO_COPY_SRC_BLOCK");
    1465            0 :               e->aux = NULL;
    1466            0 :               continue;
    1467              :             }
    1468              :         }
    1469              : 
    1470       515706 :       e2 = path->last ()->e;
    1471       515706 :       if (!e2 || noloop_only)
    1472              :         {
    1473              :           /* If NOLOOP_ONLY is true, we only allow threading through the
    1474              :              header of a loop to exit edges.  */
    1475              : 
    1476              :           /* One case occurs when there was loop header buried in a jump
    1477              :              threading path that crosses loop boundaries.  We do not try
    1478              :              and thread this elsewhere, so just cancel the jump threading
    1479              :              request by clearing the AUX field now.  */
    1480       536470 :           if (bb->loop_father != e2->src->loop_father
    1481       512952 :               && (!loop_exit_edge_p (e2->src->loop_father, e2)
    1482          984 :                   || flow_loop_nested_p (bb->loop_father,
    1483          984 :                                          e2->dest->loop_father)))
    1484              :             {
    1485              :               /* Since this case is not handled by our special code
    1486              :                  to thread through a loop header, we must explicitly
    1487              :                  cancel the threading request here.  */
    1488        23518 :               cancel_thread (path, "Threading through unhandled loop header");
    1489        23518 :               e->aux = NULL;
    1490        23518 :               continue;
    1491              :             }
    1492              : 
    1493              :           /* Another case occurs when trying to thread through our
    1494              :              own loop header, possibly from inside the loop.  We will
    1495              :              thread these later.  */
    1496              :           unsigned int i;
    1497       956708 :           for (i = 1; i < path->length (); i++)
    1498              :             {
    1499       598000 :               if ((*path)[i]->e->src == bb->loop_father->header
    1500       598000 :                   && (!loop_exit_edge_p (bb->loop_father, e2)
    1501         2437 :                       || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
    1502              :                 break;
    1503              :             }
    1504              : 
    1505       978868 :           if (i != path->length ())
    1506       130726 :             continue;
    1507              : 
    1508              :           /* Loop parallelization can be confused by the result of
    1509              :              threading through the loop exit test back into the loop.
    1510              :              However, theading those jumps seems to help other codes.
    1511              : 
    1512              :              I have been unable to find anything related to the shape of
    1513              :              the CFG, the contents of the affected blocks, etc which would
    1514              :              allow a more sensible test than what we're using below which
    1515              :              merely avoids the optimization when parallelizing loops.  */
    1516       358708 :           if (flag_tree_parallelize_loops > 1)
    1517              :             {
    1518          212 :               for (i = 1; i < path->length (); i++)
    1519          155 :                 if (bb->loop_father == e2->src->loop_father
    1520          155 :                     && loop_exits_from_bb_p (bb->loop_father,
    1521          155 :                                              (*path)[i]->e->src)
    1522          246 :                     && !loop_exit_edge_p (bb->loop_father, e2))
    1523              :                   break;
    1524              : 
    1525          286 :               if (i != path->length ())
    1526              :                 {
    1527           86 :                   cancel_thread (path, "Threading through loop exit");
    1528           86 :                   e->aux = NULL;
    1529           86 :                   continue;
    1530              :                 }
    1531              :             }
    1532              :         }
    1533              : 
    1534              :       /* Insert the outgoing edge into the hash table if it is not
    1535              :          already in the hash table.  */
    1536       361376 :       lookup_redirection_data (e, INSERT);
    1537              : 
    1538              :       /* When we have thread paths through a common joiner with different
    1539              :          final destinations, then we may need corrections to deal with
    1540              :          profile insanities.  See the big comment before compute_path_counts.  */
    1541       361376 :       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
    1542              :         {
    1543       104274 :           if (!last)
    1544              :             last = e2;
    1545        39762 :           else if (e2 != last)
    1546        18816 :             local_info.need_profile_correction = true;
    1547              :         }
    1548              :     }
    1549              : 
    1550              :   /* We do not update dominance info.  */
    1551       737656 :   free_dominance_info (CDI_DOMINATORS);
    1552              : 
    1553              :   /* We know we only thread through the loop header to loop exits.
    1554              :      Let the basic block duplication hook know we are not creating
    1555              :      a multiple entry loop.  */
    1556       737656 :   if (noloop_only
    1557       732148 :       && bb == bb->loop_father->header)
    1558       275278 :     set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
    1559              : 
    1560              :   /* Now create duplicates of BB.
    1561              : 
    1562              :      Note that for a block with a high outgoing degree we can waste
    1563              :      a lot of time and memory creating and destroying useless edges.
    1564              : 
    1565              :      So we first duplicate BB and remove the control structure at the
    1566              :      tail of the duplicate as well as all outgoing edges from the
    1567              :      duplicate.  We then use that duplicate block as a template for
    1568              :      the rest of the duplicates.  */
    1569       737656 :   local_info.template_block = NULL;
    1570       737656 :   local_info.bb = bb;
    1571       737656 :   local_info.jumps_threaded = false;
    1572       737656 :   m_redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
    1573      1028440 :                             (&local_info);
    1574              : 
    1575              :   /* The template does not have an outgoing edge.  Create that outgoing
    1576              :      edge and update PHI nodes as the edge's target as necessary.
    1577              : 
    1578              :      We do this after creating all the duplicates to avoid creating
    1579              :      unnecessary edges.  */
    1580       737656 :   m_redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
    1581       970754 :                             (&local_info);
    1582              : 
    1583              :   /* The hash table traversals above created the duplicate blocks (and the
    1584              :      statements within the duplicate blocks).  This loop creates PHI nodes for
    1585              :      the duplicated blocks and redirects the incoming edges into BB to reach
    1586              :      the duplicates of BB.  */
    1587       737656 :   m_redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
    1588      1028440 :                             (&local_info);
    1589              : 
    1590              :   /* Done with this block.  Clear REDIRECTION_DATA.  */
    1591       737656 :   delete m_redirection_data;
    1592       737656 :   m_redirection_data = NULL;
    1593              : 
    1594       737656 :   if (noloop_only
    1595       732148 :       && bb == bb->loop_father->header)
    1596       275278 :     set_loop_copy (bb->loop_father, NULL);
    1597              : 
    1598       737656 :   BITMAP_FREE (local_info.duplicate_blocks);
    1599       737656 :   local_info.duplicate_blocks = NULL;
    1600              : 
    1601       737656 :   m_num_threaded_edges += local_info.num_threaded_edges;
    1602              : 
    1603              :   /* Indicate to our caller whether or not any jumps were threaded.  */
    1604       737656 :   return local_info.jumps_threaded;
    1605              : }
    1606              : 
    1607              : /* Wrapper for thread_block_1 so that we can first handle jump
    1608              :    thread paths which do not involve copying joiner blocks, then
    1609              :    handle jump thread paths which have joiner blocks.
    1610              : 
    1611              :    By doing things this way we can be as aggressive as possible and
    1612              :    not worry that copying a joiner block will create a jump threading
    1613              :    opportunity.  */
    1614              : 
    1615              : bool
    1616       368828 : fwd_jt_path_registry::thread_block (basic_block bb, bool noloop_only)
    1617              : {
    1618       368828 :   bool retval;
    1619       368828 :   retval = thread_block_1 (bb, noloop_only, false);
    1620       368828 :   retval |= thread_block_1 (bb, noloop_only, true);
    1621       368828 :   return retval;
    1622              : }
    1623              : 
    1624              : /* Callback for dfs_enumerate_from.  Returns true if BB is different
    1625              :    from STOP and DBDS_CE_STOP.  */
    1626              : 
    1627              : static basic_block dbds_ce_stop;
    1628              : static bool
    1629      1541557 : dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
    1630              : {
    1631      1541557 :   return (bb != (const_basic_block) stop
    1632      1541557 :           && bb != dbds_ce_stop);
    1633              : }
    1634              : 
    1635              : /* Evaluates the dominance relationship of latch of the LOOP and BB, and
    1636              :    returns the state.  */
    1637              : 
    1638              : enum bb_dom_status
    1639       152315 : determine_bb_domination_status (class loop *loop, basic_block bb)
    1640              : {
    1641       152315 :   basic_block *bblocks;
    1642       152315 :   unsigned nblocks, i;
    1643       152315 :   bool bb_reachable = false;
    1644       152315 :   edge_iterator ei;
    1645       152315 :   edge e;
    1646              : 
    1647              :   /* This function assumes BB is a successor of LOOP->header.
    1648              :      If that is not the case return DOMST_NONDOMINATING which
    1649              :      is always safe.  */
    1650       152315 :     {
    1651       152315 :       bool ok = false;
    1652              : 
    1653       252966 :       FOR_EACH_EDGE (e, ei, bb->preds)
    1654              :         {
    1655       199070 :           if (e->src == loop->header)
    1656              :             {
    1657              :               ok = true;
    1658              :               break;
    1659              :             }
    1660              :         }
    1661              : 
    1662       152315 :       if (!ok)
    1663              :         return DOMST_NONDOMINATING;
    1664              :     }
    1665              : 
    1666        98419 :   if (bb == loop->latch)
    1667              :     return DOMST_DOMINATING;
    1668              : 
    1669              :   /* Check that BB dominates LOOP->latch, and that it is back-reachable
    1670              :      from it.  */
    1671              : 
    1672        94974 :   bblocks = XCNEWVEC (basic_block, loop->num_nodes);
    1673        94974 :   dbds_ce_stop = loop->header;
    1674       189948 :   nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
    1675        94974 :                                 bblocks, loop->num_nodes, bb);
    1676       960160 :   for (i = 0; i < nblocks; i++)
    1677      2251510 :     FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
    1678              :       {
    1679      1386324 :         if (e->src == loop->header)
    1680              :           {
    1681        48805 :             free (bblocks);
    1682        48805 :             return DOMST_NONDOMINATING;
    1683              :           }
    1684      1337519 :         if (e->src == bb)
    1685        93028 :           bb_reachable = true;
    1686              :       }
    1687              : 
    1688        46169 :   free (bblocks);
    1689        46169 :   return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
    1690              : }
    1691              : 
    1692              : /* Thread jumps through the header of LOOP.  Returns true if cfg changes.
    1693              :    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
    1694              :    to the inside of the loop.  */
    1695              : 
    1696              : bool
    1697       137639 : fwd_jt_path_registry::thread_through_loop_header (class loop *loop,
    1698              :                                                   bool may_peel_loop_headers)
    1699              : {
    1700       137639 :   basic_block header = loop->header;
    1701       137639 :   edge e, tgt_edge, latch = loop_latch_edge (loop);
    1702       137639 :   edge_iterator ei;
    1703       137639 :   basic_block tgt_bb, atgt_bb;
    1704       137639 :   enum bb_dom_status domst;
    1705              : 
    1706              :   /* We have already threaded through headers to exits, so all the threading
    1707              :      requests now are to the inside of the loop.  We need to avoid creating
    1708              :      irreducible regions (i.e., loops with more than one entry block), and
    1709              :      also loop with several latch edges, or new subloops of the loop (although
    1710              :      there are cases where it might be appropriate, it is difficult to decide,
    1711              :      and doing it wrongly may confuse other optimizers).
    1712              : 
    1713              :      We could handle more general cases here.  However, the intention is to
    1714              :      preserve some information about the loop, which is impossible if its
    1715              :      structure changes significantly, in a way that is not well understood.
    1716              :      Thus we only handle few important special cases, in which also updating
    1717              :      of the loop-carried information should be feasible:
    1718              : 
    1719              :      1) Propagation of latch edge to a block that dominates the latch block
    1720              :         of a loop.  This aims to handle the following idiom:
    1721              : 
    1722              :         first = 1;
    1723              :         while (1)
    1724              :           {
    1725              :             if (first)
    1726              :               initialize;
    1727              :             first = 0;
    1728              :             body;
    1729              :           }
    1730              : 
    1731              :         After threading the latch edge, this becomes
    1732              : 
    1733              :         first = 1;
    1734              :         if (first)
    1735              :           initialize;
    1736              :         while (1)
    1737              :           {
    1738              :             first = 0;
    1739              :             body;
    1740              :           }
    1741              : 
    1742              :         The original header of the loop is moved out of it, and we may thread
    1743              :         the remaining edges through it without further constraints.
    1744              : 
    1745              :      2) All entry edges are propagated to a single basic block that dominates
    1746              :         the latch block of the loop.  This aims to handle the following idiom
    1747              :         (normally created for "for" loops):
    1748              : 
    1749              :         i = 0;
    1750              :         while (1)
    1751              :           {
    1752              :             if (i >= 100)
    1753              :               break;
    1754              :             body;
    1755              :             i++;
    1756              :           }
    1757              : 
    1758              :         This becomes
    1759              : 
    1760              :         i = 0;
    1761              :         while (1)
    1762              :           {
    1763              :             body;
    1764              :             i++;
    1765              :             if (i >= 100)
    1766              :               break;
    1767              :           }
    1768              :      */
    1769              : 
    1770              :   /* Threading through the header won't improve the code if the header has just
    1771              :      one successor.  */
    1772       137639 :   if (single_succ_p (header))
    1773          252 :     goto fail;
    1774              : 
    1775       137387 :   if (!may_peel_loop_headers && !redirection_block_p (loop->header))
    1776       126049 :     goto fail;
    1777              :   else
    1778              :     {
    1779        11338 :       tgt_bb = NULL;
    1780        11338 :       tgt_edge = NULL;
    1781        25599 :       FOR_EACH_EDGE (e, ei, header->preds)
    1782              :         {
    1783        20515 :           if (!e->aux)
    1784              :             {
    1785        11037 :               if (e == latch)
    1786         9474 :                 continue;
    1787              : 
    1788              :               /* If latch is not threaded, and there is a header
    1789              :                  edge that is not threaded, we would create loop
    1790              :                  with multiple entries.  */
    1791         1563 :               goto fail;
    1792              :             }
    1793              : 
    1794         9478 :           vec<jump_thread_edge *> *path = THREAD_PATH (e);
    1795              : 
    1796         9478 :           if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
    1797         4691 :             goto fail;
    1798         4787 :           tgt_edge = (*path)[1]->e;
    1799         4787 :           atgt_bb = tgt_edge->dest;
    1800         4787 :           if (!tgt_bb)
    1801              :             tgt_bb = atgt_bb;
    1802              :           /* Two targets of threading would make us create loop
    1803              :              with multiple entries.  */
    1804            0 :           else if (tgt_bb != atgt_bb)
    1805            0 :             goto fail;
    1806              :         }
    1807              : 
    1808         5084 :       if (!tgt_bb)
    1809              :         {
    1810              :           /* There are no threading requests.  */
    1811              :           return false;
    1812              :         }
    1813              : 
    1814              :       /* Redirecting to empty loop latch is useless.  */
    1815         4410 :       if (tgt_bb == loop->latch
    1816         4410 :           && empty_block_p (loop->latch))
    1817            2 :         goto fail;
    1818              :     }
    1819              : 
    1820              :   /* The target block must dominate the loop latch, otherwise we would be
    1821              :      creating a subloop.  */
    1822         4408 :   domst = determine_bb_domination_status (loop, tgt_bb);
    1823         4408 :   if (domst == DOMST_NONDOMINATING)
    1824         1654 :     goto fail;
    1825         2754 :   if (domst == DOMST_LOOP_BROKEN)
    1826              :     {
    1827              :       /* If the loop ceased to exist, mark it as such, and thread through its
    1828              :          original header.  */
    1829            0 :       mark_loop_for_removal (loop);
    1830            0 :       return thread_block (header, false);
    1831              :     }
    1832              : 
    1833         2754 :   if (tgt_bb->loop_father->header == tgt_bb)
    1834              :     {
    1835              :       /* If the target of the threading is a header of a subloop, we need
    1836              :          to create a preheader for it, so that the headers of the two loops
    1837              :          do not merge.  */
    1838            0 :       if (EDGE_COUNT (tgt_bb->preds) > 2)
    1839              :         {
    1840            0 :           tgt_bb = create_preheader (tgt_bb->loop_father, 0);
    1841            0 :           gcc_assert (tgt_bb != NULL);
    1842              :         }
    1843              :       else
    1844            0 :         tgt_bb = split_edge (tgt_edge);
    1845              :     }
    1846              : 
    1847         2754 :   basic_block new_preheader;
    1848              : 
    1849              :   /* Now consider the case entry edges are redirected to the new entry
    1850              :      block.  Remember one entry edge, so that we can find the new
    1851              :      preheader (its destination after threading).  */
    1852         4188 :   FOR_EACH_EDGE (e, ei, header->preds)
    1853              :     {
    1854         4188 :       if (e->aux)
    1855              :         break;
    1856              :     }
    1857              : 
    1858              :   /* The duplicate of the header is the new preheader of the loop.  Ensure
    1859              :      that it is placed correctly in the loop hierarchy.  */
    1860         2754 :   set_loop_copy (loop, loop_outer (loop));
    1861              : 
    1862         2754 :   thread_block (header, false);
    1863         2754 :   set_loop_copy (loop, NULL);
    1864         2754 :   new_preheader = e->dest;
    1865              : 
    1866              :   /* Create the new latch block.  This is always necessary, as the latch
    1867              :      must have only a single successor, but the original header had at
    1868              :      least two successors.  */
    1869         2754 :   loop->latch = NULL;
    1870         2754 :   mfb_kj_edge = single_succ_edge (new_preheader);
    1871         2754 :   loop->header = mfb_kj_edge->dest;
    1872         2754 :   latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
    1873         2754 :   loop->header = latch->dest;
    1874         2754 :   loop->latch = latch->src;
    1875         2754 :   return true;
    1876              : 
    1877       134211 : fail:
    1878              :   /* We failed to thread anything.  Cancel the requests.  */
    1879       403210 :   FOR_EACH_EDGE (e, ei, header->preds)
    1880              :     {
    1881       268999 :       vec<jump_thread_edge *> *path = THREAD_PATH (e);
    1882              : 
    1883       268999 :       if (path)
    1884              :         {
    1885       127972 :           cancel_thread (path, "Failure in thread_through_loop_header");
    1886       127972 :           e->aux = NULL;
    1887              :         }
    1888              :     }
    1889              :   return false;
    1890              : }
    1891              : 
    1892              : /* E1 and E2 are edges into the same basic block.  Return TRUE if the
    1893              :    PHI arguments associated with those edges are equal or there are no
    1894              :    PHI arguments, otherwise return FALSE.  */
    1895              : 
    1896              : static bool
    1897        28799 : phi_args_equal_on_edges (edge e1, edge e2)
    1898              : {
    1899        28799 :   gphi_iterator gsi;
    1900        28799 :   int indx1 = e1->dest_idx;
    1901        28799 :   int indx2 = e2->dest_idx;
    1902              : 
    1903        45506 :   for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    1904              :     {
    1905        18008 :       gphi *phi = gsi.phi ();
    1906              : 
    1907        18008 :       if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
    1908        18008 :                             gimple_phi_arg_def (phi, indx2), 0))
    1909              :         return false;
    1910              :     }
    1911              :   return true;
    1912              : }
    1913              : 
    1914              : /* Return the number of non-debug statements and non-virtual PHIs in a
    1915              :    block.  */
    1916              : 
    1917              : static unsigned int
    1918         9418 : count_stmts_and_phis_in_block (basic_block bb)
    1919              : {
    1920         9418 :   unsigned int num_stmts = 0;
    1921              : 
    1922         9418 :   gphi_iterator gpi;
    1923        24580 :   for (gpi = gsi_start_phis (bb); !gsi_end_p (gpi); gsi_next (&gpi))
    1924        30324 :     if (!virtual_operand_p (PHI_RESULT (gpi.phi ())))
    1925         9193 :       num_stmts++;
    1926              : 
    1927         9418 :   gimple_stmt_iterator gsi;
    1928        78211 :   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    1929              :     {
    1930        59375 :       gimple *stmt = gsi_stmt (gsi);
    1931        59375 :       if (!is_gimple_debug (stmt))
    1932        53117 :         num_stmts++;
    1933              :     }
    1934              : 
    1935         9418 :   return num_stmts;
    1936              : }
    1937              : 
    1938              : 
    1939              : /* Walk through the registered jump threads and convert them into a
    1940              :    form convenient for this pass.
    1941              : 
    1942              :    Any block which has incoming edges threaded to outgoing edges
    1943              :    will have its entry in THREADED_BLOCK set.
    1944              : 
    1945              :    Any threaded edge will have its new outgoing edge stored in the
    1946              :    original edge's AUX field.
    1947              : 
    1948              :    This form avoids the need to walk all the edges in the CFG to
    1949              :    discover blocks which need processing and avoids unnecessary
    1950              :    hash table lookups to map from threaded edge to new target.  */
    1951              : 
    1952              : void
    1953       156638 : fwd_jt_path_registry::mark_threaded_blocks (bitmap threaded_blocks)
    1954              : {
    1955       156638 :   unsigned int i;
    1956       156638 :   bitmap_iterator bi;
    1957       156638 :   auto_bitmap tmp;
    1958       156638 :   basic_block bb;
    1959       156638 :   edge e;
    1960       156638 :   edge_iterator ei;
    1961              : 
    1962              :   /* It is possible to have jump threads in which one is a subpath
    1963              :      of the other.  ie, (A, B), (B, C), (C, D) where B is a joiner
    1964              :      block and (B, C), (C, D) where no joiner block exists.
    1965              : 
    1966              :      When this occurs ignore the jump thread request with the joiner
    1967              :      block.  It's totally subsumed by the simpler jump thread request.
    1968              : 
    1969              :      This results in less block copying, simpler CFGs.  More importantly,
    1970              :      when we duplicate the joiner block, B, in this case we will create
    1971              :      a new threading opportunity that we wouldn't be able to optimize
    1972              :      until the next jump threading iteration.
    1973              : 
    1974              :      So first convert the jump thread requests which do not require a
    1975              :      joiner block.  */
    1976       812906 :   for (i = 0; i < m_paths.length (); i++)
    1977              :     {
    1978       656268 :       vec<jump_thread_edge *> *path = m_paths[i];
    1979              : 
    1980      1312536 :       if (path->length () > 1
    1981      1312536 :           && (*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
    1982              :         {
    1983       352451 :           edge e = (*path)[0]->e;
    1984       352451 :           e->aux = (void *)path;
    1985       352451 :           bitmap_set_bit (tmp, e->dest->index);
    1986              :         }
    1987              :     }
    1988              : 
    1989              :   /* Now iterate again, converting cases where we want to thread
    1990              :      through a joiner block, but only if no other edge on the path
    1991              :      already has a jump thread attached to it.  We do this in two passes,
    1992              :      to avoid situations where the order in the paths vec can hide overlapping
    1993              :      threads (the path is recorded on the incoming edge, so we would miss
    1994              :      cases where the second path starts at a downstream edge on the same
    1995              :      path).  First record all joiner paths, deleting any in the unexpected
    1996              :      case where there is already a path for that incoming edge.  */
    1997       812906 :   for (i = 0; i < m_paths.length ();)
    1998              :     {
    1999       656268 :       vec<jump_thread_edge *> *path = m_paths[i];
    2000              : 
    2001       656268 :       if (path->length () > 1
    2002      1312536 :           && (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
    2003              :         {
    2004              :           /* Attach the path to the starting edge if none is yet recorded.  */
    2005       303817 :           if ((*path)[0]->e->aux == NULL)
    2006              :             {
    2007       255045 :               (*path)[0]->e->aux = path;
    2008       255045 :               i++;
    2009              :             }
    2010              :           else
    2011              :             {
    2012        48772 :               m_paths.unordered_remove (i);
    2013        48772 :               cancel_thread (path);
    2014              :             }
    2015              :         }
    2016              :       else
    2017              :         {
    2018       352451 :           i++;
    2019              :         }
    2020              :     }
    2021              : 
    2022              :   /* Second, look for paths that have any other jump thread attached to
    2023              :      them, and either finish converting them or cancel them.  */
    2024       764134 :   for (i = 0; i < m_paths.length ();)
    2025              :     {
    2026       607496 :       vec<jump_thread_edge *> *path = m_paths[i];
    2027       607496 :       edge e = (*path)[0]->e;
    2028              : 
    2029       607496 :       if (path->length () > 1
    2030      1214992 :           && (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
    2031              :         {
    2032              :           unsigned int j;
    2033       608570 :           for (j = 1; j < path->length (); j++)
    2034       433603 :             if ((*path)[j]->e->aux != NULL)
    2035              :               break;
    2036              : 
    2037              :           /* If we iterated through the entire path without exiting the loop,
    2038              :              then we are good to go, record it.  */
    2039       255045 :           if (j == path->length ())
    2040              :             {
    2041       174967 :               bitmap_set_bit (tmp, e->dest->index);
    2042       174967 :               i++;
    2043              :             }
    2044              :           else
    2045              :             {
    2046        80078 :               e->aux = NULL;
    2047        80078 :               m_paths.unordered_remove (i);
    2048        80078 :               cancel_thread (path);
    2049              :             }
    2050              :         }
    2051              :       else
    2052              :         {
    2053       352451 :           i++;
    2054              :         }
    2055              :     }
    2056              : 
    2057              :   /* When optimizing for size, prune all thread paths where statement
    2058              :      duplication is necessary.
    2059              : 
    2060              :      We walk the jump thread path looking for copied blocks.  There's
    2061              :      two types of copied blocks.
    2062              : 
    2063              :        EDGE_COPY_SRC_JOINER_BLOCK is always copied and thus we will
    2064              :        cancel the jump threading request when optimizing for size.
    2065              : 
    2066              :        EDGE_COPY_SRC_BLOCK which is copied, but some of its statements
    2067              :        will be killed by threading.  If threading does not kill all of
    2068              :        its statements, then we should cancel the jump threading request
    2069              :        when optimizing for size.  */
    2070       156638 :   if (optimize_function_for_size_p (cfun))
    2071              :     {
    2072        22594 :       EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
    2073              :         {
    2074        51677 :           FOR_EACH_EDGE (e, ei, BASIC_BLOCK_FOR_FN (cfun, i)->preds)
    2075        35349 :             if (e->aux)
    2076              :               {
    2077              :                 vec<jump_thread_edge *> *path = THREAD_PATH (e);
    2078              : 
    2079              :                 unsigned int j;
    2080        34672 :                 for (j = 1; j < path->length (); j++)
    2081              :                   {
    2082        24972 :                     bb = (*path)[j]->e->src;
    2083        24972 :                     if (redirection_block_p (bb))
    2084              :                       ;
    2085        13847 :                     else if ((*path)[j]->type == EDGE_COPY_SRC_JOINER_BLOCK
    2086        13847 :                              || ((*path)[j]->type == EDGE_COPY_SRC_BLOCK
    2087        18836 :                                  && (count_stmts_and_phis_in_block (bb)
    2088         9418 :                                      != estimate_threading_killed_stmts (bb))))
    2089              :                       break;
    2090              :                   }
    2091              : 
    2092        45730 :                 if (j != path->length ())
    2093              :                   {
    2094        13165 :                     cancel_thread (path);
    2095        13165 :                     e->aux = NULL;
    2096              :                   }
    2097              :                 else
    2098         9700 :                   bitmap_set_bit (threaded_blocks, i);
    2099              :               }
    2100              :         }
    2101              :     }
    2102              :   else
    2103       150372 :     bitmap_copy (threaded_blocks, tmp);
    2104              : 
    2105              :   /* If we have a joiner block (J) which has two successors S1 and S2 and
    2106              :      we are threading though S1 and the final destination of the thread
    2107              :      is S2, then we must verify that any PHI nodes in S2 have the same
    2108              :      PHI arguments for the edge J->S2 and J->S1->...->S2.
    2109              : 
    2110              :      We used to detect this prior to registering the jump thread, but
    2111              :      that prohibits propagation of edge equivalences into non-dominated
    2112              :      PHI nodes as the equivalency test might occur before propagation.
    2113              : 
    2114              :      This must also occur after we truncate any jump threading paths
    2115              :      as this scenario may only show up after truncation.
    2116              : 
    2117              :      This works for now, but will need improvement as part of the FSA
    2118              :      optimization.
    2119              : 
    2120              :      Note since we've moved the thread request data to the edges,
    2121              :      we have to iterate on those rather than the threaded_edges vector.  */
    2122       533485 :   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
    2123              :     {
    2124       376847 :       bb = BASIC_BLOCK_FOR_FN (cfun, i);
    2125      1291403 :       FOR_EACH_EDGE (e, ei, bb->preds)
    2126              :         {
    2127       914556 :           if (e->aux)
    2128              :             {
    2129       514253 :               vec<jump_thread_edge *> *path = THREAD_PATH (e);
    2130       514253 :               bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
    2131              : 
    2132       514253 :               if (have_joiner)
    2133              :                 {
    2134       170218 :                   basic_block joiner = e->dest;
    2135       170218 :                   edge final_edge = path->last ()->e;
    2136       170218 :                   basic_block final_dest = final_edge->dest;
    2137       170218 :                   edge e2 = find_edge (joiner, final_dest);
    2138              : 
    2139       170218 :                   if (e2 && !phi_args_equal_on_edges (e2, final_edge))
    2140              :                     {
    2141         1301 :                       cancel_thread (path);
    2142         1301 :                       e->aux = NULL;
    2143              :                     }
    2144              :                 }
    2145              :             }
    2146              :         }
    2147              :     }
    2148              : 
    2149              :   /* Look for jump threading paths which cross multiple loop headers.
    2150              : 
    2151              :      The code to thread through loop headers will change the CFG in ways
    2152              :      that invalidate the cached loop iteration information.  So we must
    2153              :      detect that case and wipe the cached information.  */
    2154       533485 :   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
    2155              :     {
    2156       376847 :       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
    2157      1291403 :       FOR_EACH_EDGE (e, ei, bb->preds)
    2158              :         {
    2159       914556 :           if (e->aux)
    2160              :             {
    2161       512952 :               gcc_assert (loops_state_satisfies_p
    2162              :                             (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS));
    2163              :               vec<jump_thread_edge *> *path = THREAD_PATH (e);
    2164              : 
    2165      1207083 :               for (unsigned int i = 0, crossed_headers = 0;
    2166      2123315 :                    i < path->length ();
    2167              :                    i++)
    2168              :                 {
    2169      1208759 :                   basic_block dest = (*path)[i]->e->dest;
    2170      1208759 :                   basic_block src = (*path)[i]->e->src;
    2171              :                   /* If we enter a loop.  */
    2172      1208759 :                   if (flow_loop_nested_p (src->loop_father, dest->loop_father))
    2173       152672 :                     ++crossed_headers;
    2174              :                   /* If we step from a block outside an irreducible region
    2175              :                      to a block inside an irreducible region, then we have
    2176              :                      crossed into a loop.  */
    2177      1056087 :                   else if (! (src->flags & BB_IRREDUCIBLE_LOOP)
    2178      1050828 :                            && (dest->flags & BB_IRREDUCIBLE_LOOP))
    2179         1726 :                       ++crossed_headers;
    2180      1208759 :                   if (crossed_headers > 1)
    2181              :                     {
    2182         1676 :                       vect_free_loop_info_assumptions
    2183         3352 :                         ((*path)[path->length () - 1]->e->dest->loop_father);
    2184         1676 :                       break;
    2185              :                     }
    2186              :                 }
    2187              :             }
    2188              :         }
    2189              :     }
    2190       156638 : }
    2191              : 
    2192              : 
    2193              : /* Verify that the REGION is a valid jump thread.  A jump thread is a special
    2194              :    case of SEME Single Entry Multiple Exits region in which all nodes in the
    2195              :    REGION have exactly one incoming edge.  The only exception is the first block
    2196              :    that may not have been connected to the rest of the cfg yet.  */
    2197              : 
    2198              : DEBUG_FUNCTION void
    2199      1260359 : verify_jump_thread (basic_block *region, unsigned n_region)
    2200              : {
    2201      3020964 :   for (unsigned i = 0; i < n_region; i++)
    2202      1760605 :     gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
    2203      1260359 : }
    2204              : 
    2205              : /* Return true when BB is one of the first N items in BBS.  */
    2206              : 
    2207              : static inline bool
    2208      2532883 : bb_in_bbs (basic_block bb, basic_block *bbs, int n)
    2209              : {
    2210      6030971 :   for (int i = 0; i < n; i++)
    2211      3514075 :     if (bb == bbs[i])
    2212              :       return true;
    2213              : 
    2214              :   return false;
    2215              : }
    2216              : 
    2217              : void
    2218           23 : jt_path_registry::debug_path (FILE *dump_file, int pathno)
    2219              : {
    2220           23 :   vec<jump_thread_edge *> *p = m_paths[pathno];
    2221           23 :   fprintf (dump_file, "path: ");
    2222          136 :   for (unsigned i = 0; i < p->length (); ++i)
    2223          113 :     fprintf (dump_file, "%d -> %d, ",
    2224          113 :              (*p)[i]->e->src->index, (*p)[i]->e->dest->index);
    2225           23 :   fprintf (dump_file, "\n");
    2226           23 : }
    2227              : 
    2228              : void
    2229            0 : jt_path_registry::debug ()
    2230              : {
    2231            0 :   for (unsigned i = 0; i < m_paths.length (); ++i)
    2232            0 :     debug_path (stderr, i);
    2233            0 : }
    2234              : 
    2235              : /* Rewire a jump_thread_edge so that the source block is now a
    2236              :    threaded source block.
    2237              : 
    2238              :    PATH_NUM is an index into the global path table PATHS.
    2239              :    EDGE_NUM is the jump thread edge number into said path.
    2240              : 
    2241              :    Returns TRUE if we were able to successfully rewire the edge.  */
    2242              : 
    2243              : bool
    2244        89047 : back_jt_path_registry::rewire_first_differing_edge (unsigned path_num,
    2245              :                                                     unsigned edge_num)
    2246              : {
    2247        89047 :   vec<jump_thread_edge *> *path = m_paths[path_num];
    2248        89047 :   edge &e = (*path)[edge_num]->e;
    2249        89047 :   if (dump_file && (dump_flags & TDF_DETAILS))
    2250           15 :     fprintf (dump_file, "rewiring edge candidate: %d -> %d\n",
    2251           15 :              e->src->index, e->dest->index);
    2252        89047 :   basic_block src_copy = get_bb_copy (e->src);
    2253        89047 :   if (src_copy == NULL)
    2254              :     {
    2255        19795 :       if (dump_file && (dump_flags & TDF_DETAILS))
    2256           10 :         fprintf (dump_file, "ignoring candidate: there is no src COPY\n");
    2257        19795 :       return false;
    2258              :     }
    2259        69252 :   edge new_edge = find_edge (src_copy, e->dest);
    2260              :   /* If the previously threaded paths created a flow graph where we
    2261              :      can no longer figure out where to go, give up.  */
    2262        69252 :   if (new_edge == NULL)
    2263              :     {
    2264         7160 :       if (dump_file && (dump_flags & TDF_DETAILS))
    2265            0 :         fprintf (dump_file, "ignoring candidate: we lost our way\n");
    2266         7160 :       return false;
    2267              :     }
    2268        62092 :   e = new_edge;
    2269        62092 :   return true;
    2270              : }
    2271              : 
    2272              : /* After a path has been jump threaded, adjust the remaining paths
    2273              :    that are subsets of this path, so these paths can be safely
    2274              :    threaded within the context of the new threaded path.
    2275              : 
    2276              :    For example, suppose we have just threaded:
    2277              : 
    2278              :    5 -> 6 -> 7 -> 8 -> 12   =>   5 -> 6' -> 7' -> 8' -> 12'
    2279              : 
    2280              :    And we have an upcoming threading candidate:
    2281              :    5 -> 6 -> 7 -> 8 -> 15 -> 20
    2282              : 
    2283              :    This function adjusts the upcoming path into:
    2284              :    8' -> 15 -> 20
    2285              : 
    2286              :    CURR_PATH_NUM is an index into the global paths table.  It
    2287              :    specifies the path that was just threaded.  */
    2288              : 
    2289              : void
    2290      1260393 : back_jt_path_registry::adjust_paths_after_duplication (unsigned curr_path_num)
    2291              : {
    2292      1260393 :   vec<jump_thread_edge *> *curr_path = m_paths[curr_path_num];
    2293              : 
    2294              :   /* Iterate through all the other paths and adjust them.  */
    2295     24328326 :   for (unsigned cand_path_num = 0; cand_path_num < m_paths.length (); )
    2296              :     {
    2297     23067933 :       if (cand_path_num == curr_path_num)
    2298              :         {
    2299      1260393 :           ++cand_path_num;
    2300      1260393 :           continue;
    2301              :         }
    2302              :       /* Make sure the candidate to adjust starts with the same path
    2303              :          as the recently threaded path.  */
    2304     21807540 :       vec<jump_thread_edge *> *cand_path = m_paths[cand_path_num];
    2305     21807540 :       if ((*cand_path)[0]->e != (*curr_path)[0]->e)
    2306              :         {
    2307     21677009 :           ++cand_path_num;
    2308     21677009 :           continue;
    2309              :         }
    2310       130531 :       if (dump_file && (dump_flags & TDF_DETAILS))
    2311              :         {
    2312           18 :           fprintf (dump_file, "adjusting candidate: ");
    2313           18 :           debug_path (dump_file, cand_path_num);
    2314              :         }
    2315              : 
    2316              :       /* Chop off from the candidate path any prefix it shares with
    2317              :          the recently threaded path.  */
    2318       261062 :       unsigned minlength = MIN (curr_path->length (), cand_path->length ());
    2319       130531 :       unsigned j;
    2320       367099 :       for (j = 0; j < minlength; ++j)
    2321              :         {
    2322       305820 :           edge cand_edge = (*cand_path)[j]->e;
    2323       305820 :           edge curr_edge = (*curr_path)[j]->e;
    2324              : 
    2325              :           /* Once the prefix no longer matches, adjust the first
    2326              :              non-matching edge to point from an adjusted edge to
    2327              :              wherever it was going.  */
    2328       305820 :           if (cand_edge != curr_edge)
    2329              :             {
    2330        69252 :               gcc_assert (cand_edge->src == curr_edge->src);
    2331        69252 :               if (!rewire_first_differing_edge (cand_path_num, j))
    2332         7160 :                 goto remove_candidate_from_list;
    2333              :               break;
    2334              :             }
    2335              :         }
    2336       123371 :       if (j == minlength)
    2337              :         {
    2338              :           /* If we consumed the max subgraph we could look at, and
    2339              :              still didn't find any different edges, it's the
    2340              :              last edge after MINLENGTH.  */
    2341        61279 :           if (cand_path->length () > minlength)
    2342              :             {
    2343        19795 :               if (!rewire_first_differing_edge (cand_path_num, j))
    2344        19795 :                 goto remove_candidate_from_list;
    2345              :             }
    2346        41484 :           else if (dump_file && (dump_flags & TDF_DETAILS))
    2347            3 :             fprintf (dump_file, "adjusting first edge after MINLENGTH.\n");
    2348              :         }
    2349       103576 :       if (j > 0)
    2350              :         {
    2351              :           /* If we are removing everything, delete the entire candidate.  */
    2352       207152 :           if (j == cand_path->length ())
    2353              :             {
    2354        41484 :             remove_candidate_from_list:
    2355        68439 :               cancel_thread (cand_path, "Adjusted candidate is EMPTY");
    2356        68439 :               m_paths.unordered_remove (cand_path_num);
    2357        68439 :               continue;
    2358              :             }
    2359              :           /* Otherwise, just remove the redundant sub-path.  */
    2360        62092 :           if (cand_path->length () - j > 1)
    2361        47081 :             cand_path->block_remove (0, j);
    2362        15011 :           else if (dump_file && (dump_flags & TDF_DETAILS))
    2363            0 :             fprintf (dump_file, "Dropping illformed candidate.\n");
    2364              :         }
    2365        62092 :       if (dump_file && (dump_flags & TDF_DETAILS))
    2366              :         {
    2367            5 :           fprintf (dump_file, "adjusted candidate: ");
    2368            5 :           debug_path (dump_file, cand_path_num);
    2369              :         }
    2370        62092 :       ++cand_path_num;
    2371              :     }
    2372      1260393 : }
    2373              : 
    2374              : /* Duplicates a jump-thread path of N_REGION basic blocks.
    2375              :    The ENTRY edge is redirected to the duplicate of the region.
    2376              : 
    2377              :    Remove the last conditional statement in the last basic block in the REGION,
    2378              :    and create a single fallthru edge pointing to the same destination as the
    2379              :    EXIT edge.
    2380              : 
    2381              :    CURRENT_PATH_NO is an index into the global paths[] table
    2382              :    specifying the jump-thread path.
    2383              : 
    2384              :    Returns false if it is unable to copy the region, true otherwise.  */
    2385              : 
    2386              : bool
    2387      1260427 : back_jt_path_registry::duplicate_thread_path (edge entry,
    2388              :                                               edge exit,
    2389              :                                               basic_block *region,
    2390              :                                               unsigned n_region,
    2391              :                                               unsigned current_path_no)
    2392              : {
    2393      1260427 :   unsigned i;
    2394      1260427 :   class loop *loop = entry->dest->loop_father;
    2395      1260427 :   edge exit_copy;
    2396      1260427 :   edge redirected;
    2397      1260427 :   profile_count curr_count;
    2398              : 
    2399      1260427 :   if (!can_copy_bbs_p (region, n_region))
    2400              :     return false;
    2401              : 
    2402              :   /* Some sanity checking.  Note that we do not check for all possible
    2403              :      missuses of the functions.  I.e. if you ask to copy something weird,
    2404              :      it will work, but the state of structures probably will not be
    2405              :      correct.  */
    2406      3021048 :   for (i = 0; i < n_region; i++)
    2407              :     {
    2408              :       /* We do not handle subloops, i.e. all the blocks must belong to the
    2409              :          same loop.  */
    2410      1760655 :       if (region[i]->loop_father != loop)
    2411              :         return false;
    2412              :     }
    2413              : 
    2414      1260393 :   initialize_original_copy_tables ();
    2415              : 
    2416      1260393 :   set_loop_copy (loop, loop);
    2417              : 
    2418      1260393 :   basic_block *region_copy = XNEWVEC (basic_block, n_region);
    2419      1260393 :   copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
    2420              :             split_edge_bb_loc (entry), false);
    2421              : 
    2422              :   /* Fix up: copy_bbs redirects all edges pointing to copied blocks.  The
    2423              :      following code ensures that all the edges exiting the jump-thread path are
    2424              :      redirected back to the original code: these edges are exceptions
    2425              :      invalidating the property that is propagated by executing all the blocks of
    2426              :      the jump-thread path in order.  */
    2427              : 
    2428      1260393 :   curr_count = entry->count ();
    2429              : 
    2430      3021048 :   for (i = 0; i < n_region; i++)
    2431              :     {
    2432      1760655 :       edge e;
    2433      1760655 :       edge_iterator ei;
    2434      1760655 :       basic_block bb = region_copy[i];
    2435              : 
    2436              :       /* Watch inconsistent profile.  */
    2437      1760655 :       if (curr_count > region[i]->count)
    2438        63190 :         curr_count = region[i]->count;
    2439              :       /* Scale current BB.  */
    2440      3046714 :       if (region[i]->count.nonzero_p () && curr_count.initialized_p ())
    2441              :         {
    2442              :           /* In the middle of the path we only scale the frequencies.
    2443              :              In last BB we need to update probabilities of outgoing edges
    2444              :              because we know which one is taken at the threaded path.  */
    2445      1286059 :           if (i + 1 != n_region)
    2446       476008 :             scale_bbs_frequencies_profile_count (region + i, 1,
    2447              :                                                  region[i]->count - curr_count,
    2448              :                                                  region[i]->count);
    2449              :           else
    2450       810051 :             update_bb_profile_for_threading (region[i],
    2451              :                                              curr_count,
    2452              :                                              exit);
    2453      1286059 :           scale_bbs_frequencies_profile_count (region_copy + i, 1, curr_count,
    2454      1286059 :                                                region_copy[i]->count);
    2455              :         }
    2456              : 
    2457      1760655 :       if (single_succ_p (bb))
    2458              :         {
    2459              :           /* Make sure the successor is the next node in the path.  */
    2460       150462 :           gcc_assert (i + 1 == n_region
    2461              :                       || region_copy[i + 1] == single_succ_edge (bb)->dest);
    2462       150462 :           if (i + 1 != n_region)
    2463              :             {
    2464       150462 :               curr_count = single_succ_edge (bb)->count ();
    2465              :             }
    2466      1410855 :           continue;
    2467              :         }
    2468              : 
    2469              :       /* Special case the last block on the path: make sure that it does not
    2470              :          jump back on the copied path, including back to itself.  */
    2471      1610193 :       if (i + 1 == n_region)
    2472              :         {
    2473      3793276 :           FOR_EACH_EDGE (e, ei, bb->succs)
    2474      5065766 :             if (bb_in_bbs (e->dest, region_copy, n_region))
    2475              :               {
    2476        15987 :                 basic_block orig = get_bb_original (e->dest);
    2477        15987 :                 if (orig)
    2478        15987 :                   redirect_edge_and_branch_force (e, orig);
    2479              :               }
    2480      1260393 :           continue;
    2481      1260393 :         }
    2482              : 
    2483              :       /* Redirect all other edges jumping to non-adjacent blocks back to the
    2484              :          original code.  */
    2485      1049403 :       FOR_EACH_EDGE (e, ei, bb->succs)
    2486       699603 :         if (region_copy[i + 1] != e->dest)
    2487              :           {
    2488       349803 :             basic_block orig = get_bb_original (e->dest);
    2489       349803 :             if (orig)
    2490        15351 :               redirect_edge_and_branch_force (e, orig);
    2491              :           }
    2492              :         else
    2493              :           {
    2494       349800 :             curr_count = e->count ();
    2495              :           }
    2496              :     }
    2497              : 
    2498              : 
    2499      1260393 :   if (flag_checking)
    2500      1260359 :     verify_jump_thread (region_copy, n_region);
    2501              : 
    2502              :   /* Remove the last branch in the jump thread path.  */
    2503      1260393 :   remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
    2504              : 
    2505              :   /* And fixup the flags on the single remaining edge.  */
    2506      1260393 :   edge fix_e = find_edge (region_copy[n_region - 1], exit->dest);
    2507      1260393 :   fix_e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
    2508      1260393 :   fix_e->flags |= EDGE_FALLTHRU;
    2509              : 
    2510      1260393 :   edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
    2511              : 
    2512      1260393 :   if (e)
    2513              :     {
    2514            0 :       rescan_loop_exit (e, true, false);
    2515            0 :       e->probability = profile_probability::always ();
    2516              :     }
    2517              : 
    2518              :   /* Redirect the entry and add the phi node arguments.  */
    2519      1260393 :   if (entry->dest == loop->header)
    2520        12106 :     mark_loop_for_removal (loop);
    2521      1260393 :   redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
    2522      1260393 :   gcc_assert (redirected != NULL);
    2523      1260393 :   flush_pending_stmts (entry);
    2524              : 
    2525              :   /* Add the other PHI node arguments.  */
    2526      1260393 :   add_phi_args_after_copy (region_copy, n_region, NULL);
    2527              : 
    2528      1260393 :   free (region_copy);
    2529              : 
    2530      1260393 :   adjust_paths_after_duplication (current_path_no);
    2531              : 
    2532      1260393 :   free_original_copy_tables ();
    2533      1260393 :   return true;
    2534              : }
    2535              : 
    2536              : /* Return true when PATH is a valid jump-thread path.  */
    2537              : 
    2538              : static bool
    2539      1280465 : valid_jump_thread_path (vec<jump_thread_edge *> *path)
    2540              : {
    2541      1280465 :   unsigned len = path->length ();
    2542              : 
    2543              :   /* Check that the path is connected.  */
    2544      3070481 :   for (unsigned int j = 0; j < len - 1; j++)
    2545              :     {
    2546      1810054 :       edge e = (*path)[j]->e;
    2547      1810054 :       if (e->dest != (*path)[j+1]->e->src)
    2548              :         return false;
    2549              :     }
    2550              :   return true;
    2551              : }
    2552              : 
    2553              : /* Remove any queued jump threads that include edge E.
    2554              : 
    2555              :    We don't actually remove them here, just record the edges into ax
    2556              :    hash table.  That way we can do the search once per iteration of
    2557              :    DOM/VRP rather than for every case where DOM optimizes away a COND_EXPR.  */
    2558              : 
    2559              : void
    2560       370387 : fwd_jt_path_registry::remove_jump_threads_including (edge_def *e)
    2561              : {
    2562       370387 :   if (!m_paths.exists () || !flag_thread_jumps)
    2563              :     return;
    2564              : 
    2565       370351 :   edge *slot = m_removed_edges->find_slot (e, INSERT);
    2566       370351 :   *slot = e;
    2567              : }
    2568              : 
    2569              : /* Thread all paths that have been queued for jump threading, and
    2570              :    update the CFG accordingly.
    2571              : 
    2572              :    It is the caller's responsibility to fix the dominance information
    2573              :    and rewrite duplicated SSA_NAMEs back into SSA form.
    2574              : 
    2575              :    If PEEL_LOOP_HEADERS is false, avoid threading edges through loop
    2576              :    headers if it does not simplify the loop.
    2577              : 
    2578              :    Returns true if one or more edges were threaded.  */
    2579              : 
    2580              : bool
    2581      8351647 : jt_path_registry::thread_through_all_blocks (bool peel_loop_headers)
    2582              : {
    2583      8406341 :   if (m_paths.length () == 0)
    2584              :     return false;
    2585              : 
    2586       454242 :   m_num_threaded_edges = 0;
    2587              : 
    2588       454242 :   bool retval = update_cfg (peel_loop_headers);
    2589              : 
    2590       454242 :   statistics_counter_event (cfun, "Jumps threaded", m_num_threaded_edges);
    2591              : 
    2592       454242 :   if (retval)
    2593              :     {
    2594       399548 :       loops_state_set (LOOPS_NEED_FIXUP);
    2595       399548 :       return true;
    2596              :     }
    2597              :   return false;
    2598              : }
    2599              : 
    2600              : /* This is the backward threader version of thread_through_all_blocks
    2601              :    using a generic BB copier.  */
    2602              : 
    2603              : bool
    2604       297604 : back_jt_path_registry::update_cfg (bool /*peel_loop_headers*/)
    2605              : {
    2606       297604 :   bool retval = false;
    2607       297604 :   hash_set<edge> visited_starting_edges;
    2608              : 
    2609      1593080 :   while (m_paths.length ())
    2610              :     {
    2611      1295476 :       vec<jump_thread_edge *> *path = m_paths[0];
    2612      1295476 :       edge entry = (*path)[0]->e;
    2613              : 
    2614              :       /* Do not jump-thread twice from the same starting edge.
    2615              : 
    2616              :          Previously we only checked that we weren't threading twice
    2617              :          from the same BB, but that was too restrictive.  Imagine a
    2618              :          path that starts from GIMPLE_COND(x_123 == 0,...), where both
    2619              :          edges out of this conditional yield paths that can be
    2620              :          threaded (for example, both lead to an x_123==0 or x_123!=0
    2621              :          conditional further down the line.  */
    2622      1295476 :       if (visited_starting_edges.contains (entry)
    2623              :           /* We may not want to realize this jump thread path for
    2624              :              various reasons.  So check it first.  */
    2625      1295476 :           || !valid_jump_thread_path (path))
    2626              :         {
    2627              :           /* Remove invalid jump-thread paths.  */
    2628        35049 :           cancel_thread (path, "Avoiding threading twice from same edge");
    2629        35049 :           m_paths.unordered_remove (0);
    2630        35049 :           continue;
    2631              :         }
    2632              : 
    2633      1260427 :       unsigned len = path->length ();
    2634      1260427 :       edge exit = (*path)[len - 1]->e;
    2635      1260427 :       basic_block *region = XNEWVEC (basic_block, len - 1);
    2636              : 
    2637      3021230 :       for (unsigned int j = 0; j < len - 1; j++)
    2638      1760803 :         region[j] = (*path)[j]->e->dest;
    2639              : 
    2640      1260427 :       if (duplicate_thread_path (entry, exit, region, len - 1, 0))
    2641              :         {
    2642              :           /* We do not update dominance info.  */
    2643      1260393 :           free_dominance_info (CDI_DOMINATORS);
    2644      1260393 :           visited_starting_edges.add (entry);
    2645      1260393 :           retval = true;
    2646      1260393 :           m_num_threaded_edges++;
    2647              :         }
    2648              : 
    2649      1260427 :       path->release ();
    2650      1260427 :       m_paths.unordered_remove (0);
    2651      1260427 :       free (region);
    2652              :     }
    2653       297604 :   return retval;
    2654       297604 : }
    2655              : 
    2656              : /* This is the forward threader version of thread_through_all_blocks,
    2657              :    using a custom BB copier.  */
    2658              : 
    2659              : bool
    2660       156638 : fwd_jt_path_registry::update_cfg (bool may_peel_loop_headers)
    2661              : {
    2662       156638 :   bool retval = false;
    2663              : 
    2664              :   /* Remove any paths that referenced removed edges.  */
    2665       156638 :   if (m_removed_edges)
    2666       914703 :     for (unsigned i = 0; i < m_paths.length (); )
    2667              :       {
    2668       758065 :         unsigned int j;
    2669       758065 :         vec<jump_thread_edge *> *path = m_paths[i];
    2670              : 
    2671      2498718 :         for (j = 0; j < path->length (); j++)
    2672              :           {
    2673      1842450 :             edge e = (*path)[j]->e;
    2674      1842450 :             if (m_removed_edges->find_slot (e, NO_INSERT)
    2675      3583103 :                 || (((*path)[j]->type == EDGE_COPY_SRC_BLOCK
    2676      1175827 :                      || (*path)[j]->type == EDGE_COPY_SRC_JOINER_BLOCK)
    2677       897647 :                     && !can_duplicate_block_p (e->src)))
    2678              :               break;
    2679              :           }
    2680              : 
    2681      1516130 :         if (j != path->length ())
    2682              :           {
    2683       101797 :             cancel_thread (path, "Thread references removed edge");
    2684       101797 :             m_paths.unordered_remove (i);
    2685       101797 :             continue;
    2686              :           }
    2687       656268 :         i++;
    2688              :       }
    2689              : 
    2690       156638 :   auto_bitmap threaded_blocks;
    2691       156638 :   mark_threaded_blocks (threaded_blocks);
    2692              : 
    2693       156638 :   initialize_original_copy_tables ();
    2694              : 
    2695              :   /* The order in which we process jump threads can be important.
    2696              : 
    2697              :      Consider if we have two jump threading paths A and B.  If the
    2698              :      target edge of A is the starting edge of B and we thread path A
    2699              :      first, then we create an additional incoming edge into B->dest that
    2700              :      we cannot discover as a jump threading path on this iteration.
    2701              : 
    2702              :      If we instead thread B first, then the edge into B->dest will have
    2703              :      already been redirected before we process path A and path A will
    2704              :      natually, with no further work, target the redirected path for B.
    2705              : 
    2706              :      An post-order is sufficient here.  Compute the ordering first, then
    2707              :      process the blocks.  */
    2708       156638 :   if (!bitmap_empty_p (threaded_blocks))
    2709              :     {
    2710       143640 :       int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
    2711       143640 :       unsigned int postorder_num = post_order_compute (postorder, false, false);
    2712      8244399 :       for (unsigned int i = 0; i < postorder_num; i++)
    2713              :         {
    2714      8100759 :           unsigned int indx = postorder[i];
    2715      8100759 :           if (bitmap_bit_p (threaded_blocks, indx))
    2716              :             {
    2717       366074 :               basic_block bb = BASIC_BLOCK_FOR_FN (cfun, indx);
    2718       366074 :               retval |= thread_block (bb, true);
    2719              :             }
    2720              :         }
    2721       143640 :       free (postorder);
    2722              :     }
    2723              : 
    2724              :   /* Then perform the threading through loop headers.  We start with the
    2725              :      innermost loop, so that the changes in cfg we perform won't affect
    2726              :      further threading.  */
    2727      1073238 :   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    2728              :     {
    2729      1069009 :       if (!loop->header
    2730       603324 :           || !bitmap_bit_p (threaded_blocks, loop->header->index))
    2731       465685 :         continue;
    2732              : 
    2733       137639 :       retval |= thread_through_loop_header (loop, may_peel_loop_headers);
    2734       156638 :     }
    2735              : 
    2736              :   /* All jump threading paths should have been resolved at this
    2737              :      point.  Verify that is the case.  */
    2738       156638 :   basic_block bb;
    2739      9190359 :   FOR_EACH_BB_FN (bb, cfun)
    2740              :     {
    2741      9033721 :       edge_iterator ei;
    2742      9033721 :       edge e;
    2743     22244641 :       FOR_EACH_EDGE (e, ei, bb->preds)
    2744     13210920 :         gcc_assert (e->aux == NULL);
    2745              :     }
    2746              : 
    2747       156638 :   free_original_copy_tables ();
    2748              : 
    2749       156638 :   return retval;
    2750       156638 : }
    2751              : 
    2752              : bool
    2753      2689996 : jt_path_registry::cancel_invalid_paths (vec<jump_thread_edge *> &path)
    2754              : {
    2755      2689996 :   gcc_checking_assert (!path.is_empty ());
    2756      2689996 :   edge entry = path[0]->e;
    2757      2689996 :   edge exit = path[path.length () - 1]->e;
    2758      2689996 :   bool seen_latch = false;
    2759      2689996 :   int loops_crossed = 0;
    2760      2689996 :   bool crossed_latch = false;
    2761      2689996 :   bool crossed_loop_header = false;
    2762              :   // Use ->dest here instead of ->src to ignore the first block.  The
    2763              :   // first block is allowed to be in a different loop, since it'll be
    2764              :   // redirected.  See similar comment in profitable_path_p: "we don't
    2765              :   // care about that block...".
    2766      2689996 :   loop_p loop = entry->dest->loop_father;
    2767      2689996 :   loop_p curr_loop = loop;
    2768              : 
    2769      9442092 :   for (unsigned int i = 0; i < path.length (); i++)
    2770              :     {
    2771      6752096 :       edge e = path[i]->e;
    2772              : 
    2773      6752096 :       if (e == NULL)
    2774              :         {
    2775              :           // NULL outgoing edges on a path can happen for jumping to a
    2776              :           // constant address.
    2777            0 :           cancel_thread (&path, "Found NULL edge in jump threading path");
    2778            0 :           return true;
    2779              :         }
    2780              : 
    2781      6752096 :       if (loop->latch == e->src || loop->latch == e->dest)
    2782              :         {
    2783       578085 :           seen_latch = true;
    2784              :           // Like seen_latch, but excludes the first block.
    2785       578085 :           if (e->src != entry->src)
    2786       540958 :             crossed_latch = true;
    2787              :         }
    2788              : 
    2789      6752096 :       if (e->dest->loop_father != curr_loop)
    2790              :         {
    2791       378848 :           curr_loop = e->dest->loop_father;
    2792       378848 :           ++loops_crossed;
    2793              :         }
    2794              : 
    2795              :       // ?? Avoid threading through loop headers that remain in the
    2796              :       // loop, as such threadings tend to create sub-loops which
    2797              :       // _might_ be OK ??.
    2798      6752096 :       if (e->dest->loop_father->header == e->dest
    2799      6752096 :           && !flow_loop_nested_p (exit->dest->loop_father,
    2800              :                                   e->dest->loop_father))
    2801              :         crossed_loop_header = true;
    2802              : 
    2803      6752096 :       if (flag_checking && !m_backedge_threads)
    2804      3224794 :         gcc_assert ((path[i]->e->flags & EDGE_DFS_BACK) == 0);
    2805              :     }
    2806              : 
    2807              :   // If we crossed a loop into an outer loop without crossing the
    2808              :   // latch, this is just an early exit from the loop.
    2809      2689996 :   if (loops_crossed == 1
    2810      2689996 :       && !crossed_latch
    2811      2689996 :       && flow_loop_nested_p (exit->dest->loop_father, exit->src->loop_father))
    2812              :     return false;
    2813              : 
    2814      2599687 :   if (cfun->curr_properties & PROP_loop_opts_done)
    2815              :     return false;
    2816              : 
    2817      1907442 :   if (seen_latch && empty_block_p (loop->latch))
    2818              :     {
    2819       232681 :       cancel_thread (&path, "Threading through latch before loop opts "
    2820              :                      "would create non-empty latch");
    2821       232681 :       return true;
    2822              :     }
    2823      1674761 :   if (loops_crossed)
    2824              :     {
    2825       164617 :       cancel_thread (&path, "Path crosses loops");
    2826       164617 :       return true;
    2827              :     }
    2828              :   // The path should either start and end in the same loop or exit the
    2829              :   // loop it starts in but never enter a loop.  This also catches
    2830              :   // creating irreducible loops, not only rotation.
    2831      1510144 :   if (entry->src->loop_father != exit->dest->loop_father
    2832      1664321 :       && !flow_loop_nested_p (exit->src->loop_father,
    2833       154177 :                               entry->dest->loop_father))
    2834              :     {
    2835       154177 :       cancel_thread (&path, "Path rotates loop");
    2836       154177 :       return true;
    2837              :     }
    2838      1355967 :   if (crossed_loop_header)
    2839              :     {
    2840        16541 :       cancel_thread (&path, "Path crosses loop header but does not exit it");
    2841        16541 :       return true;
    2842              :     }
    2843              :   return false;
    2844              : }
    2845              : 
    2846              : /* Register a jump threading opportunity.  We queue up all the jump
    2847              :    threading opportunities discovered by a pass and update the CFG
    2848              :    and SSA form all at once.
    2849              : 
    2850              :    E is the edge we can thread, E2 is the new target edge, i.e., we
    2851              :    are effectively recording that E->dest can be changed to E2->dest
    2852              :    after fixing the SSA graph.
    2853              : 
    2854              :    Return TRUE if PATH was successfully threaded.  */
    2855              : 
    2856              : bool
    2857      2689996 : jt_path_registry::register_jump_thread (vec<jump_thread_edge *> *path)
    2858              : {
    2859      2689996 :   gcc_checking_assert (flag_thread_jumps);
    2860              : 
    2861      2689996 :   if (!dbg_cnt (registered_jump_thread))
    2862              :     {
    2863            0 :       path->release ();
    2864            0 :       return false;
    2865              :     }
    2866              : 
    2867      2689996 :   if (cancel_invalid_paths (*path))
    2868              :     return false;
    2869              : 
    2870      2121980 :   if (dump_file && (dump_flags & TDF_DETAILS))
    2871          126 :     dump_jump_thread_path (dump_file, *path, true);
    2872              : 
    2873      2121980 :   m_paths.safe_push (path);
    2874      2121980 :   return true;
    2875              : }
    2876              : 
    2877              : /* Return how many uses of T there are within BB, as long as there
    2878              :    aren't any uses outside BB.  If there are any uses outside BB,
    2879              :    return -1 if there's at most one use within BB, or -2 if there is
    2880              :    more than one use within BB.  */
    2881              : 
    2882              : static int
    2883      1857244 : uses_in_bb (tree t, basic_block bb)
    2884              : {
    2885      1857244 :   int uses = 0;
    2886      1857244 :   bool outside_bb = false;
    2887              : 
    2888      1857244 :   imm_use_iterator iter;
    2889      1857244 :   use_operand_p use_p;
    2890      7604952 :   FOR_EACH_IMM_USE_FAST (use_p, iter, t)
    2891              :     {
    2892      4006504 :       if (is_gimple_debug (USE_STMT (use_p)))
    2893       702430 :         continue;
    2894              : 
    2895      3304074 :       if (gimple_bb (USE_STMT (use_p)) != bb)
    2896              :         outside_bb = true;
    2897              :       else
    2898      2168423 :         uses++;
    2899              : 
    2900      3304074 :       if (outside_bb && uses > 1)
    2901       116040 :         return -2;
    2902       116040 :     }
    2903              : 
    2904      1741204 :   if (outside_bb)
    2905       519972 :     return -1;
    2906              : 
    2907              :   return uses;
    2908              : }
    2909              : 
    2910              : /* Starting from the final control flow stmt in BB, assuming it will
    2911              :    be removed, follow uses in to-be-removed stmts back to their defs
    2912              :    and count how many defs are to become dead and be removed as
    2913              :    well.  */
    2914              : 
    2915              : unsigned int
    2916      1670487 : estimate_threading_killed_stmts (basic_block bb)
    2917              : {
    2918      1670487 :   int killed_stmts = 0;
    2919      1670487 :   hash_map<tree, int> ssa_remaining_uses;
    2920      1670487 :   auto_vec<gimple *, 4> dead_worklist;
    2921              : 
    2922              :   /* If the block has only two predecessors, threading will turn phi
    2923              :      dsts into either src, so count them as dead stmts.  */
    2924      1670487 :   bool drop_all_phis = EDGE_COUNT (bb->preds) == 2;
    2925              : 
    2926      1670487 :   if (drop_all_phis)
    2927       736674 :     for (gphi_iterator gsi = gsi_start_phis (bb);
    2928      2498900 :          !gsi_end_p (gsi); gsi_next (&gsi))
    2929              :       {
    2930      1762226 :         gphi *phi = gsi.phi ();
    2931      1762226 :         tree dst = gimple_phi_result (phi);
    2932              : 
    2933              :         /* We don't count virtual PHIs as stmts in
    2934              :            record_temporary_equivalences_from_phis.  */
    2935      3524452 :         if (virtual_operand_p (dst))
    2936       521303 :           continue;
    2937              : 
    2938      1240923 :         killed_stmts++;
    2939              :       }
    2940              : 
    2941      3340974 :   if (gsi_end_p (gsi_last_bb (bb)))
    2942            0 :     return killed_stmts;
    2943              : 
    2944      1670487 :   gimple *stmt = gsi_stmt (gsi_last_bb (bb));
    2945      1670487 :   if (gimple_code (stmt) != GIMPLE_COND
    2946              :       && gimple_code (stmt) != GIMPLE_GOTO
    2947              :       && gimple_code (stmt) != GIMPLE_SWITCH)
    2948       497608 :     return killed_stmts;
    2949              : 
    2950              :   /* The control statement is always dead.  */
    2951      1172879 :   killed_stmts++;
    2952      1172879 :   dead_worklist.quick_push (stmt);
    2953      3406848 :   while (!dead_worklist.is_empty ())
    2954              :     {
    2955      2233969 :       stmt = dead_worklist.pop ();
    2956              : 
    2957      2233969 :       ssa_op_iter iter;
    2958      2233969 :       use_operand_p use_p;
    2959      4891810 :       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
    2960              :         {
    2961      2657841 :           tree t = USE_FROM_PTR (use_p);
    2962      2657841 :           gimple *def = SSA_NAME_DEF_STMT (t);
    2963              : 
    2964      2657841 :           if (gimple_bb (def) == bb
    2965      2082510 :               && (gimple_code (def) != GIMPLE_PHI
    2966       144425 :                   || !drop_all_phis)
    2967      4646555 :               && !gimple_has_side_effects (def))
    2968              :             {
    2969      1914676 :               int *usesp = ssa_remaining_uses.get (t);
    2970      1914676 :               int uses;
    2971              : 
    2972      1914676 :               if (usesp)
    2973        57432 :                 uses = *usesp;
    2974              :               else
    2975      1857244 :                 uses = uses_in_bb (t, bb);
    2976              : 
    2977      1914676 :               gcc_assert (uses);
    2978              : 
    2979              :               /* Don't bother recording the expected use count if we
    2980              :                  won't find any further uses within BB.  */
    2981      1914676 :               if (!usesp && (uses < -1 || uses > 1))
    2982              :                 {
    2983       279681 :                   usesp = &ssa_remaining_uses.get_or_insert (t);
    2984       279681 :                   *usesp = uses;
    2985              :                 }
    2986              : 
    2987      1914676 :               if (uses < 0)
    2988       670477 :                 continue;
    2989              : 
    2990      1244199 :               --uses;
    2991      1244199 :               if (usesp)
    2992       186608 :                 *usesp = uses;
    2993              : 
    2994      1244199 :               if (!uses)
    2995              :                 {
    2996      1075272 :                   killed_stmts++;
    2997      1075272 :                   if (usesp)
    2998        17681 :                     ssa_remaining_uses.remove (t);
    2999      1075272 :                   if (gimple_code (def) != GIMPLE_PHI)
    3000      1061090 :                     dead_worklist.safe_push (def);
    3001              :                 }
    3002              :             }
    3003              :         }
    3004              :     }
    3005              : 
    3006      1172879 :   if (dump_file)
    3007           25 :     fprintf (dump_file, "threading bb %i kills %i stmts\n",
    3008              :              bb->index, killed_stmts);
    3009              : 
    3010      1172879 :   return killed_stmts;
    3011      1670487 : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.