LCOV - code coverage report
Current view: top level - gcc - tree-ssa-threadupdate.cc (source / functions) Coverage Total Hit
Test: gcc.info Lines: 96.0 % 1055 1013
Test Date: 2026-05-11 19:44:49 Functions: 93.0 % 57 53
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /* Thread edges through blocks and update the control flow and SSA graphs.
       2              :    Copyright (C) 2004-2026 Free Software Foundation, Inc.
       3              : 
       4              : This file is part of GCC.
       5              : 
       6              : GCC is free software; you can redistribute it and/or modify
       7              : it under the terms of the GNU General Public License as published by
       8              : the Free Software Foundation; either version 3, or (at your option)
       9              : any later version.
      10              : 
      11              : GCC is distributed in the hope that it will be useful,
      12              : but WITHOUT ANY WARRANTY; without even the implied warranty of
      13              : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      14              : GNU General Public License for more details.
      15              : 
      16              : You should have received a copy of the GNU General Public License
      17              : along with GCC; see the file COPYING3.  If not see
      18              : <http://www.gnu.org/licenses/>.  */
      19              : 
      20              : #include "config.h"
      21              : #include "system.h"
      22              : #include "coretypes.h"
      23              : #include "backend.h"
      24              : #include "tree.h"
      25              : #include "gimple.h"
      26              : #include "cfghooks.h"
      27              : #include "tree-pass.h"
      28              : #include "ssa.h"
      29              : #include "fold-const.h"
      30              : #include "cfganal.h"
      31              : #include "gimple-iterator.h"
      32              : #include "tree-ssa.h"
      33              : #include "tree-ssa-threadupdate.h"
      34              : #include "cfgloop.h"
      35              : #include "dbgcnt.h"
      36              : #include "tree-cfg.h"
      37              : #include "tree-vectorizer.h"
      38              : #include "tree-pass.h"
      39              : 
      40              : /* Given a block B, update the CFG and SSA graph to reflect redirecting
      41              :    one or more in-edges to B to instead reach the destination of an
      42              :    out-edge from B while preserving any side effects in B.
      43              : 
      44              :    i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
      45              :    side effects of executing B.
      46              : 
      47              :      1. Make a copy of B (including its outgoing edges and statements).  Call
      48              :         the copy B'.  Note B' has no incoming edges or PHIs at this time.
      49              : 
      50              :      2. Remove the control statement at the end of B' and all outgoing edges
      51              :         except B'->C.
      52              : 
      53              :      3. Add a new argument to each PHI in C with the same value as the existing
      54              :         argument associated with edge B->C.  Associate the new PHI arguments
      55              :         with the edge B'->C.
      56              : 
      57              :      4. For each PHI in B, find or create a PHI in B' with an identical
      58              :         PHI_RESULT.  Add an argument to the PHI in B' which has the same
      59              :         value as the PHI in B associated with the edge A->B.  Associate
      60              :         the new argument in the PHI in B' with the edge A->B.
      61              : 
      62              :      5. Change the edge A->B to A->B'.
      63              : 
      64              :         5a. This automatically deletes any PHI arguments associated with the
      65              :             edge A->B in B.
      66              : 
      67              :         5b. This automatically associates each new argument added in step 4
      68              :             with the edge A->B'.
      69              : 
      70              :      6. Repeat for other incoming edges into B.
      71              : 
      72              :      7. Put the duplicated resources in B and all the B' blocks into SSA form.
      73              : 
      74              :    Note that block duplication can be minimized by first collecting the
      75              :    set of unique destination blocks that the incoming edges should
      76              :    be threaded to.
      77              : 
      78              :    We reduce the number of edges and statements we create by not copying all
      79              :    the outgoing edges and the control statement in step #1.  We instead create
      80              :    a template block without the outgoing edges and duplicate the template.
      81              : 
      82              :    Another case this code handles is threading through a "joiner" block.  In
      83              :    this case, we do not know the destination of the joiner block, but one
      84              :    of the outgoing edges from the joiner block leads to a threadable path.  This
      85              :    case largely works as outlined above, except the duplicate of the joiner
      86              :    block still contains a full set of outgoing edges and its control statement.
      87              :    We just redirect one of its outgoing edges to our jump threading path.  */
      88              : 
      89              : 
      90              : /* Steps #5 and #6 of the above algorithm are best implemented by walking
      91              :    all the incoming edges which thread to the same destination edge at
      92              :    the same time.  That avoids lots of table lookups to get information
      93              :    for the destination edge.
      94              : 
      95              :    To realize that implementation we create a list of incoming edges
      96              :    which thread to the same outgoing edge.  Thus to implement steps
      97              :    #5 and #6 we traverse our hash table of outgoing edge information.
      98              :    For each entry we walk the list of incoming edges which thread to
      99              :    the current outgoing edge.  */
     100              : 
     101              : struct el
     102              : {
     103              :   edge e;
     104              :   struct el *next;
     105              : };
     106              : 
     107              : /* Main data structure recording information regarding B's duplicate
     108              :    blocks.  */
     109              : 
     110              : /* We need to efficiently record the unique thread destinations of this
     111              :    block and specific information associated with those destinations.  We
     112              :    may have many incoming edges threaded to the same outgoing edge.  This
     113              :    can be naturally implemented with a hash table.  */
     114              : 
     115              : struct redirection_data : free_ptr_hash<redirection_data>
     116              : {
     117              :   /* We support wiring up two block duplicates in a jump threading path.
     118              : 
     119              :      One is a normal block copy where we remove the control statement
     120              :      and wire up its single remaining outgoing edge to the thread path.
     121              : 
     122              :      The other is a joiner block where we leave the control statement
     123              :      in place, but wire one of the outgoing edges to a thread path.
     124              : 
     125              :      In theory we could have multiple block duplicates in a jump
     126              :      threading path, but I haven't tried that.
     127              : 
     128              :      The duplicate blocks appear in this array in the same order in
     129              :      which they appear in the jump thread path.  */
     130              :   basic_block dup_blocks[2];
     131              : 
     132              :   vec<jump_thread_edge *> *path;
     133              : 
     134              :   /* A list of incoming edges which we want to thread to the
     135              :      same path.  */
     136              :   struct el *incoming_edges;
     137              : 
     138              :   /* hash_table support.  */
     139              :   static inline hashval_t hash (const redirection_data *);
     140              :   static inline int equal (const redirection_data *, const redirection_data *);
     141              : };
     142              : 
     143      8337306 : jump_thread_path_allocator::jump_thread_path_allocator ()
     144              : {
     145      8337306 :   obstack_init (&m_obstack);
     146      8337306 : }
     147              : 
     148      8337306 : jump_thread_path_allocator::~jump_thread_path_allocator ()
     149              : {
     150      8337306 :   obstack_free (&m_obstack, NULL);
     151      8337306 : }
     152              : 
     153              : jump_thread_edge *
     154     24823673 : jump_thread_path_allocator::allocate_thread_edge (edge e,
     155              :                                                   jump_thread_edge_type type)
     156              : {
     157     24823673 :   void *r = obstack_alloc (&m_obstack, sizeof (jump_thread_edge));
     158     24823673 :   return new (r) jump_thread_edge (e, type);
     159              : }
     160              : 
     161              : vec<jump_thread_edge *> *
     162     17821925 : jump_thread_path_allocator::allocate_thread_path ()
     163              : {
     164              :   // ?? Since the paths live in an obstack, we should be able to remove all
     165              :   // references to path->release() throughout the code.
     166     17821925 :   void *r = obstack_alloc (&m_obstack, sizeof (vec <jump_thread_edge *>));
     167     17821925 :   return new (r) vec<jump_thread_edge *> ();
     168              : }
     169              : 
     170      8337306 : jt_path_registry::jt_path_registry (bool backedge_threads)
     171              : {
     172      8337306 :   m_paths.create (5);
     173      8337306 :   m_num_threaded_edges = 0;
     174      8337306 :   m_backedge_threads = backedge_threads;
     175      8337306 : }
     176              : 
     177      8337306 : jt_path_registry::~jt_path_registry ()
     178              : {
     179      8337306 :   m_paths.release ();
     180      8337306 : }
     181              : 
     182      2079334 : fwd_jt_path_registry::fwd_jt_path_registry ()
     183      2079334 :   : jt_path_registry (/*backedge_threads=*/false)
     184              : {
     185      2079334 :   m_removed_edges = new hash_table<struct removed_edges> (17);
     186      2079334 :   m_redirection_data = NULL;
     187      2079334 : }
     188              : 
     189      4158668 : fwd_jt_path_registry::~fwd_jt_path_registry ()
     190              : {
     191      2079334 :   delete m_removed_edges;
     192      4158668 : }
     193              : 
     194      6257972 : back_jt_path_registry::back_jt_path_registry ()
     195      6257972 :   : jt_path_registry (/*backedge_threads=*/true)
     196              : {
     197      6257972 : }
     198              : 
     199              : void
     200     24823673 : jt_path_registry::push_edge (vec<jump_thread_edge *> *path,
     201              :                              edge e, jump_thread_edge_type type)
     202              : {
     203     24823673 :   jump_thread_edge *x =  m_allocator.allocate_thread_edge (e, type);
     204     24823673 :   path->safe_push (x);
     205     24823673 : }
     206              : 
     207              : vec<jump_thread_edge *> *
     208     17821925 : jt_path_registry::allocate_thread_path ()
     209              : {
     210     17821925 :   return m_allocator.allocate_thread_path ();
     211              : }
     212              : 
     213              : /* Dump a jump threading path, including annotations about each
     214              :    edge in the path.  */
     215              : 
     216              : static void
     217          188 : dump_jump_thread_path (FILE *dump_file,
     218              :                        const vec<jump_thread_edge *> &path,
     219              :                        bool registering)
     220              : {
     221          188 :   if (registering)
     222          248 :     fprintf (dump_file,
     223              :              "  [%u] Registering jump thread: (%d, %d) incoming edge; ",
     224              :              dbg_cnt_counter (registered_jump_thread),
     225          124 :              path[0]->e->src->index, path[0]->e->dest->index);
     226              :   else
     227           64 :     fprintf (dump_file,
     228              :              "  Cancelling jump thread: (%d, %d) incoming edge; ",
     229           64 :              path[0]->e->src->index, path[0]->e->dest->index);
     230              : 
     231          604 :   for (unsigned int i = 1; i < path.length (); i++)
     232              :     {
     233              :       /* We can get paths with a NULL edge when the final destination
     234              :          of a jump thread turns out to be a constant address.  We dump
     235              :          those paths when debugging, so we have to be prepared for that
     236              :          possibility here.  */
     237          416 :       if (path[i]->e == NULL)
     238            0 :         continue;
     239              : 
     240          416 :       fprintf (dump_file, " (%d, %d) ",
     241          416 :                path[i]->e->src->index, path[i]->e->dest->index);
     242          416 :       switch (path[i]->type)
     243              :         {
     244           39 :         case EDGE_COPY_SRC_JOINER_BLOCK:
     245           39 :           fprintf (dump_file, "joiner");
     246           39 :           break;
     247          234 :         case EDGE_COPY_SRC_BLOCK:
     248          234 :           fprintf (dump_file, "normal");
     249          234 :           break;
     250          143 :         case EDGE_NO_COPY_SRC_BLOCK:
     251          143 :           fprintf (dump_file, "nocopy");
     252          143 :           break;
     253            0 :         default:
     254            0 :           gcc_unreachable ();
     255              :         }
     256              : 
     257          416 :       if ((path[i]->e->flags & EDGE_DFS_BACK) != 0)
     258           30 :         fprintf (dump_file, " (back)");
     259              :     }
     260          188 :   fprintf (dump_file, "; \n");
     261          188 : }
     262              : 
     263              : DEBUG_FUNCTION void
     264            0 : debug (const vec<jump_thread_edge *> &path)
     265              : {
     266            0 :   dump_jump_thread_path (stderr, path, true);
     267            0 : }
     268              : 
     269              : DEBUG_FUNCTION void
     270            0 : debug (const vec<jump_thread_edge *> *path)
     271              : {
     272            0 :   debug (*path);
     273            0 : }
     274              : 
     275              : /* Release the memory associated with PATH, and if dumping is enabled,
     276              :    dump out the reason why the thread was canceled.  */
     277              : 
     278              : static void
     279      1057235 : cancel_thread (vec<jump_thread_edge *> *path, const char *reason = NULL)
     280              : {
     281      1057235 :   if (dump_file && (dump_flags & TDF_DETAILS))
     282              :     {
     283           64 :       if (reason)
     284           56 :         fprintf (dump_file, "%s: ", reason);
     285              : 
     286           64 :       dump_jump_thread_path (dump_file, *path, false);
     287           64 :       fprintf (dump_file, "\n");
     288              :     }
     289      1057235 :   path->release ();
     290      1057235 : }
     291              : 
     292              : /* Simple hashing function.  For any given incoming edge E, we're going
     293              :    to be most concerned with the final destination of its jump thread
     294              :    path.  So hash on the block index of the final edge in the path.  */
     295              : 
     296              : inline hashval_t
     297       505743 : redirection_data::hash (const redirection_data *p)
     298              : {
     299       505743 :   vec<jump_thread_edge *> *path = p->path;
     300       505743 :   return path->last ()->e->dest->index;
     301              : }
     302              : 
     303              : /* Given two hash table entries, return true if they have the same
     304              :    jump threading path.  */
     305              : inline int
     306       155387 : redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
     307              : {
     308       155387 :   vec<jump_thread_edge *> *path1 = p1->path;
     309       155387 :   vec<jump_thread_edge *> *path2 = p2->path;
     310              : 
     311       466161 :   if (path1->length () != path2->length ())
     312              :     return false;
     313              : 
     314       251962 :   for (unsigned int i = 1; i < path1->length (); i++)
     315              :     {
     316       183511 :       if ((*path1)[i]->type != (*path2)[i]->type
     317       183511 :           || (*path1)[i]->e != (*path2)[i]->e)
     318              :         return false;
     319              :     }
     320              : 
     321              :   return true;
     322              : }
     323              : 
     324              : /* Data structure of information to pass to hash table traversal routines.  */
     325              : struct ssa_local_info_t
     326              : {
     327              :   /* The current block we are working on.  */
     328              :   basic_block bb;
     329              : 
     330              :   /* We only create a template block for the first duplicated block in a
     331              :      jump threading path as we may need many duplicates of that block.
     332              : 
     333              :      The second duplicate block in a path is specific to that path.  Creating
     334              :      and sharing a template for that block is considerably more difficult.  */
     335              :   basic_block template_block;
     336              : 
     337              :   /* If we append debug stmts to the template block after creating it,
     338              :      this iterator won't be the last one in the block, and further
     339              :      copies of the template block shouldn't get debug stmts after
     340              :      it.  */
     341              :   gimple_stmt_iterator template_last_to_copy;
     342              : 
     343              :   /* Blocks duplicated for the thread.  */
     344              :   bitmap duplicate_blocks;
     345              : 
     346              :   /* TRUE if we thread one or more jumps, FALSE otherwise.  */
     347              :   bool jumps_threaded;
     348              : 
     349              :   /* When we have multiple paths through a joiner which reach different
     350              :      final destinations, then we may need to correct for potential
     351              :      profile insanities.  */
     352              :   bool need_profile_correction;
     353              : 
     354              :   // Jump threading statistics.
     355              :   unsigned long num_threaded_edges;
     356              : };
     357              : 
     358              : /* When we start updating the CFG for threading, data necessary for jump
     359              :    threading is attached to the AUX field for the incoming edge.  Use these
     360              :    macros to access the underlying structure attached to the AUX field.  */
     361              : #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
     362              : 
     363              : /* Remove the last statement in block BB if it is a control statement
     364              :    Also remove all outgoing edges except the edge which reaches DEST_BB.
     365              :    If DEST_BB is NULL, then remove all outgoing edges.  */
     366              : 
     367              : static void
     368      1483809 : remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
     369              : {
     370      1483809 :   gimple_stmt_iterator gsi;
     371      1483809 :   edge e;
     372      1483809 :   edge_iterator ei;
     373              : 
     374      1483809 :   gsi = gsi_last_bb (bb);
     375              : 
     376              :   /* If the duplicate ends with a control statement, then remove it.
     377              : 
     378              :      Note that if we are duplicating the template block rather than the
     379              :      original basic block, then the duplicate might not have any real
     380              :      statements in it.  */
     381      1483809 :   if (!gsi_end_p (gsi)
     382      1483809 :       && gsi_stmt (gsi)
     383      1483809 :       && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
     384              :           || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
     385              :           || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
     386      1483809 :     gsi_remove (&gsi, true);
     387              : 
     388      4468015 :   for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
     389              :     {
     390      2984206 :       if (e->dest != dest_bb)
     391              :         {
     392      1750568 :           free_dom_edge_info (e);
     393      1750568 :           remove_edge (e);
     394              :         }
     395              :       else
     396              :         {
     397      1233638 :           e->probability = profile_probability::always ();
     398      1233638 :           ei_next (&ei);
     399              :         }
     400              :     }
     401              : 
     402              :   /* If the remaining edge is a loop exit, there must have
     403              :      a removed edge that was not a loop exit.
     404              : 
     405              :      In that case BB and possibly other blocks were previously
     406              :      in the loop, but are now outside the loop.  Thus, we need
     407              :      to update the loop structures.  */
     408      1483809 :   if (single_succ_p (bb)
     409      1233638 :       && loop_outer (bb->loop_father)
     410      1829800 :       && loop_exit_edge_p (bb->loop_father, single_succ_edge (bb)))
     411        55184 :     loops_state_set (LOOPS_NEED_FIXUP);
     412      1483809 : }
     413              : 
     414              : /* Create a duplicate of BB.  Record the duplicate block in an array
     415              :    indexed by COUNT stored in RD.  */
     416              : 
     417              : static void
     418       325877 : create_block_for_threading (basic_block bb,
     419              :                             struct redirection_data *rd,
     420              :                             unsigned int count,
     421              :                             bitmap *duplicate_blocks)
     422              : {
     423       325877 :   edge_iterator ei;
     424       325877 :   edge e;
     425              : 
     426              :   /* We can use the generic block duplication code and simply remove
     427              :      the stuff we do not need.  */
     428       325877 :   rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
     429              : 
     430       990574 :   FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
     431              :     {
     432       664697 :       e->aux = NULL;
     433              : 
     434              :       /* If we duplicate a block with an outgoing edge marked as
     435              :          EDGE_IGNORE, we must clear EDGE_IGNORE so that it doesn't
     436              :          leak out of the current pass.
     437              : 
     438              :          It would be better to simplify switch statements and remove
     439              :          the edges before we get here, but the sequencing is nontrivial.  */
     440       664697 :       e->flags &= ~EDGE_IGNORE;
     441              :     }
     442              : 
     443              :   /* Zero out the profile, since the block is unreachable for now.  */
     444       325877 :   rd->dup_blocks[count]->count = profile_count::uninitialized ();
     445       325877 :   if (duplicate_blocks)
     446       325877 :     bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
     447       325877 : }
     448              : 
     449              : /* Given an outgoing edge E lookup and return its entry in our hash table.
     450              : 
     451              :    If INSERT is true, then we insert the entry into the hash table if
     452              :    it is not already present.  INCOMING_EDGE is added to the list of incoming
     453              :    edges associated with E in the hash table.  */
     454              : 
     455              : redirection_data *
     456       353910 : fwd_jt_path_registry::lookup_redirection_data (edge e, insert_option insert)
     457              : {
     458       353910 :   struct redirection_data **slot;
     459       353910 :   struct redirection_data *elt;
     460       353910 :   vec<jump_thread_edge *> *path = THREAD_PATH (e);
     461              : 
     462              :   /* Build a hash table element so we can see if E is already
     463              :      in the table.  */
     464       353910 :   elt = XNEW (struct redirection_data);
     465       353910 :   elt->path = path;
     466       353910 :   elt->dup_blocks[0] = NULL;
     467       353910 :   elt->dup_blocks[1] = NULL;
     468       353910 :   elt->incoming_edges = NULL;
     469              : 
     470       353910 :   slot = m_redirection_data->find_slot (elt, insert);
     471              : 
     472              :   /* This will only happen if INSERT is false and the entry is not
     473              :      in the hash table.  */
     474       353910 :   if (slot == NULL)
     475              :     {
     476            0 :       free (elt);
     477            0 :       return NULL;
     478              :     }
     479              : 
     480              :   /* This will only happen if E was not in the hash table and
     481              :      INSERT is true.  */
     482       353910 :   if (*slot == NULL)
     483              :     {
     484       285459 :       *slot = elt;
     485       285459 :       elt->incoming_edges = XNEW (struct el);
     486       285459 :       elt->incoming_edges->e = e;
     487       285459 :       elt->incoming_edges->next = NULL;
     488       285459 :       return elt;
     489              :     }
     490              :   /* E was in the hash table.  */
     491              :   else
     492              :     {
     493              :       /* Free ELT as we do not need it anymore, we will extract the
     494              :          relevant entry from the hash table itself.  */
     495        68451 :       free (elt);
     496              : 
     497              :       /* Get the entry stored in the hash table.  */
     498        68451 :       elt = *slot;
     499              : 
     500              :       /* If insertion was requested, then we need to add INCOMING_EDGE
     501              :          to the list of incoming edges associated with E.  */
     502        68451 :       if (insert)
     503              :         {
     504        68451 :           struct el *el = XNEW (struct el);
     505        68451 :           el->next = elt->incoming_edges;
     506        68451 :           el->e = e;
     507        68451 :           elt->incoming_edges = el;
     508              :         }
     509              : 
     510        68451 :       return elt;
     511              :     }
     512              : }
     513              : 
     514              : /* Given ssa_name DEF, backtrack jump threading PATH from node IDX
     515              :    to see if it has constant value in a flow sensitive manner.  Set
     516              :    LOCUS to location of the constant phi arg and return the value.
     517              :    Return DEF directly if either PATH or idx is ZERO.  */
     518              : 
     519              : static tree
     520       120249 : get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
     521              :                          basic_block bb, int idx, location_t *locus)
     522              : {
     523       120249 :   tree arg;
     524       120249 :   gphi *def_phi;
     525       120249 :   basic_block def_bb;
     526              : 
     527       120249 :   if (path == NULL || idx == 0)
     528              :     return def;
     529              : 
     530       166288 :   def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
     531        63400 :   if (!def_phi)
     532              :     return def;
     533              : 
     534        63400 :   def_bb = gimple_bb (def_phi);
     535              :   /* Don't propagate loop invariants into deeper loops.  */
     536        63400 :   if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
     537         1346 :     return def;
     538              : 
     539              :   /* Backtrack jump threading path from IDX to see if def has constant
     540              :      value.  */
     541        81044 :   for (int j = idx - 1; j >= 0; j--)
     542              :     {
     543        65777 :       edge e = (*path)[j]->e;
     544        65777 :       if (e->dest == def_bb)
     545              :         {
     546        46787 :           arg = gimple_phi_arg_def (def_phi, e->dest_idx);
     547        46787 :           if (is_gimple_min_invariant (arg))
     548              :             {
     549        17361 :               *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
     550        17361 :               return arg;
     551              :             }
     552              :           break;
     553              :         }
     554              :     }
     555              : 
     556              :   return def;
     557              : }
     558              : 
     559              : /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
     560              :    Try to backtrack jump threading PATH from node IDX to see if the arg
     561              :    has constant value, copy constant value instead of argument itself
     562              :    if yes.  */
     563              : 
     564              : static void
     565       424346 : copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
     566              :                vec<jump_thread_edge *> *path, int idx)
     567              : {
     568       424346 :   gphi_iterator gsi;
     569       424346 :   int src_indx = src_e->dest_idx;
     570              : 
     571       693206 :   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     572              :     {
     573       268860 :       gphi *phi = gsi.phi ();
     574       268860 :       tree def = gimple_phi_arg_def (phi, src_indx);
     575       268860 :       location_t locus = gimple_phi_arg_location (phi, src_indx);
     576              : 
     577       268860 :       if (TREE_CODE (def) == SSA_NAME
     578       486321 :           && !virtual_operand_p (gimple_phi_result (phi)))
     579       120249 :         def = get_value_locus_in_path (def, path, bb, idx, &locus);
     580              : 
     581       268860 :       add_phi_arg (phi, def, tgt_e, locus);
     582              :     }
     583       424346 : }
     584              : 
     585              : /* We have recently made a copy of ORIG_BB, including its outgoing
     586              :    edges.  The copy is NEW_BB.  Every PHI node in every direct successor of
     587              :    ORIG_BB has a new argument associated with edge from NEW_BB to the
     588              :    successor.  Initialize the PHI argument so that it is equal to the PHI
     589              :    argument associated with the edge from ORIG_BB to the successor.
     590              :    PATH and IDX are used to check if the new PHI argument has constant
     591              :    value in a flow sensitive manner.  */
     592              : 
     593              : static void
     594        75706 : update_destination_phis (basic_block orig_bb, basic_block new_bb,
     595              :                          vec<jump_thread_edge *> *path, int idx)
     596              : {
     597        75706 :   edge_iterator ei;
     598        75706 :   edge e;
     599              : 
     600       234367 :   FOR_EACH_EDGE (e, ei, orig_bb->succs)
     601              :     {
     602       158661 :       edge e2 = find_edge (new_bb, e->dest);
     603       158661 :       copy_phi_args (e->dest, e, e2, path, idx);
     604              :     }
     605        75706 : }
     606              : 
     607              : /* Given a duplicate block and its single destination (both stored
     608              :    in RD).  Create an edge between the duplicate and its single
     609              :    destination.
     610              : 
     611              :    Add an additional argument to any PHI nodes at the single
     612              :    destination.  IDX is the start node in jump threading path
     613              :    we start to check to see if the new PHI argument has constant
     614              :    value along the jump threading path.  */
     615              : 
     616              : static void
     617       250171 : create_edge_and_update_destination_phis (struct redirection_data *rd,
     618              :                                          basic_block bb, int idx)
     619              : {
     620       250171 :   edge e = make_single_succ_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
     621              : 
     622       250171 :   rescan_loop_exit (e, true, false);
     623              : 
     624              :   /* We used to copy the thread path here.  That was added in 2007
     625              :      and dutifully updated through the representation changes in 2013.
     626              : 
     627              :      In 2013 we added code to thread from an interior node through
     628              :      the backedge to another interior node.  That runs after the code
     629              :      to thread through loop headers from outside the loop.
     630              : 
     631              :      The latter may delete edges in the CFG, including those
     632              :      which appeared in the jump threading path we copied here.  Thus
     633              :      we'd end up using a dangling pointer.
     634              : 
     635              :      After reviewing the 2007/2011 code, I can't see how anything
     636              :      depended on copying the AUX field and clearly copying the jump
     637              :      threading path is problematical due to embedded edge pointers.
     638              :      It has been removed.  */
     639       250171 :   e->aux = NULL;
     640              : 
     641              :   /* If there are any PHI nodes at the destination of the outgoing edge
     642              :      from the duplicate block, then we will need to add a new argument
     643              :      to them.  The argument should have the same value as the argument
     644              :      associated with the outgoing edge stored in RD.  */
     645       250171 :   copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
     646       250171 : }
     647              : 
     648              : /* Look through PATH beginning at START and return TRUE if there are
     649              :    any additional blocks that need to be duplicated.  Otherwise,
     650              :    return FALSE.  */
     651              : static bool
     652        75706 : any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
     653              :                                  unsigned int start)
     654              : {
     655        96107 :   for (unsigned int i = start + 1; i < path->length (); i++)
     656              :     {
     657        60819 :       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
     658        60819 :           || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
     659              :         return true;
     660              :     }
     661              :   return false;
     662              : }
     663              : 
     664              : 
     665              : /* Compute the amount of profile count coming into the jump threading
     666              :    path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
     667              :    PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
     668              :    duplicated path, returned in PATH_OUT_COUNT_PTR.  LOCAL_INFO is used to
     669              :    identify blocks duplicated for jump threading, which have duplicated
     670              :    edges that need to be ignored in the analysis.  Return true if path contains
     671              :    a joiner, false otherwise.
     672              : 
     673              :    In the non-joiner case, this is straightforward - all the counts
     674              :    flowing into the jump threading path should flow through the duplicated
     675              :    block and out of the duplicated path.
     676              : 
     677              :    In the joiner case, it is very tricky.  Some of the counts flowing into
     678              :    the original path go offpath at the joiner.  The problem is that while
     679              :    we know how much total count goes off-path in the original control flow,
     680              :    we don't know how many of the counts corresponding to just the jump
     681              :    threading path go offpath at the joiner.
     682              : 
     683              :    For example, assume we have the following control flow and identified
     684              :    jump threading paths:
     685              : 
     686              :                 A     B     C
     687              :                  \    |    /
     688              :                Ea \   |Eb / Ec
     689              :                    \  |  /
     690              :                     v v v
     691              :                       J       <-- Joiner
     692              :                      / \
     693              :                 Eoff/   \Eon
     694              :                    /     \
     695              :                   v       v
     696              :                 Soff     Son  <--- Normal
     697              :                          /\
     698              :                       Ed/  \ Ee
     699              :                        /    \
     700              :                       v     v
     701              :                       D      E
     702              : 
     703              :             Jump threading paths: A -> J -> Son -> D (path 1)
     704              :                                   C -> J -> Son -> E (path 2)
     705              : 
     706              :    Note that the control flow could be more complicated:
     707              :    - Each jump threading path may have more than one incoming edge.  I.e. A and
     708              :    Ea could represent multiple incoming blocks/edges that are included in
     709              :    path 1.
     710              :    - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
     711              :    before or after the "normal" copy block).  These are not duplicated onto
     712              :    the jump threading path, as they are single-successor.
     713              :    - Any of the blocks along the path may have other incoming edges that
     714              :    are not part of any jump threading path, but add profile counts along
     715              :    the path.
     716              : 
     717              :    In the above example, after all jump threading is complete, we will
     718              :    end up with the following control flow:
     719              : 
     720              :                 A          B           C
     721              :                 |          |           |
     722              :               Ea|          |Eb         |Ec
     723              :                 |          |           |
     724              :                 v          v           v
     725              :                Ja          J          Jc
     726              :                / \        / \Eon'     / \
     727              :           Eona/   \   ---/---\--------   \Eonc
     728              :              /     \ /  /     \           \
     729              :             v       v  v       v          v
     730              :            Sona     Soff      Son       Sonc
     731              :              \                 /\         /
     732              :               \___________    /  \  _____/
     733              :                           \  /    \/
     734              :                            vv      v
     735              :                             D      E
     736              : 
     737              :    The main issue to notice here is that when we are processing path 1
     738              :    (A->J->Son->D) we need to figure out the outgoing edge weights to
     739              :    the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
     740              :    sum of the incoming weights to D remain Ed.  The problem with simply
     741              :    assuming that Ja (and Jc when processing path 2) has the same outgoing
     742              :    probabilities to its successors as the original block J, is that after
     743              :    all paths are processed and other edges/counts removed (e.g. none
     744              :    of Ec will reach D after processing path 2), we may end up with not
     745              :    enough count flowing along duplicated edge Sona->D.
     746              : 
     747              :    Therefore, in the case of a joiner, we keep track of all counts
     748              :    coming in along the current path, as well as from predecessors not
     749              :    on any jump threading path (Eb in the above example).  While we
     750              :    first assume that the duplicated Eona for Ja->Sona has the same
     751              :    probability as the original, we later compensate for other jump
     752              :    threading paths that may eliminate edges.  We do that by keep track
     753              :    of all counts coming into the original path that are not in a jump
     754              :    thread (Eb in the above example, but as noted earlier, there could
     755              :    be other predecessors incoming to the path at various points, such
     756              :    as at Son).  Call this cumulative non-path count coming into the path
     757              :    before D as Enonpath.  We then ensure that the count from Sona->D is as at
     758              :    least as big as (Ed - Enonpath), but no bigger than the minimum
     759              :    weight along the jump threading path.  The probabilities of both the
     760              :    original and duplicated joiner block J and Ja will be adjusted
     761              :    accordingly after the updates.  */
     762              : 
     763              : static bool
     764       285459 : compute_path_counts (struct redirection_data *rd,
     765              :                      ssa_local_info_t *local_info,
     766              :                      profile_count *path_in_count_ptr,
     767              :                      profile_count *path_out_count_ptr)
     768              : {
     769       285459 :   edge e = rd->incoming_edges->e;
     770       285459 :   vec<jump_thread_edge *> *path = THREAD_PATH (e);
     771       285459 :   edge elast = path->last ()->e;
     772       285459 :   profile_count nonpath_count = profile_count::zero ();
     773       285459 :   bool has_joiner = false;
     774       285459 :   profile_count path_in_count = profile_count::zero ();
     775              : 
     776              :   /* Start by accumulating incoming edge counts to the path's first bb
     777              :      into a couple buckets:
     778              :         path_in_count: total count of incoming edges that flow into the
     779              :                   current path.
     780              :         nonpath_count: total count of incoming edges that are not
     781              :                   flowing along *any* path.  These are the counts
     782              :                   that will still flow along the original path after
     783              :                   all path duplication is done by potentially multiple
     784              :                   calls to this routine.
     785              :      (any other incoming edge counts are for a different jump threading
     786              :      path that will be handled by a later call to this routine.)
     787              :      To make this easier, start by recording all incoming edges that flow into
     788              :      the current path in a bitmap.  We could add up the path's incoming edge
     789              :      counts here, but we still need to walk all the first bb's incoming edges
     790              :      below to add up the counts of the other edges not included in this jump
     791              :      threading path.  */
     792       285459 :   struct el *next, *el;
     793       285459 :   auto_bitmap in_edge_srcs;
     794       639369 :   for (el = rd->incoming_edges; el; el = next)
     795              :     {
     796       353910 :       next = el->next;
     797       353910 :       bitmap_set_bit (in_edge_srcs, el->e->src->index);
     798              :     }
     799       285459 :   edge ein;
     800       285459 :   edge_iterator ei;
     801      1045783 :   FOR_EACH_EDGE (ein, ei, e->dest->preds)
     802              :     {
     803       760324 :       vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
     804              :       /* Simply check the incoming edge src against the set captured above.  */
     805       760324 :       if (ein_path
     806      1275433 :           && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
     807              :         {
     808              :           /* It is necessary but not sufficient that the last path edges
     809              :              are identical.  There may be different paths that share the
     810              :              same last path edge in the case where the last edge has a nocopy
     811              :              source block.  */
     812       353910 :           gcc_assert (ein_path->last ()->e == elast);
     813       353910 :           path_in_count += ein->count ();
     814              :         }
     815       406414 :       else if (!ein_path)
     816              :         {
     817              :           /* Keep track of the incoming edges that are not on any jump-threading
     818              :              path.  These counts will still flow out of original path after all
     819              :              jump threading is complete.  */
     820       245215 :             nonpath_count += ein->count ();
     821              :         }
     822              :     }
     823              : 
     824              :   /* Now compute the fraction of the total count coming into the first
     825              :      path bb that is from the current threading path.  */
     826       285459 :   profile_count total_count = e->dest->count;
     827              :   /* Handle incoming profile insanities.  */
     828       285459 :   if (total_count < path_in_count)
     829        21931 :     path_in_count = total_count;
     830       285459 :   profile_probability onpath_scale = path_in_count.probability_in (total_count);
     831              : 
     832              :   /* Walk the entire path to do some more computation in order to estimate
     833              :      how much of the path_in_count will flow out of the duplicated threading
     834              :      path.  In the non-joiner case this is straightforward (it should be
     835              :      the same as path_in_count, although we will handle incoming profile
     836              :      insanities by setting it equal to the minimum count along the path).
     837              : 
     838              :      In the joiner case, we need to estimate how much of the path_in_count
     839              :      will stay on the threading path after the joiner's conditional branch.
     840              :      We don't really know for sure how much of the counts
     841              :      associated with this path go to each successor of the joiner, but we'll
     842              :      estimate based on the fraction of the total count coming into the path
     843              :      bb was from the threading paths (computed above in onpath_scale).
     844              :      Afterwards, we will need to do some fixup to account for other threading
     845              :      paths and possible profile insanities.
     846              : 
     847              :      In order to estimate the joiner case's counts we also need to update
     848              :      nonpath_count with any additional counts coming into the path.  Other
     849              :      blocks along the path may have additional predecessors from outside
     850              :      the path.  */
     851       285459 :   profile_count path_out_count = path_in_count;
     852       285459 :   profile_count min_path_count = path_in_count;
     853       651055 :   for (unsigned int i = 1; i < path->length (); i++)
     854              :     {
     855       365596 :       edge epath = (*path)[i]->e;
     856       365596 :       profile_count cur_count = epath->count ();
     857       365596 :       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
     858              :         {
     859        75706 :           has_joiner = true;
     860        75706 :           cur_count = cur_count.apply_probability (onpath_scale);
     861              :         }
     862              :       /* In the joiner case we need to update nonpath_count for any edges
     863              :          coming into the path that will contribute to the count flowing
     864              :          into the path successor.  */
     865       365596 :       if (has_joiner && epath != elast)
     866              :         {
     867              :           /* Look for other incoming edges after joiner.  */
     868       237247 :           FOR_EACH_EDGE (ein, ei, epath->dest->preds)
     869              :             {
     870       174343 :               if (ein != epath
     871              :                   /* Ignore in edges from blocks we have duplicated for a
     872              :                      threading path, which have duplicated edge counts until
     873              :                      they are redirected by an invocation of this routine.  */
     874       285782 :                   && !bitmap_bit_p (local_info->duplicate_blocks,
     875       111439 :                                     ein->src->index))
     876        38980 :                 nonpath_count += ein->count ();
     877              :             }
     878              :         }
     879       365596 :       if (cur_count < path_out_count)
     880       156159 :         path_out_count = cur_count;
     881       365596 :       if (epath->count () < min_path_count)
     882       134353 :         min_path_count = epath->count ();
     883              :     }
     884              : 
     885              :   /* We computed path_out_count above assuming that this path targeted
     886              :      the joiner's on-path successor with the same likelihood as it
     887              :      reached the joiner.  However, other thread paths through the joiner
     888              :      may take a different path through the normal copy source block
     889              :      (i.e. they have a different elast), meaning that they do not
     890              :      contribute any counts to this path's elast.  As a result, it may
     891              :      turn out that this path must have more count flowing to the on-path
     892              :      successor of the joiner.  Essentially, all of this path's elast
     893              :      count must be contributed by this path and any nonpath counts
     894              :      (since any path through the joiner with a different elast will not
     895              :      include a copy of this elast in its duplicated path).
     896              :      So ensure that this path's path_out_count is at least the
     897              :      difference between elast->count () and nonpath_count.  Otherwise the edge
     898              :      counts after threading will not be sane.  */
     899       285459 :   if (local_info->need_profile_correction
     900       313074 :       && has_joiner && path_out_count < elast->count () - nonpath_count)
     901              :     {
     902         8991 :       path_out_count = elast->count () - nonpath_count;
     903              :       /* But neither can we go above the minimum count along the path
     904              :          we are duplicating.  This can be an issue due to profile
     905              :          insanities coming in to this pass.  */
     906         8991 :       if (path_out_count > min_path_count)
     907         5670 :         path_out_count = min_path_count;
     908              :     }
     909              : 
     910       285459 :   *path_in_count_ptr = path_in_count;
     911       285459 :   *path_out_count_ptr = path_out_count;
     912       285459 :   return has_joiner;
     913       285459 : }
     914              : 
     915              : 
     916              : /* Update the counts and frequencies for both an original path
     917              :    edge EPATH and its duplicate EDUP.  The duplicate source block
     918              :    will get a count of PATH_IN_COUNT and PATH_IN_FREQ,
     919              :    and the duplicate edge EDUP will have a count of PATH_OUT_COUNT.  */
     920              : static void
     921       365596 : update_profile (edge epath, edge edup, profile_count path_in_count,
     922              :                 profile_count path_out_count)
     923              : {
     924              : 
     925              :   /* First update the duplicated block's count.  */
     926       365596 :   if (edup)
     927              :     {
     928       325877 :       basic_block dup_block = edup->src;
     929              : 
     930              :       /* Edup's count is reduced by path_out_count.  We need to redistribute
     931              :          probabilities to the remaining edges.  */
     932              : 
     933       325877 :       edge esucc;
     934       325877 :       edge_iterator ei;
     935       325877 :       profile_probability edup_prob
     936       325877 :          = path_out_count.probability_in (path_in_count);
     937              : 
     938              :       /* Either scale up or down the remaining edges.
     939              :          probabilities are always in range <0,1> and thus we can't do
     940              :          both by same loop.  */
     941       325877 :       if (edup->probability > edup_prob)
     942              :         {
     943        25665 :            profile_probability rev_scale
     944        25665 :              = (profile_probability::always () - edup->probability)
     945        51330 :                / (profile_probability::always () - edup_prob);
     946        80592 :            FOR_EACH_EDGE (esucc, ei, dup_block->succs)
     947        54927 :              if (esucc != edup)
     948        29262 :                esucc->probability /= rev_scale;
     949              :         }
     950       300212 :       else if (edup->probability < edup_prob)
     951              :         {
     952        20756 :            profile_probability scale
     953        20756 :              = (profile_probability::always () - edup_prob)
     954        41512 :                / (profile_probability::always () - edup->probability);
     955        63630 :           FOR_EACH_EDGE (esucc, ei, dup_block->succs)
     956        42874 :             if (esucc != edup)
     957        22118 :               esucc->probability *= scale;
     958              :         }
     959       325877 :       if (edup_prob.initialized_p ())
     960       304404 :         edup->probability = edup_prob;
     961              : 
     962       325877 :       gcc_assert (!dup_block->count.initialized_p ());
     963       325877 :       dup_block->count = path_in_count;
     964              :     }
     965              : 
     966       365596 :   if (path_in_count == profile_count::zero ())
     967         9893 :     return;
     968              : 
     969       355703 :   profile_count final_count = epath->count () - path_out_count;
     970              : 
     971              :   /* Now update the original block's count in the
     972              :      opposite manner - remove the counts/freq that will flow
     973              :      into the duplicated block.  Handle underflow due to precision/
     974              :      rounding issues.  */
     975       355703 :   epath->src->count -= path_in_count;
     976              : 
     977              :   /* Next update this path edge's original and duplicated counts.  We know
     978              :      that the duplicated path will have path_out_count flowing
     979              :      out of it (in the joiner case this is the count along the duplicated path
     980              :      out of the duplicated joiner).  This count can then be removed from the
     981              :      original path edge.  */
     982              : 
     983       355703 :   edge esucc;
     984       355703 :   edge_iterator ei;
     985       355703 :   profile_probability epath_prob = final_count.probability_in (epath->src->count);
     986              : 
     987       355703 :   if (epath->probability > epath_prob)
     988              :     {
     989       221172 :        profile_probability rev_scale
     990       221172 :          = (profile_probability::always () - epath->probability)
     991       442344 :            / (profile_probability::always () - epath_prob);
     992       665756 :        FOR_EACH_EDGE (esucc, ei, epath->src->succs)
     993       444584 :          if (esucc != epath)
     994       223412 :            esucc->probability /= rev_scale;
     995              :     }
     996       134531 :   else if (epath->probability < epath_prob)
     997              :     {
     998        18503 :        profile_probability scale
     999        18503 :          = (profile_probability::always () - epath_prob)
    1000        37006 :            / (profile_probability::always () - epath->probability);
    1001        59289 :       FOR_EACH_EDGE (esucc, ei, epath->src->succs)
    1002        40786 :         if (esucc != epath)
    1003        22283 :           esucc->probability *= scale;
    1004              :     }
    1005       355703 :   if (epath_prob.initialized_p ())
    1006       324756 :     epath->probability = epath_prob;
    1007              : }
    1008              : 
    1009              : /* Wire up the outgoing edges from the duplicate blocks and
    1010              :    update any PHIs as needed.  Also update the profile counts
    1011              :    on the original and duplicate blocks and edges.  */
    1012              : void
    1013       285459 : ssa_fix_duplicate_block_edges (struct redirection_data *rd,
    1014              :                                ssa_local_info_t *local_info)
    1015              : {
    1016       285459 :   bool multi_incomings = (rd->incoming_edges->next != NULL);
    1017       285459 :   edge e = rd->incoming_edges->e;
    1018       285459 :   vec<jump_thread_edge *> *path = THREAD_PATH (e);
    1019       285459 :   edge elast = path->last ()->e;
    1020       285459 :   profile_count path_in_count = profile_count::zero ();
    1021       285459 :   profile_count path_out_count = profile_count::zero ();
    1022              : 
    1023              :   /* First determine how much profile count to move from original
    1024              :      path to the duplicate path.  This is tricky in the presence of
    1025              :      a joiner (see comments for compute_path_counts), where some portion
    1026              :      of the path's counts will flow off-path from the joiner.  In the
    1027              :      non-joiner case the path_in_count and path_out_count should be the
    1028              :      same.  */
    1029       285459 :   bool has_joiner = compute_path_counts (rd, local_info,
    1030              :                                          &path_in_count, &path_out_count);
    1031              : 
    1032       651055 :   for (unsigned int count = 0, i = 1; i < path->length (); i++)
    1033              :     {
    1034       365596 :       edge epath = (*path)[i]->e;
    1035              : 
    1036              :       /* If we were threading through an joiner block, then we want
    1037              :          to keep its control statement and redirect an outgoing edge.
    1038              :          Else we want to remove the control statement & edges, then create
    1039              :          a new outgoing edge.  In both cases we may need to update PHIs.  */
    1040       365596 :       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
    1041              :         {
    1042        75706 :           edge victim;
    1043        75706 :           edge e2;
    1044              : 
    1045        75706 :           gcc_assert (has_joiner);
    1046              : 
    1047              :           /* This updates the PHIs at the destination of the duplicate
    1048              :              block.  Pass 0 instead of i if we are threading a path which
    1049              :              has multiple incoming edges.  */
    1050       135965 :           update_destination_phis (local_info->bb, rd->dup_blocks[count],
    1051              :                                    path, multi_incomings ? 0 : i);
    1052              : 
    1053              :           /* Find the edge from the duplicate block to the block we're
    1054              :              threading through.  That's the edge we want to redirect.  */
    1055        75706 :           victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
    1056              : 
    1057              :           /* If there are no remaining blocks on the path to duplicate,
    1058              :              then redirect VICTIM to the final destination of the jump
    1059              :              threading path.  */
    1060        75706 :           if (!any_remaining_duplicated_blocks (path, i))
    1061              :             {
    1062        35288 :               if (victim->dest != elast->dest)
    1063              :                 {
    1064        15746 :                   e2 = redirect_edge_and_branch (victim, elast->dest);
    1065              :                   /* If we redirected the edge, then we need to copy PHI arguments
    1066              :                      at the target.  If the edge already existed (e2 != victim
    1067              :                      case), then the PHIs in the target already have the correct
    1068              :                      arguments.  */
    1069        15746 :                   if (e2 == victim)
    1070        15514 :                     copy_phi_args (e2->dest, elast, e2,
    1071              :                                    path, multi_incomings ? 0 : i);
    1072              :                 }
    1073              :               else
    1074              :                 e2 = victim;
    1075              :             }
    1076              :           else
    1077              :             {
    1078              :               /* Redirect VICTIM to the next duplicated block in the path.  */
    1079        40418 :               e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
    1080              : 
    1081              :               /* We need to update the PHIs in the next duplicated block.  We
    1082              :                  want the new PHI args to have the same value as they had
    1083              :                  in the source of the next duplicate block.
    1084              : 
    1085              :                  Thus, we need to know which edge we traversed into the
    1086              :                  source of the duplicate.  Furthermore, we may have
    1087              :                  traversed many edges to reach the source of the duplicate.
    1088              : 
    1089              :                  Walk through the path starting at element I until we
    1090              :                  hit an edge marked with EDGE_COPY_SRC_BLOCK.  We want
    1091              :                  the edge from the prior element.  */
    1092        41910 :               for (unsigned int j = i + 1; j < path->length (); j++)
    1093              :                 {
    1094        41910 :                   if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
    1095              :                     {
    1096        40418 :                       copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
    1097        40418 :                       break;
    1098              :                     }
    1099              :                 }
    1100              :             }
    1101              : 
    1102              :           /* Update the counts of both the original block
    1103              :              and path edge, and the duplicates.  The path duplicate's
    1104              :              incoming count are the totals for all edges
    1105              :              incoming to this jump threading path computed earlier.
    1106              :              And we know that the duplicated path will have path_out_count
    1107              :              flowing out of it (i.e. along the duplicated path out of the
    1108              :              duplicated joiner).  */
    1109        75706 :           update_profile (epath, e2, path_in_count, path_out_count);
    1110              :         }
    1111       289890 :       else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
    1112              :         {
    1113       250171 :           remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
    1114       463634 :           create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
    1115              :                                                    multi_incomings ? 0 : i);
    1116       250171 :           if (count == 1)
    1117        40418 :             single_succ_edge (rd->dup_blocks[1])->aux = NULL;
    1118              : 
    1119              :           /* Update the counts of both the original block
    1120              :              and path edge, and the duplicates.  Since we are now after
    1121              :              any joiner that may have existed on the path, the count
    1122              :              flowing along the duplicated threaded path is path_out_count.
    1123              :              If we didn't have a joiner, then cur_path_freq was the sum
    1124              :              of the total frequencies along all incoming edges to the
    1125              :              thread path (path_in_freq).  If we had a joiner, it would have
    1126              :              been updated at the end of that handling to the edge frequency
    1127              :              along the duplicated joiner path edge.  */
    1128       250171 :           update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
    1129              :                           path_out_count, path_out_count);
    1130              :         }
    1131              :       else
    1132              :         {
    1133              :           /* No copy case.  In this case we don't have an equivalent block
    1134              :              on the duplicated thread path to update, but we do need
    1135              :              to remove the portion of the counts/freqs that were moved
    1136              :              to the duplicated path from the counts/freqs flowing through
    1137              :              this block on the original path.  Since all the no-copy edges
    1138              :              are after any joiner, the removed count is the same as
    1139              :              path_out_count.
    1140              : 
    1141              :              If we didn't have a joiner, then cur_path_freq was the sum
    1142              :              of the total frequencies along all incoming edges to the
    1143              :              thread path (path_in_freq).  If we had a joiner, it would have
    1144              :              been updated at the end of that handling to the edge frequency
    1145              :              along the duplicated joiner path edge.  */
    1146        39719 :            update_profile (epath, NULL, path_out_count, path_out_count);
    1147              :         }
    1148              : 
    1149              :       /* Increment the index into the duplicated path when we processed
    1150              :          a duplicated block.  */
    1151       365596 :       if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
    1152       365596 :           || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
    1153              :         {
    1154       325877 :           count++;
    1155              :         }
    1156              :     }
    1157       285459 : }
    1158              : 
    1159              : /* Hash table traversal callback routine to create duplicate blocks.  */
    1160              : 
    1161              : int
    1162       285459 : ssa_create_duplicates (struct redirection_data **slot,
    1163              :                        ssa_local_info_t *local_info)
    1164              : {
    1165       285459 :   struct redirection_data *rd = *slot;
    1166              : 
    1167              :   /* The second duplicated block in a jump threading path is specific
    1168              :      to the path.  So it gets stored in RD rather than in LOCAL_DATA.
    1169              : 
    1170              :      Each time we're called, we have to look through the path and see
    1171              :      if a second block needs to be duplicated.
    1172              : 
    1173              :      Note the search starts with the third edge on the path.  The first
    1174              :      edge is the incoming edge, the second edge always has its source
    1175              :      duplicated.  Thus we start our search with the third edge.  */
    1176       285459 :   vec<jump_thread_edge *> *path = rd->path;
    1177       323093 :   for (unsigned int i = 2; i < path->length (); i++)
    1178              :     {
    1179        78052 :       if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
    1180        78052 :           || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
    1181              :         {
    1182        40418 :           create_block_for_threading ((*path)[i]->e->src, rd, 1,
    1183              :                                       &local_info->duplicate_blocks);
    1184        40418 :           break;
    1185              :         }
    1186              :     }
    1187              : 
    1188              :   /* Create a template block if we have not done so already.  Otherwise
    1189              :      use the template to create a new block.  */
    1190       285459 :   if (local_info->template_block == NULL)
    1191              :     {
    1192       227967 :       create_block_for_threading ((*path)[1]->e->src, rd, 0,
    1193              :                                   &local_info->duplicate_blocks);
    1194       227967 :       local_info->template_block = rd->dup_blocks[0];
    1195       227967 :       local_info->template_last_to_copy
    1196       455934 :         = gsi_last_bb (local_info->template_block);
    1197              : 
    1198              :       /* We do not create any outgoing edges for the template.  We will
    1199              :          take care of that in a later traversal.  That way we do not
    1200              :          create edges that are going to just be deleted.  */
    1201              :     }
    1202              :   else
    1203              :     {
    1204        57492 :       gimple_seq seq = NULL;
    1205        57492 :       if (gsi_stmt (local_info->template_last_to_copy)
    1206       114984 :           != gsi_stmt (gsi_last_bb (local_info->template_block)))
    1207              :         {
    1208            0 :           if (gsi_end_p (local_info->template_last_to_copy))
    1209              :             {
    1210            0 :               seq = bb_seq (local_info->template_block);
    1211            0 :               set_bb_seq (local_info->template_block, NULL);
    1212              :             }
    1213              :           else
    1214            0 :             seq = gsi_split_seq_after (local_info->template_last_to_copy);
    1215              :         }
    1216        57492 :       create_block_for_threading (local_info->template_block, rd, 0,
    1217              :                                   &local_info->duplicate_blocks);
    1218        57492 :       if (seq)
    1219              :         {
    1220            0 :           if (gsi_end_p (local_info->template_last_to_copy))
    1221            0 :             set_bb_seq (local_info->template_block, seq);
    1222              :           else
    1223            0 :             gsi_insert_seq_after (&local_info->template_last_to_copy,
    1224              :                                   seq, GSI_SAME_STMT);
    1225              :         }
    1226              : 
    1227              :       /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
    1228              :          block.   */
    1229        57492 :       ssa_fix_duplicate_block_edges (rd, local_info);
    1230              :     }
    1231              : 
    1232       285459 :   if (MAY_HAVE_DEBUG_STMTS)
    1233              :     {
    1234              :       /* Copy debug stmts from each NO_COPY src block to the block
    1235              :          that would have been its predecessor, if we can append to it
    1236              :          (we can't add stmts after a block-ending stmt), or prepending
    1237              :          to the duplicate of the successor, if there is one.  If
    1238              :          there's no duplicate successor, we'll mostly drop the blocks
    1239              :          on the floor; propagate_threaded_block_debug_into, called
    1240              :          elsewhere, will consolidate and preserve the effects of the
    1241              :          binds, but none of the markers.  */
    1242       212918 :       gimple_stmt_iterator copy_to = gsi_last_bb (rd->dup_blocks[0]);
    1243       212918 :       if (!gsi_end_p (copy_to))
    1244              :         {
    1245       210820 :           if (stmt_ends_bb_p (gsi_stmt (copy_to)))
    1246              :             {
    1247       188057 :               if (rd->dup_blocks[1])
    1248        32697 :                 copy_to = gsi_after_labels (rd->dup_blocks[1]);
    1249              :               else
    1250       155360 :                 copy_to = gsi_none ();
    1251              :             }
    1252              :           else
    1253        22763 :             gsi_next (&copy_to);
    1254              :         }
    1255       277073 :       for (unsigned int i = 2, j = 0; i < path->length (); i++)
    1256        64155 :         if ((*path)[i]->type == EDGE_NO_COPY_SRC_BLOCK
    1257        64155 :             && gsi_bb (copy_to))
    1258              :           {
    1259         7780 :             for (gimple_stmt_iterator gsi = gsi_start_bb ((*path)[i]->e->src);
    1260        10729 :                  !gsi_end_p (gsi); gsi_next (&gsi))
    1261              :               {
    1262         6839 :                 if (!is_gimple_debug (gsi_stmt (gsi)))
    1263         1930 :                   continue;
    1264         4909 :                 gimple *stmt = gsi_stmt (gsi);
    1265         4909 :                 gimple *copy = gimple_copy (stmt);
    1266         4909 :                 gsi_insert_before (&copy_to, copy, GSI_SAME_STMT);
    1267              :               }
    1268              :           }
    1269        60265 :         else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
    1270        60265 :                  || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
    1271              :           {
    1272        33082 :             j++;
    1273        33082 :             gcc_assert (j < 2);
    1274        33082 :             copy_to = gsi_last_bb (rd->dup_blocks[j]);
    1275        33082 :             if (!gsi_end_p (copy_to))
    1276              :               {
    1277        32987 :                 if (stmt_ends_bb_p (gsi_stmt (copy_to)))
    1278        27690 :                   copy_to = gsi_none ();
    1279              :                 else
    1280         5297 :                   gsi_next (&copy_to);
    1281              :               }
    1282              :           }
    1283              :     }
    1284              : 
    1285              :   /* Keep walking the hash table.  */
    1286       285459 :   return 1;
    1287              : }
    1288              : 
    1289              : /* We did not create any outgoing edges for the template block during
    1290              :    block creation.  This hash table traversal callback creates the
    1291              :    outgoing edge for the template block.  */
    1292              : 
    1293              : inline int
    1294       227967 : ssa_fixup_template_block (struct redirection_data **slot,
    1295              :                           ssa_local_info_t *local_info)
    1296              : {
    1297       227967 :   struct redirection_data *rd = *slot;
    1298              : 
    1299              :   /* If this is the template block halt the traversal after updating
    1300              :      it appropriately.
    1301              : 
    1302              :      If we were threading through an joiner block, then we want
    1303              :      to keep its control statement and redirect an outgoing edge.
    1304              :      Else we want to remove the control statement & edges, then create
    1305              :      a new outgoing edge.  In both cases we may need to update PHIs.  */
    1306       227967 :   if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
    1307              :     {
    1308       227967 :       ssa_fix_duplicate_block_edges (rd, local_info);
    1309       227967 :       return 0;
    1310              :     }
    1311              : 
    1312              :   return 1;
    1313              : }
    1314              : 
    1315              : /* Hash table traversal callback to redirect each incoming edge
    1316              :    associated with this hash table element to its new destination.  */
    1317              : 
    1318              : static int
    1319       285459 : ssa_redirect_edges (struct redirection_data **slot,
    1320              :                     ssa_local_info_t *local_info)
    1321              : {
    1322       285459 :   struct redirection_data *rd = *slot;
    1323       285459 :   struct el *next, *el;
    1324              : 
    1325              :   /* Walk over all the incoming edges associated with this hash table
    1326              :      entry.  */
    1327       639369 :   for (el = rd->incoming_edges; el; el = next)
    1328              :     {
    1329       353910 :       edge e = el->e;
    1330       353910 :       vec<jump_thread_edge *> *path = THREAD_PATH (e);
    1331              : 
    1332              :       /* Go ahead and free this element from the list.  Doing this now
    1333              :          avoids the need for another list walk when we destroy the hash
    1334              :          table.  */
    1335       353910 :       next = el->next;
    1336       353910 :       free (el);
    1337              : 
    1338       353910 :       local_info->num_threaded_edges++;
    1339              : 
    1340       353910 :       if (rd->dup_blocks[0])
    1341              :         {
    1342       353910 :           edge e2;
    1343              : 
    1344       353910 :           if (dump_file && (dump_flags & TDF_DETAILS))
    1345           25 :             fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
    1346           25 :                      e->src->index, e->dest->index, rd->dup_blocks[0]->index);
    1347              : 
    1348              :           /* Redirect the incoming edge (possibly to the joiner block) to the
    1349              :              appropriate duplicate block.  */
    1350       353910 :           e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
    1351       353910 :           gcc_assert (e == e2);
    1352       353910 :           flush_pending_stmts (e2);
    1353              :         }
    1354              : 
    1355              :       /* Go ahead and clear E->aux.  It's not needed anymore and failure
    1356              :          to clear it will cause all kinds of unpleasant problems later.  */
    1357       353910 :       path->release ();
    1358       353910 :       e->aux = NULL;
    1359              : 
    1360              :     }
    1361              : 
    1362              :   /* Indicate that we actually threaded one or more jumps.  */
    1363       285459 :   if (rd->incoming_edges)
    1364       285459 :     local_info->jumps_threaded = true;
    1365              : 
    1366       285459 :   return 1;
    1367              : }
    1368              : 
    1369              : /* Return true if this block has no executable statements other than
    1370              :    a simple ctrl flow instruction.  When the number of outgoing edges
    1371              :    is one, this is equivalent to a "forwarder" block.  */
    1372              : 
    1373              : static bool
    1374       160037 : redirection_block_p (basic_block bb)
    1375              : {
    1376       160037 :   gimple_stmt_iterator gsi;
    1377              : 
    1378              :   /* Advance to the first executable statement.  */
    1379       160037 :   gsi = gsi_start_bb (bb);
    1380       160037 :   while (!gsi_end_p (gsi)
    1381       356548 :          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
    1382              :              || is_gimple_debug (gsi_stmt (gsi))
    1383              :              || gimple_nop_p (gsi_stmt (gsi))
    1384       159371 :              || gimple_clobber_p (gsi_stmt (gsi))))
    1385       196511 :     gsi_next (&gsi);
    1386              : 
    1387              :   /* Check if this is an empty block.  */
    1388       160037 :   if (gsi_end_p (gsi))
    1389              :     return true;
    1390              : 
    1391              :   /* Test that we've reached the terminating control statement.  */
    1392       159051 :   return gsi_stmt (gsi)
    1393       159051 :          && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
    1394              :              || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
    1395              :              || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
    1396              : }
    1397              : 
    1398              : /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
    1399              :    is reached via one or more specific incoming edges, we know which
    1400              :    outgoing edge from BB will be traversed.
    1401              : 
    1402              :    We want to redirect those incoming edges to the target of the
    1403              :    appropriate outgoing edge.  Doing so avoids a conditional branch
    1404              :    and may expose new optimization opportunities.  Note that we have
    1405              :    to update dominator tree and SSA graph after such changes.
    1406              : 
    1407              :    The key to keeping the SSA graph update manageable is to duplicate
    1408              :    the side effects occurring in BB so that those side effects still
    1409              :    occur on the paths which bypass BB after redirecting edges.
    1410              : 
    1411              :    We accomplish this by creating duplicates of BB and arranging for
    1412              :    the duplicates to unconditionally pass control to one specific
    1413              :    successor of BB.  We then revector the incoming edges into BB to
    1414              :    the appropriate duplicate of BB.
    1415              : 
    1416              :    If NOLOOP_ONLY is true, we only perform the threading as long as it
    1417              :    does not affect the structure of the loops in a nontrivial way.
    1418              : 
    1419              :    If JOINERS is true, then thread through joiner blocks as well.  */
    1420              : 
    1421              : bool
    1422       727032 : fwd_jt_path_registry::thread_block_1 (basic_block bb,
    1423              :                                       bool noloop_only,
    1424              :                                       bool joiners)
    1425              : {
    1426              :   /* E is an incoming edge into BB that we may or may not want to
    1427              :      redirect to a duplicate of BB.  */
    1428       727032 :   edge e, e2;
    1429       727032 :   edge_iterator ei;
    1430       727032 :   ssa_local_info_t local_info;
    1431              : 
    1432       727032 :   local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
    1433       727032 :   local_info.need_profile_correction = false;
    1434       727032 :   local_info.num_threaded_edges = 0;
    1435              : 
    1436              :   /* To avoid scanning a linear array for the element we need we instead
    1437              :      use a hash table.  For normal code there should be no noticeable
    1438              :      difference.  However, if we have a block with a large number of
    1439              :      incoming and outgoing edges such linear searches can get expensive.  */
    1440       727032 :   m_redirection_data
    1441      1454064 :     = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));
    1442              : 
    1443              :   /* Record each unique threaded destination into a hash table for
    1444              :      efficient lookups.  */
    1445       727032 :   edge last = NULL;
    1446      2234115 :   FOR_EACH_EDGE (e, ei, bb->preds)
    1447              :     {
    1448      1507083 :       if (e->aux == NULL)
    1449       746006 :         continue;
    1450              : 
    1451       761077 :       vec<jump_thread_edge *> *path = THREAD_PATH (e);
    1452              : 
    1453      1088585 :       if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
    1454       924831 :           || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
    1455       253382 :         continue;
    1456              : 
    1457              :       /* When a NO_COPY_SRC block became non-empty cancel the path.  */
    1458       507695 :       if (path->last ()->type == EDGE_NO_COPY_SRC_BLOCK)
    1459              :         {
    1460        54696 :           auto gsi = gsi_start_nondebug_bb (path->last ()->e->src);
    1461        54696 :           if (!gsi_end_p (gsi)
    1462        54696 :               && !is_ctrl_stmt (gsi_stmt (gsi)))
    1463              :             {
    1464            0 :               cancel_thread (path, "Non-empty EDGE_NO_COPY_SRC_BLOCK");
    1465            0 :               e->aux = NULL;
    1466            0 :               continue;
    1467              :             }
    1468              :         }
    1469              : 
    1470       507695 :       e2 = path->last ()->e;
    1471       507695 :       if (!e2 || noloop_only)
    1472              :         {
    1473              :           /* If NOLOOP_ONLY is true, we only allow threading through the
    1474              :              header of a loop to exit edges.  */
    1475              : 
    1476              :           /* One case occurs when there was loop header buried in a jump
    1477              :              threading path that crosses loop boundaries.  We do not try
    1478              :              and thread this elsewhere, so just cancel the jump threading
    1479              :              request by clearing the AUX field now.  */
    1480       528712 :           if (bb->loop_father != e2->src->loop_father
    1481       505001 :               && (!loop_exit_edge_p (e2->src->loop_father, e2)
    1482          878 :                   || flow_loop_nested_p (bb->loop_father,
    1483          878 :                                          e2->dest->loop_father)))
    1484              :             {
    1485              :               /* Since this case is not handled by our special code
    1486              :                  to thread through a loop header, we must explicitly
    1487              :                  cancel the threading request here.  */
    1488        23711 :               cancel_thread (path, "Threading through unhandled loop header");
    1489        23711 :               e->aux = NULL;
    1490        23711 :               continue;
    1491              :             }
    1492              : 
    1493              :           /* Another case occurs when trying to thread through our
    1494              :              own loop header, possibly from inside the loop.  We will
    1495              :              thread these later.  */
    1496              :           unsigned int i;
    1497       935544 :           for (i = 1; i < path->length (); i++)
    1498              :             {
    1499       584288 :               if ((*path)[i]->e->src == bb->loop_father->header
    1500       584288 :                   && (!loop_exit_edge_p (bb->loop_father, e2)
    1501         2200 :                       || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
    1502              :                 break;
    1503              :             }
    1504              : 
    1505       962580 :           if (i != path->length ())
    1506       130034 :             continue;
    1507              : 
    1508              :           /* Loop parallelization can be confused by the result of
    1509              :              threading through the loop exit test back into the loop.
    1510              :              However, theading those jumps seems to help other codes.
    1511              : 
    1512              :              I have been unable to find anything related to the shape of
    1513              :              the CFG, the contents of the affected blocks, etc which would
    1514              :              allow a more sensible test than what we're using below which
    1515              :              merely avoids the optimization when parallelizing loops.  */
    1516       351256 :           if (flag_tree_parallelize_loops > 1)
    1517              :             {
    1518          166 :               for (i = 1; i < path->length (); i++)
    1519          109 :                 if (bb->loop_father == e2->src->loop_father
    1520          109 :                     && loop_exits_from_bb_p (bb->loop_father,
    1521          109 :                                              (*path)[i]->e->src)
    1522          154 :                     && !loop_exit_edge_p (bb->loop_father, e2))
    1523              :                   break;
    1524              : 
    1525          194 :               if (i != path->length ())
    1526              :                 {
    1527           40 :                   cancel_thread (path, "Threading through loop exit");
    1528           40 :                   e->aux = NULL;
    1529           40 :                   continue;
    1530              :                 }
    1531              :             }
    1532              :         }
    1533              : 
    1534              :       /* Insert the outgoing edge into the hash table if it is not
    1535              :          already in the hash table.  */
    1536       353910 :       lookup_redirection_data (e, INSERT);
    1537              : 
    1538              :       /* When we have thread paths through a common joiner with different
    1539              :          final destinations, then we may need corrections to deal with
    1540              :          profile insanities.  See the big comment before compute_path_counts.  */
    1541       353910 :       if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
    1542              :         {
    1543        99803 :           if (!last)
    1544              :             last = e2;
    1545        38074 :           else if (e2 != last)
    1546        17983 :             local_info.need_profile_correction = true;
    1547              :         }
    1548              :     }
    1549              : 
    1550              :   /* We do not update dominance info.  */
    1551       727032 :   free_dominance_info (CDI_DOMINATORS);
    1552              : 
    1553              :   /* We know we only thread through the loop header to loop exits.
    1554              :      Let the basic block duplication hook know we are not creating
    1555              :      a multiple entry loop.  */
    1556       727032 :   if (noloop_only
    1557       721644 :       && bb == bb->loop_father->header)
    1558       273970 :     set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
    1559              : 
    1560              :   /* Now create duplicates of BB.
    1561              : 
    1562              :      Note that for a block with a high outgoing degree we can waste
    1563              :      a lot of time and memory creating and destroying useless edges.
    1564              : 
    1565              :      So we first duplicate BB and remove the control structure at the
    1566              :      tail of the duplicate as well as all outgoing edges from the
    1567              :      duplicate.  We then use that duplicate block as a template for
    1568              :      the rest of the duplicates.  */
    1569       727032 :   local_info.template_block = NULL;
    1570       727032 :   local_info.bb = bb;
    1571       727032 :   local_info.jumps_threaded = false;
    1572       727032 :   m_redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
    1573      1012491 :                             (&local_info);
    1574              : 
    1575              :   /* The template does not have an outgoing edge.  Create that outgoing
    1576              :      edge and update PHI nodes as the edge's target as necessary.
    1577              : 
    1578              :      We do this after creating all the duplicates to avoid creating
    1579              :      unnecessary edges.  */
    1580       727032 :   m_redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
    1581       954999 :                             (&local_info);
    1582              : 
    1583              :   /* The hash table traversals above created the duplicate blocks (and the
    1584              :      statements within the duplicate blocks).  This loop creates PHI nodes for
    1585              :      the duplicated blocks and redirects the incoming edges into BB to reach
    1586              :      the duplicates of BB.  */
    1587       727032 :   m_redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
    1588      1012491 :                             (&local_info);
    1589              : 
    1590              :   /* Done with this block.  Clear REDIRECTION_DATA.  */
    1591       727032 :   delete m_redirection_data;
    1592       727032 :   m_redirection_data = NULL;
    1593              : 
    1594       727032 :   if (noloop_only
    1595       721644 :       && bb == bb->loop_father->header)
    1596       273970 :     set_loop_copy (bb->loop_father, NULL);
    1597              : 
    1598       727032 :   BITMAP_FREE (local_info.duplicate_blocks);
    1599       727032 :   local_info.duplicate_blocks = NULL;
    1600              : 
    1601       727032 :   m_num_threaded_edges += local_info.num_threaded_edges;
    1602              : 
    1603              :   /* Indicate to our caller whether or not any jumps were threaded.  */
    1604       727032 :   return local_info.jumps_threaded;
    1605              : }
    1606              : 
    1607              : /* Wrapper for thread_block_1 so that we can first handle jump
    1608              :    thread paths which do not involve copying joiner blocks, then
    1609              :    handle jump thread paths which have joiner blocks.
    1610              : 
    1611              :    By doing things this way we can be as aggressive as possible and
    1612              :    not worry that copying a joiner block will create a jump threading
    1613              :    opportunity.  */
    1614              : 
    1615              : bool
    1616       363516 : fwd_jt_path_registry::thread_block (basic_block bb, bool noloop_only)
    1617              : {
    1618       363516 :   bool retval;
    1619       363516 :   retval = thread_block_1 (bb, noloop_only, false);
    1620       363516 :   retval |= thread_block_1 (bb, noloop_only, true);
    1621       363516 :   return retval;
    1622              : }
    1623              : 
    1624              : /* Callback for dfs_enumerate_from.  Returns true if BB is different
    1625              :    from STOP and DBDS_CE_STOP.  */
    1626              : 
    1627              : static basic_block dbds_ce_stop;
    1628              : static bool
    1629      1336856 : dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
    1630              : {
    1631      1336856 :   return (bb != (const_basic_block) stop
    1632      1336856 :           && bb != dbds_ce_stop);
    1633              : }
    1634              : 
    1635              : /* Evaluates the dominance relationship of latch of the LOOP and BB, and
    1636              :    returns the state.  */
    1637              : 
    1638              : enum bb_dom_status
    1639       109723 : determine_bb_domination_status (class loop *loop, basic_block bb)
    1640              : {
    1641       109723 :   basic_block *bblocks;
    1642       109723 :   unsigned nblocks, i;
    1643       109723 :   bool bb_reachable = false;
    1644       109723 :   edge_iterator ei;
    1645       109723 :   edge e;
    1646              : 
    1647              :   /* This function assumes BB is a successor of LOOP->header.
    1648              :      If that is not the case return DOMST_NONDOMINATING which
    1649              :      is always safe.  */
    1650       109723 :     {
    1651       109723 :       bool ok = false;
    1652              : 
    1653       177135 :       FOR_EACH_EDGE (e, ei, bb->preds)
    1654              :         {
    1655       136944 :           if (e->src == loop->header)
    1656              :             {
    1657              :               ok = true;
    1658              :               break;
    1659              :             }
    1660              :         }
    1661              : 
    1662       109723 :       if (!ok)
    1663              :         return DOMST_NONDOMINATING;
    1664              :     }
    1665              : 
    1666        69532 :   if (bb == loop->latch)
    1667              :     return DOMST_DOMINATING;
    1668              : 
    1669              :   /* Check that BB dominates LOOP->latch, and that it is back-reachable
    1670              :      from it.  */
    1671              : 
    1672        67802 :   bblocks = XCNEWVEC (basic_block, loop->num_nodes);
    1673        67802 :   dbds_ce_stop = loop->header;
    1674       135604 :   nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
    1675        67802 :                                 bblocks, loop->num_nodes, bb);
    1676       831226 :   for (i = 0; i < nblocks; i++)
    1677      1962760 :     FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
    1678              :       {
    1679      1199336 :         if (e->src == loop->header)
    1680              :           {
    1681        37785 :             free (bblocks);
    1682        37785 :             return DOMST_NONDOMINATING;
    1683              :           }
    1684      1161551 :         if (e->src == bb)
    1685        64808 :           bb_reachable = true;
    1686              :       }
    1687              : 
    1688        30017 :   free (bblocks);
    1689        30017 :   return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
    1690              : }
    1691              : 
    1692              : /* Thread jumps through the header of LOOP.  Returns true if cfg changes.
    1693              :    If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
    1694              :    to the inside of the loop.  */
    1695              : 
    1696              : bool
    1697       136985 : fwd_jt_path_registry::thread_through_loop_header (class loop *loop,
    1698              :                                                   bool may_peel_loop_headers)
    1699              : {
    1700       136985 :   basic_block header = loop->header;
    1701       136985 :   edge e, tgt_edge, latch = loop_latch_edge (loop);
    1702       136985 :   edge_iterator ei;
    1703       136985 :   basic_block tgt_bb, atgt_bb;
    1704       136985 :   enum bb_dom_status domst;
    1705              : 
    1706              :   /* We have already threaded through headers to exits, so all the threading
    1707              :      requests now are to the inside of the loop.  We need to avoid creating
    1708              :      irreducible regions (i.e., loops with more than one entry block), and
    1709              :      also loop with several latch edges, or new subloops of the loop (although
    1710              :      there are cases where it might be appropriate, it is difficult to decide,
    1711              :      and doing it wrongly may confuse other optimizers).
    1712              : 
    1713              :      We could handle more general cases here.  However, the intention is to
    1714              :      preserve some information about the loop, which is impossible if its
    1715              :      structure changes significantly, in a way that is not well understood.
    1716              :      Thus we only handle few important special cases, in which also updating
    1717              :      of the loop-carried information should be feasible:
    1718              : 
    1719              :      1) Propagation of latch edge to a block that dominates the latch block
    1720              :         of a loop.  This aims to handle the following idiom:
    1721              : 
    1722              :         first = 1;
    1723              :         while (1)
    1724              :           {
    1725              :             if (first)
    1726              :               initialize;
    1727              :             first = 0;
    1728              :             body;
    1729              :           }
    1730              : 
    1731              :         After threading the latch edge, this becomes
    1732              : 
    1733              :         first = 1;
    1734              :         if (first)
    1735              :           initialize;
    1736              :         while (1)
    1737              :           {
    1738              :             first = 0;
    1739              :             body;
    1740              :           }
    1741              : 
    1742              :         The original header of the loop is moved out of it, and we may thread
    1743              :         the remaining edges through it without further constraints.
    1744              : 
    1745              :      2) All entry edges are propagated to a single basic block that dominates
    1746              :         the latch block of the loop.  This aims to handle the following idiom
    1747              :         (normally created for "for" loops):
    1748              : 
    1749              :         i = 0;
    1750              :         while (1)
    1751              :           {
    1752              :             if (i >= 100)
    1753              :               break;
    1754              :             body;
    1755              :             i++;
    1756              :           }
    1757              : 
    1758              :         This becomes
    1759              : 
    1760              :         i = 0;
    1761              :         while (1)
    1762              :           {
    1763              :             body;
    1764              :             i++;
    1765              :             if (i >= 100)
    1766              :               break;
    1767              :           }
    1768              :      */
    1769              : 
    1770              :   /* Threading through the header won't improve the code if the header has just
    1771              :      one successor.  */
    1772       136985 :   if (single_succ_p (header))
    1773          253 :     goto fail;
    1774              : 
    1775       136732 :   if (!may_peel_loop_headers && !redirection_block_p (loop->header))
    1776       125828 :     goto fail;
    1777              :   else
    1778              :     {
    1779        10904 :       tgt_bb = NULL;
    1780        10904 :       tgt_edge = NULL;
    1781        25042 :       FOR_EACH_EDGE (e, ei, header->preds)
    1782              :         {
    1783        19908 :           if (!e->aux)
    1784              :             {
    1785        10878 :               if (e == latch)
    1786         9301 :                 continue;
    1787              : 
    1788              :               /* If latch is not threaded, and there is a header
    1789              :                  edge that is not threaded, we would create loop
    1790              :                  with multiple entries.  */
    1791         1577 :               goto fail;
    1792              :             }
    1793              : 
    1794         9030 :           vec<jump_thread_edge *> *path = THREAD_PATH (e);
    1795              : 
    1796         9030 :           if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
    1797         4193 :             goto fail;
    1798         4837 :           tgt_edge = (*path)[1]->e;
    1799         4837 :           atgt_bb = tgt_edge->dest;
    1800         4837 :           if (!tgt_bb)
    1801              :             tgt_bb = atgt_bb;
    1802              :           /* Two targets of threading would make us create loop
    1803              :              with multiple entries.  */
    1804            0 :           else if (tgt_bb != atgt_bb)
    1805            0 :             goto fail;
    1806              :         }
    1807              : 
    1808         5134 :       if (!tgt_bb)
    1809              :         {
    1810              :           /* There are no threading requests.  */
    1811              :           return false;
    1812              :         }
    1813              : 
    1814              :       /* Redirecting to empty loop latch is useless.  */
    1815         4459 :       if (tgt_bb == loop->latch
    1816         4459 :           && empty_block_p (loop->latch))
    1817            2 :         goto fail;
    1818              :     }
    1819              : 
    1820              :   /* The target block must dominate the loop latch, otherwise we would be
    1821              :      creating a subloop.  */
    1822         4457 :   domst = determine_bb_domination_status (loop, tgt_bb);
    1823         4457 :   if (domst == DOMST_NONDOMINATING)
    1824         1763 :     goto fail;
    1825         2694 :   if (domst == DOMST_LOOP_BROKEN)
    1826              :     {
    1827              :       /* If the loop ceased to exist, mark it as such, and thread through its
    1828              :          original header.  */
    1829            0 :       mark_loop_for_removal (loop);
    1830            0 :       return thread_block (header, false);
    1831              :     }
    1832              : 
    1833         2694 :   if (tgt_bb->loop_father->header == tgt_bb)
    1834              :     {
    1835              :       /* If the target of the threading is a header of a subloop, we need
    1836              :          to create a preheader for it, so that the headers of the two loops
    1837              :          do not merge.  */
    1838            0 :       if (EDGE_COUNT (tgt_bb->preds) > 2)
    1839              :         {
    1840            0 :           tgt_bb = create_preheader (tgt_bb->loop_father, 0);
    1841            0 :           gcc_assert (tgt_bb != NULL);
    1842              :         }
    1843              :       else
    1844            0 :         tgt_bb = split_edge (tgt_edge);
    1845              :     }
    1846              : 
    1847         2694 :   basic_block new_preheader;
    1848              : 
    1849              :   /* Now consider the case entry edges are redirected to the new entry
    1850              :      block.  Remember one entry edge, so that we can find the new
    1851              :      preheader (its destination after threading).  */
    1852         4100 :   FOR_EACH_EDGE (e, ei, header->preds)
    1853              :     {
    1854         4100 :       if (e->aux)
    1855              :         break;
    1856              :     }
    1857              : 
    1858              :   /* The duplicate of the header is the new preheader of the loop.  Ensure
    1859              :      that it is placed correctly in the loop hierarchy.  */
    1860         2694 :   set_loop_copy (loop, loop_outer (loop));
    1861              : 
    1862         2694 :   thread_block (header, false);
    1863         2694 :   set_loop_copy (loop, NULL);
    1864         2694 :   new_preheader = e->dest;
    1865              : 
    1866              :   /* Create the new latch block.  This is always necessary, as the latch
    1867              :      must have only a single successor, but the original header had at
    1868              :      least two successors.  */
    1869         2694 :   loop->latch = NULL;
    1870         2694 :   edge keep_edge;
    1871         2694 :   keep_edge = single_succ_edge (new_preheader);
    1872         2694 :   loop->header = keep_edge->dest;
    1873         2694 :   latch = make_forwarder_block (tgt_bb, mfb_keep_just, keep_edge);
    1874         2694 :   loop->header = latch->dest;
    1875         2694 :   loop->latch = latch->src;
    1876         2694 :   return true;
    1877              : 
    1878       133616 : fail:
    1879              :   /* We failed to thread anything.  Cancel the requests.  */
    1880       401416 :   FOR_EACH_EDGE (e, ei, header->preds)
    1881              :     {
    1882       267800 :       vec<jump_thread_edge *> *path = THREAD_PATH (e);
    1883              : 
    1884       267800 :       if (path)
    1885              :         {
    1886       127340 :           cancel_thread (path, "Failure in thread_through_loop_header");
    1887       127340 :           e->aux = NULL;
    1888              :         }
    1889              :     }
    1890              :   return false;
    1891              : }
    1892              : 
    1893              : /* E1 and E2 are edges into the same basic block.  Return TRUE if the
    1894              :    PHI arguments associated with those edges are equal or there are no
    1895              :    PHI arguments, otherwise return FALSE.  */
    1896              : 
    1897              : static bool
    1898        28634 : phi_args_equal_on_edges (edge e1, edge e2)
    1899              : {
    1900        28634 :   gphi_iterator gsi;
    1901        28634 :   int indx1 = e1->dest_idx;
    1902        28634 :   int indx2 = e2->dest_idx;
    1903              : 
    1904        44742 :   for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    1905              :     {
    1906        17479 :       gphi *phi = gsi.phi ();
    1907              : 
    1908        17479 :       if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
    1909        17479 :                             gimple_phi_arg_def (phi, indx2), 0))
    1910              :         return false;
    1911              :     }
    1912              :   return true;
    1913              : }
    1914              : 
    1915              : /* Return the number of non-debug statements and non-virtual PHIs in a
    1916              :    block.  */
    1917              : 
    1918              : static unsigned int
    1919         9528 : count_stmts_and_phis_in_block (basic_block bb)
    1920              : {
    1921         9528 :   unsigned int num_stmts = 0;
    1922              : 
    1923         9528 :   gphi_iterator gpi;
    1924        24778 :   for (gpi = gsi_start_phis (bb); !gsi_end_p (gpi); gsi_next (&gpi))
    1925        30500 :     if (!virtual_operand_p (PHI_RESULT (gpi.phi ())))
    1926         9207 :       num_stmts++;
    1927              : 
    1928         9528 :   gimple_stmt_iterator gsi;
    1929        80255 :   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    1930              :     {
    1931        61199 :       gimple *stmt = gsi_stmt (gsi);
    1932        61199 :       if (!is_gimple_debug (stmt))
    1933        53610 :         num_stmts++;
    1934              :     }
    1935              : 
    1936         9528 :   return num_stmts;
    1937              : }
    1938              : 
    1939              : 
    1940              : /* Walk through the registered jump threads and convert them into a
    1941              :    form convenient for this pass.
    1942              : 
    1943              :    Any block which has incoming edges threaded to outgoing edges
    1944              :    will have its entry in THREADED_BLOCK set.
    1945              : 
    1946              :    Any threaded edge will have its new outgoing edge stored in the
    1947              :    original edge's AUX field.
    1948              : 
    1949              :    This form avoids the need to walk all the edges in the CFG to
    1950              :    discover blocks which need processing and avoids unnecessary
    1951              :    hash table lookups to map from threaded edge to new target.  */
    1952              : 
    1953              : void
    1954       154309 : fwd_jt_path_registry::mark_threaded_blocks (bitmap threaded_blocks)
    1955              : {
    1956       154309 :   unsigned int i;
    1957       154309 :   bitmap_iterator bi;
    1958       154309 :   auto_bitmap tmp;
    1959       154309 :   basic_block bb;
    1960       154309 :   edge e;
    1961       154309 :   edge_iterator ei;
    1962              : 
    1963              :   /* It is possible to have jump threads in which one is a subpath
    1964              :      of the other.  ie, (A, B), (B, C), (C, D) where B is a joiner
    1965              :      block and (B, C), (C, D) where no joiner block exists.
    1966              : 
    1967              :      When this occurs ignore the jump thread request with the joiner
    1968              :      block.  It's totally subsumed by the simpler jump thread request.
    1969              : 
    1970              :      This results in less block copying, simpler CFGs.  More importantly,
    1971              :      when we duplicate the joiner block, B, in this case we will create
    1972              :      a new threading opportunity that we wouldn't be able to optimize
    1973              :      until the next jump threading iteration.
    1974              : 
    1975              :      So first convert the jump thread requests which do not require a
    1976              :      joiner block.  */
    1977       801255 :   for (i = 0; i < m_paths.length (); i++)
    1978              :     {
    1979       646946 :       vec<jump_thread_edge *> *path = m_paths[i];
    1980              : 
    1981      1293892 :       if (path->length () > 1
    1982      1293892 :           && (*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
    1983              :         {
    1984       349764 :           edge e = (*path)[0]->e;
    1985       349764 :           e->aux = (void *)path;
    1986       349764 :           bitmap_set_bit (tmp, e->dest->index);
    1987              :         }
    1988              :     }
    1989              : 
    1990              :   /* Now iterate again, converting cases where we want to thread
    1991              :      through a joiner block, but only if no other edge on the path
    1992              :      already has a jump thread attached to it.  We do this in two passes,
    1993              :      to avoid situations where the order in the paths vec can hide overlapping
    1994              :      threads (the path is recorded on the incoming edge, so we would miss
    1995              :      cases where the second path starts at a downstream edge on the same
    1996              :      path).  First record all joiner paths, deleting any in the unexpected
    1997              :      case where there is already a path for that incoming edge.  */
    1998       801255 :   for (i = 0; i < m_paths.length ();)
    1999              :     {
    2000       646946 :       vec<jump_thread_edge *> *path = m_paths[i];
    2001              : 
    2002       646946 :       if (path->length () > 1
    2003      1293892 :           && (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
    2004              :         {
    2005              :           /* Attach the path to the starting edge if none is yet recorded.  */
    2006       297182 :           if ((*path)[0]->e->aux == NULL)
    2007              :             {
    2008       249168 :               (*path)[0]->e->aux = path;
    2009       249168 :               i++;
    2010              :             }
    2011              :           else
    2012              :             {
    2013        48014 :               m_paths.unordered_remove (i);
    2014        48014 :               cancel_thread (path);
    2015              :             }
    2016              :         }
    2017              :       else
    2018              :         {
    2019       349764 :           i++;
    2020              :         }
    2021              :     }
    2022              : 
    2023              :   /* Second, look for paths that have any other jump thread attached to
    2024              :      them, and either finish converting them or cancel them.  */
    2025       753241 :   for (i = 0; i < m_paths.length ();)
    2026              :     {
    2027       598932 :       vec<jump_thread_edge *> *path = m_paths[i];
    2028       598932 :       edge e = (*path)[0]->e;
    2029              : 
    2030       598932 :       if (path->length () > 1
    2031      1197864 :           && (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
    2032              :         {
    2033              :           unsigned int j;
    2034       592322 :           for (j = 1; j < path->length (); j++)
    2035       422424 :             if ((*path)[j]->e->aux != NULL)
    2036              :               break;
    2037              : 
    2038              :           /* If we iterated through the entire path without exiting the loop,
    2039              :              then we are good to go, record it.  */
    2040       249168 :           if (j == path->length ())
    2041              :             {
    2042       169898 :               bitmap_set_bit (tmp, e->dest->index);
    2043       169898 :               i++;
    2044              :             }
    2045              :           else
    2046              :             {
    2047        79270 :               e->aux = NULL;
    2048        79270 :               m_paths.unordered_remove (i);
    2049        79270 :               cancel_thread (path);
    2050              :             }
    2051              :         }
    2052              :       else
    2053              :         {
    2054       349764 :           i++;
    2055              :         }
    2056              :     }
    2057              : 
    2058              :   /* When optimizing for size, prune all thread paths where statement
    2059              :      duplication is necessary.
    2060              : 
    2061              :      We walk the jump thread path looking for copied blocks.  There's
    2062              :      two types of copied blocks.
    2063              : 
    2064              :        EDGE_COPY_SRC_JOINER_BLOCK is always copied and thus we will
    2065              :        cancel the jump threading request when optimizing for size.
    2066              : 
    2067              :        EDGE_COPY_SRC_BLOCK which is copied, but some of its statements
    2068              :        will be killed by threading.  If threading does not kill all of
    2069              :        its statements, then we should cancel the jump threading request
    2070              :        when optimizing for size.  */
    2071       154309 :   if (optimize_function_for_size_p (cfun))
    2072              :     {
    2073        22958 :       EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
    2074              :         {
    2075        52280 :           FOR_EACH_EDGE (e, ei, BASIC_BLOCK_FOR_FN (cfun, i)->preds)
    2076        35756 :             if (e->aux)
    2077              :               {
    2078              :                 vec<jump_thread_edge *> *path = THREAD_PATH (e);
    2079              : 
    2080              :                 unsigned int j;
    2081        35128 :                 for (j = 1; j < path->length (); j++)
    2082              :                   {
    2083        25304 :                     bb = (*path)[j]->e->src;
    2084        25304 :                     if (redirection_block_p (bb))
    2085              :                       ;
    2086        13975 :                     else if ((*path)[j]->type == EDGE_COPY_SRC_JOINER_BLOCK
    2087        13975 :                              || ((*path)[j]->type == EDGE_COPY_SRC_BLOCK
    2088        19056 :                                  && (count_stmts_and_phis_in_block (bb)
    2089         9528 :                                      != estimate_threading_killed_stmts (bb))))
    2090              :                       break;
    2091              :                   }
    2092              : 
    2093        46228 :                 if (j != path->length ())
    2094              :                   {
    2095        13290 :                     cancel_thread (path);
    2096        13290 :                     e->aux = NULL;
    2097              :                   }
    2098              :                 else
    2099         9824 :                   bitmap_set_bit (threaded_blocks, i);
    2100              :               }
    2101              :         }
    2102              :     }
    2103              :   else
    2104       147875 :     bitmap_copy (threaded_blocks, tmp);
    2105              : 
    2106              :   /* If we have a joiner block (J) which has two successors S1 and S2 and
    2107              :      we are threading though S1 and the final destination of the thread
    2108              :      is S2, then we must verify that any PHI nodes in S2 have the same
    2109              :      PHI arguments for the edge J->S2 and J->S1->...->S2.
    2110              : 
    2111              :      We used to detect this prior to registering the jump thread, but
    2112              :      that prohibits propagation of edge equivalences into non-dominated
    2113              :      PHI nodes as the equivalency test might occur before propagation.
    2114              : 
    2115              :      This must also occur after we truncate any jump threading paths
    2116              :      as this scenario may only show up after truncation.
    2117              : 
    2118              :      This works for now, but will need improvement as part of the FSA
    2119              :      optimization.
    2120              : 
    2121              :      Note since we've moved the thread request data to the edges,
    2122              :      we have to iterate on those rather than the threaded_edges vector.  */
    2123       526009 :   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
    2124              :     {
    2125       371700 :       bb = BASIC_BLOCK_FOR_FN (cfun, i);
    2126      1270521 :       FOR_EACH_EDGE (e, ei, bb->preds)
    2127              :         {
    2128       898821 :           if (e->aux)
    2129              :             {
    2130       506372 :               vec<jump_thread_edge *> *path = THREAD_PATH (e);
    2131       506372 :               bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
    2132              : 
    2133       506372 :               if (have_joiner)
    2134              :                 {
    2135       165125 :                   basic_block joiner = e->dest;
    2136       165125 :                   edge final_edge = path->last ()->e;
    2137       165125 :                   basic_block final_dest = final_edge->dest;
    2138       165125 :                   edge e2 = find_edge (joiner, final_dest);
    2139              : 
    2140       165125 :                   if (e2 && !phi_args_equal_on_edges (e2, final_edge))
    2141              :                     {
    2142         1371 :                       cancel_thread (path);
    2143         1371 :                       e->aux = NULL;
    2144              :                     }
    2145              :                 }
    2146              :             }
    2147              :         }
    2148              :     }
    2149              : 
    2150              :   /* Look for jump threading paths which cross multiple loop headers.
    2151              : 
    2152              :      The code to thread through loop headers will change the CFG in ways
    2153              :      that invalidate the cached loop iteration information.  So we must
    2154              :      detect that case and wipe the cached information.  */
    2155       526009 :   EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
    2156              :     {
    2157       371700 :       basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
    2158      1270521 :       FOR_EACH_EDGE (e, ei, bb->preds)
    2159              :         {
    2160       898821 :           if (e->aux)
    2161              :             {
    2162       505001 :               gcc_assert (loops_state_satisfies_p
    2163              :                             (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS));
    2164              :               vec<jump_thread_edge *> *path = THREAD_PATH (e);
    2165              : 
    2166      1185331 :               for (unsigned int i = 0, crossed_headers = 0;
    2167      2085619 :                    i < path->length ();
    2168              :                    i++)
    2169              :                 {
    2170      1186798 :                   basic_block dest = (*path)[i]->e->dest;
    2171      1186798 :                   basic_block src = (*path)[i]->e->src;
    2172              :                   /* If we enter a loop.  */
    2173      1186798 :                   if (flow_loop_nested_p (src->loop_father, dest->loop_father))
    2174       151297 :                     ++crossed_headers;
    2175              :                   /* If we step from a block outside an irreducible region
    2176              :                      to a block inside an irreducible region, then we have
    2177              :                      crossed into a loop.  */
    2178      1035501 :                   else if (! (src->flags & BB_IRREDUCIBLE_LOOP)
    2179      1032050 :                            && (dest->flags & BB_IRREDUCIBLE_LOOP))
    2180         1067 :                       ++crossed_headers;
    2181      1186798 :                   if (crossed_headers > 1)
    2182              :                     {
    2183         1467 :                       vect_free_loop_info_assumptions
    2184         2934 :                         ((*path)[path->length () - 1]->e->dest->loop_father);
    2185         1467 :                       break;
    2186              :                     }
    2187              :                 }
    2188              :             }
    2189              :         }
    2190              :     }
    2191       154309 : }
    2192              : 
    2193              : 
    2194              : /* Verify that the REGION is a valid jump thread.  A jump thread is a special
    2195              :    case of SEME Single Entry Multiple Exits region in which all nodes in the
    2196              :    REGION have exactly one incoming edge.  The only exception is the first block
    2197              :    that may not have been connected to the rest of the cfg yet.  */
    2198              : 
    2199              : DEBUG_FUNCTION void
    2200      1233611 : verify_jump_thread (basic_block *region, unsigned n_region)
    2201              : {
    2202      2924411 :   for (unsigned i = 0; i < n_region; i++)
    2203      1690800 :     gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
    2204      1233611 : }
    2205              : 
    2206              : /* Return true when BB is one of the first N items in BBS.  */
    2207              : 
    2208              : static inline bool
    2209      2478170 : bb_in_bbs (basic_block bb, basic_block *bbs, int n)
    2210              : {
    2211      5840704 :   for (int i = 0; i < n; i++)
    2212      3375484 :     if (bb == bbs[i])
    2213              :       return true;
    2214              : 
    2215              :   return false;
    2216              : }
    2217              : 
    2218              : void
    2219           22 : jt_path_registry::debug_path (FILE *dump_file, int pathno)
    2220              : {
    2221           22 :   vec<jump_thread_edge *> *p = m_paths[pathno];
    2222           22 :   fprintf (dump_file, "path: ");
    2223          130 :   for (unsigned i = 0; i < p->length (); ++i)
    2224          108 :     fprintf (dump_file, "%d -> %d, ",
    2225          108 :              (*p)[i]->e->src->index, (*p)[i]->e->dest->index);
    2226           22 :   fprintf (dump_file, "\n");
    2227           22 : }
    2228              : 
    2229              : void
    2230            0 : jt_path_registry::debug ()
    2231              : {
    2232            0 :   for (unsigned i = 0; i < m_paths.length (); ++i)
    2233            0 :     debug_path (stderr, i);
    2234            0 : }
    2235              : 
    2236              : /* Rewire a jump_thread_edge so that the source block is now a
    2237              :    threaded source block.
    2238              : 
    2239              :    PATH_NUM is an index into the global path table PATHS.
    2240              :    EDGE_NUM is the jump thread edge number into said path.
    2241              : 
    2242              :    Returns TRUE if we were able to successfully rewire the edge.  */
    2243              : 
    2244              : bool
    2245        83138 : back_jt_path_registry::rewire_first_differing_edge (unsigned path_num,
    2246              :                                                     unsigned edge_num)
    2247              : {
    2248        83138 :   vec<jump_thread_edge *> *path = m_paths[path_num];
    2249        83138 :   edge &e = (*path)[edge_num]->e;
    2250        83138 :   if (dump_file && (dump_flags & TDF_DETAILS))
    2251           14 :     fprintf (dump_file, "rewiring edge candidate: %d -> %d\n",
    2252           14 :              e->src->index, e->dest->index);
    2253        83138 :   basic_block src_copy = get_bb_copy (e->src);
    2254        83138 :   if (src_copy == NULL)
    2255              :     {
    2256        18802 :       if (dump_file && (dump_flags & TDF_DETAILS))
    2257            9 :         fprintf (dump_file, "ignoring candidate: there is no src COPY\n");
    2258        18802 :       return false;
    2259              :     }
    2260        64336 :   edge new_edge = find_edge (src_copy, e->dest);
    2261              :   /* If the previously threaded paths created a flow graph where we
    2262              :      can no longer figure out where to go, give up.  */
    2263        64336 :   if (new_edge == NULL)
    2264              :     {
    2265         6560 :       if (dump_file && (dump_flags & TDF_DETAILS))
    2266            0 :         fprintf (dump_file, "ignoring candidate: we lost our way\n");
    2267         6560 :       return false;
    2268              :     }
    2269        57776 :   e = new_edge;
    2270        57776 :   return true;
    2271              : }
    2272              : 
    2273              : /* After a path has been jump threaded, adjust the remaining paths
    2274              :    that are subsets of this path, so these paths can be safely
    2275              :    threaded within the context of the new threaded path.
    2276              : 
    2277              :    For example, suppose we have just threaded:
    2278              : 
    2279              :    5 -> 6 -> 7 -> 8 -> 12   =>   5 -> 6' -> 7' -> 8' -> 12'
    2280              : 
    2281              :    And we have an upcoming threading candidate:
    2282              :    5 -> 6 -> 7 -> 8 -> 15 -> 20
    2283              : 
    2284              :    This function adjusts the upcoming path into:
    2285              :    8' -> 15 -> 20
    2286              : 
    2287              :    CURR_PATH_NUM is an index into the global paths table.  It
    2288              :    specifies the path that was just threaded.  */
    2289              : 
    2290              : void
    2291      1233638 : back_jt_path_registry::adjust_paths_after_duplication (unsigned curr_path_num)
    2292              : {
    2293      1233638 :   vec<jump_thread_edge *> *curr_path = m_paths[curr_path_num];
    2294              : 
    2295              :   /* Iterate through all the other paths and adjust them.  */
    2296     24779604 :   for (unsigned cand_path_num = 0; cand_path_num < m_paths.length (); )
    2297              :     {
    2298     23545966 :       if (cand_path_num == curr_path_num)
    2299              :         {
    2300      1233638 :           ++cand_path_num;
    2301      1233638 :           continue;
    2302              :         }
    2303              :       /* Make sure the candidate to adjust starts with the same path
    2304              :          as the recently threaded path.  */
    2305     22312328 :       vec<jump_thread_edge *> *cand_path = m_paths[cand_path_num];
    2306     22312328 :       if ((*cand_path)[0]->e != (*curr_path)[0]->e)
    2307              :         {
    2308     22188387 :           ++cand_path_num;
    2309     22188387 :           continue;
    2310              :         }
    2311       123941 :       if (dump_file && (dump_flags & TDF_DETAILS))
    2312              :         {
    2313           17 :           fprintf (dump_file, "adjusting candidate: ");
    2314           17 :           debug_path (dump_file, cand_path_num);
    2315              :         }
    2316              : 
    2317              :       /* Chop off from the candidate path any prefix it shares with
    2318              :          the recently threaded path.  */
    2319       247882 :       unsigned minlength = MIN (curr_path->length (), cand_path->length ());
    2320       123941 :       unsigned j;
    2321       348129 :       for (j = 0; j < minlength; ++j)
    2322              :         {
    2323       288524 :           edge cand_edge = (*cand_path)[j]->e;
    2324       288524 :           edge curr_edge = (*curr_path)[j]->e;
    2325              : 
    2326              :           /* Once the prefix no longer matches, adjust the first
    2327              :              non-matching edge to point from an adjusted edge to
    2328              :              wherever it was going.  */
    2329       288524 :           if (cand_edge != curr_edge)
    2330              :             {
    2331        64336 :               gcc_assert (cand_edge->src == curr_edge->src);
    2332        64336 :               if (!rewire_first_differing_edge (cand_path_num, j))
    2333         6560 :                 goto remove_candidate_from_list;
    2334              :               break;
    2335              :             }
    2336              :         }
    2337       117381 :       if (j == minlength)
    2338              :         {
    2339              :           /* If we consumed the max subgraph we could look at, and
    2340              :              still didn't find any different edges, it's the
    2341              :              last edge after MINLENGTH.  */
    2342        59605 :           if (cand_path->length () > minlength)
    2343              :             {
    2344        18802 :               if (!rewire_first_differing_edge (cand_path_num, j))
    2345        18802 :                 goto remove_candidate_from_list;
    2346              :             }
    2347        40803 :           else if (dump_file && (dump_flags & TDF_DETAILS))
    2348            3 :             fprintf (dump_file, "adjusting first edge after MINLENGTH.\n");
    2349              :         }
    2350        98579 :       if (j > 0)
    2351              :         {
    2352              :           /* If we are removing everything, delete the entire candidate.  */
    2353       197158 :           if (j == cand_path->length ())
    2354              :             {
    2355        40803 :             remove_candidate_from_list:
    2356        66165 :               cancel_thread (cand_path, "Adjusted candidate is EMPTY");
    2357        66165 :               m_paths.unordered_remove (cand_path_num);
    2358        66165 :               continue;
    2359              :             }
    2360              :           /* Otherwise, just remove the redundant sub-path.  */
    2361        57776 :           if (cand_path->length () - j > 1)
    2362        44065 :             cand_path->block_remove (0, j);
    2363        13711 :           else if (dump_file && (dump_flags & TDF_DETAILS))
    2364            0 :             fprintf (dump_file, "Dropping illformed candidate.\n");
    2365              :         }
    2366        57776 :       if (dump_file && (dump_flags & TDF_DETAILS))
    2367              :         {
    2368            5 :           fprintf (dump_file, "adjusted candidate: ");
    2369            5 :           debug_path (dump_file, cand_path_num);
    2370              :         }
    2371        57776 :       ++cand_path_num;
    2372              :     }
    2373      1233638 : }
    2374              : 
    2375              : /* Duplicates a jump-thread path of N_REGION basic blocks.
    2376              :    The ENTRY edge is redirected to the duplicate of the region.
    2377              : 
    2378              :    Remove the last conditional statement in the last basic block in the REGION,
    2379              :    and create a single fallthru edge pointing to the same destination as the
    2380              :    EXIT edge.
    2381              : 
    2382              :    CURRENT_PATH_NO is an index into the global paths[] table
    2383              :    specifying the jump-thread path.
    2384              : 
    2385              :    Returns false if it is unable to copy the region, true otherwise.  */
    2386              : 
    2387              : bool
    2388      1233664 : back_jt_path_registry::duplicate_thread_path (edge entry,
    2389              :                                               edge exit,
    2390              :                                               basic_block *region,
    2391              :                                               unsigned n_region,
    2392              :                                               unsigned current_path_no)
    2393              : {
    2394      1233664 :   unsigned i;
    2395      1233664 :   class loop *loop = entry->dest->loop_father;
    2396      1233664 :   edge exit_copy;
    2397      1233664 :   edge redirected;
    2398      1233664 :   profile_count curr_count;
    2399              : 
    2400      1233664 :   if (!can_copy_bbs_p (region, n_region))
    2401              :     return false;
    2402              : 
    2403              :   /* Some sanity checking.  Note that we do not check for all possible
    2404              :      missuses of the functions.  I.e. if you ask to copy something weird,
    2405              :      it will work, but the state of structures probably will not be
    2406              :      correct.  */
    2407      2924472 :   for (i = 0; i < n_region; i++)
    2408              :     {
    2409              :       /* We do not handle subloops, i.e. all the blocks must belong to the
    2410              :          same loop.  */
    2411      1690834 :       if (region[i]->loop_father != loop)
    2412              :         return false;
    2413              :     }
    2414              : 
    2415      1233638 :   initialize_original_copy_tables ();
    2416              : 
    2417      1233638 :   set_loop_copy (loop, loop);
    2418              : 
    2419      1233638 :   basic_block *region_copy = XNEWVEC (basic_block, n_region);
    2420      1233638 :   copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
    2421              :             split_edge_bb_loc (entry), false);
    2422              : 
    2423              :   /* Fix up: copy_bbs redirects all edges pointing to copied blocks.  The
    2424              :      following code ensures that all the edges exiting the jump-thread path are
    2425              :      redirected back to the original code: these edges are exceptions
    2426              :      invalidating the property that is propagated by executing all the blocks of
    2427              :      the jump-thread path in order.  */
    2428              : 
    2429      1233638 :   curr_count = entry->count ();
    2430              : 
    2431      2924472 :   for (i = 0; i < n_region; i++)
    2432              :     {
    2433      1690834 :       edge e;
    2434      1690834 :       edge_iterator ei;
    2435      1690834 :       basic_block bb = region_copy[i];
    2436              : 
    2437              :       /* Watch inconsistent profile.  */
    2438      1690834 :       if (curr_count > region[i]->count)
    2439        54205 :         curr_count = region[i]->count;
    2440              :       /* Scale current BB.  */
    2441      2915264 :       if (region[i]->count.nonzero_p () && curr_count.initialized_p ())
    2442              :         {
    2443              :           /* In the middle of the path we only scale the frequencies.
    2444              :              In last BB we need to update probabilities of outgoing edges
    2445              :              because we know which one is taken at the threaded path.  */
    2446      1224430 :           if (i + 1 != n_region)
    2447       438640 :             scale_bbs_frequencies_profile_count (region + i, 1,
    2448              :                                                  region[i]->count - curr_count,
    2449              :                                                  region[i]->count);
    2450              :           else
    2451       785790 :             update_bb_profile_for_threading (region[i],
    2452              :                                              curr_count,
    2453              :                                              exit);
    2454      1224430 :           scale_bbs_frequencies_profile_count (region_copy + i, 1, curr_count,
    2455      1224430 :                                                region_copy[i]->count);
    2456              :         }
    2457              : 
    2458      1690834 :       if (single_succ_p (bb))
    2459              :         {
    2460              :           /* Make sure the successor is the next node in the path.  */
    2461       126421 :           gcc_assert (i + 1 == n_region
    2462              :                       || region_copy[i + 1] == single_succ_edge (bb)->dest);
    2463       126421 :           if (i + 1 != n_region)
    2464              :             {
    2465       126421 :               curr_count = single_succ_edge (bb)->count ();
    2466              :             }
    2467      1360059 :           continue;
    2468              :         }
    2469              : 
    2470              :       /* Special case the last block on the path: make sure that it does not
    2471              :          jump back on the copied path, including back to itself.  */
    2472      1564413 :       if (i + 1 == n_region)
    2473              :         {
    2474      3711808 :           FOR_EACH_EDGE (e, ei, bb->succs)
    2475      4956340 :             if (bb_in_bbs (e->dest, region_copy, n_region))
    2476              :               {
    2477        12950 :                 basic_block orig = get_bb_original (e->dest);
    2478        12950 :                 if (orig)
    2479        12950 :                   redirect_edge_and_branch_force (e, orig);
    2480              :               }
    2481      1233638 :           continue;
    2482      1233638 :         }
    2483              : 
    2484              :       /* Redirect all other edges jumping to non-adjacent blocks back to the
    2485              :          original code.  */
    2486       992328 :       FOR_EACH_EDGE (e, ei, bb->succs)
    2487       661553 :         if (region_copy[i + 1] != e->dest)
    2488              :           {
    2489       330778 :             basic_block orig = get_bb_original (e->dest);
    2490       330778 :             if (orig)
    2491        13648 :               redirect_edge_and_branch_force (e, orig);
    2492              :           }
    2493              :         else
    2494              :           {
    2495       330775 :             curr_count = e->count ();
    2496              :           }
    2497              :     }
    2498              : 
    2499              : 
    2500      1233638 :   if (flag_checking)
    2501      1233611 :     verify_jump_thread (region_copy, n_region);
    2502              : 
    2503              :   /* Remove the last branch in the jump thread path.  */
    2504      1233638 :   remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
    2505              : 
    2506              :   /* And fixup the flags on the single remaining edge.  */
    2507      1233638 :   edge fix_e = find_edge (region_copy[n_region - 1], exit->dest);
    2508      1233638 :   fix_e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
    2509      1233638 :   fix_e->flags |= EDGE_FALLTHRU;
    2510              : 
    2511      1233638 :   edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
    2512              : 
    2513      1233638 :   if (e)
    2514              :     {
    2515            0 :       rescan_loop_exit (e, true, false);
    2516            0 :       e->probability = profile_probability::always ();
    2517              :     }
    2518              : 
    2519              :   /* Redirect the entry and add the phi node arguments.  */
    2520      1233638 :   if (entry->dest == loop->header)
    2521        11916 :     mark_loop_for_removal (loop);
    2522      1233638 :   redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
    2523      1233638 :   gcc_assert (redirected != NULL);
    2524      1233638 :   flush_pending_stmts (entry);
    2525              : 
    2526              :   /* Add the other PHI node arguments.  */
    2527      1233638 :   add_phi_args_after_copy (region_copy, n_region, NULL);
    2528              : 
    2529      1233638 :   free (region_copy);
    2530              : 
    2531      1233638 :   adjust_paths_after_duplication (current_path_no);
    2532              : 
    2533      1233638 :   free_original_copy_tables ();
    2534      1233638 :   return true;
    2535              : }
    2536              : 
    2537              : /* Return true when PATH is a valid jump-thread path.  */
    2538              : 
    2539              : static bool
    2540      1252832 : valid_jump_thread_path (vec<jump_thread_edge *> *path)
    2541              : {
    2542      1252832 :   unsigned len = path->length ();
    2543              : 
    2544              :   /* Check that the path is connected.  */
    2545      2971155 :   for (unsigned int j = 0; j < len - 1; j++)
    2546              :     {
    2547      1737491 :       edge e = (*path)[j]->e;
    2548      1737491 :       if (e->dest != (*path)[j+1]->e->src)
    2549              :         return false;
    2550              :     }
    2551              :   return true;
    2552              : }
    2553              : 
    2554              : /* Remove any queued jump threads that include edge E.
    2555              : 
    2556              :    We don't actually remove them here, just record the edges into ax
    2557              :    hash table.  That way we can do the search once per iteration of
    2558              :    DOM/VRP rather than for every case where DOM optimizes away a COND_EXPR.  */
    2559              : 
    2560              : void
    2561       397555 : fwd_jt_path_registry::remove_jump_threads_including (edge_def *e)
    2562              : {
    2563       397555 :   if (!m_paths.exists () || !flag_thread_jumps)
    2564              :     return;
    2565              : 
    2566       397519 :   edge *slot = m_removed_edges->find_slot (e, INSERT);
    2567       397519 :   *slot = e;
    2568              : }
    2569              : 
    2570              : /* Thread all paths that have been queued for jump threading, and
    2571              :    update the CFG accordingly.
    2572              : 
    2573              :    It is the caller's responsibility to fix the dominance information
    2574              :    and rewrite duplicated SSA_NAMEs back into SSA form.
    2575              : 
    2576              :    If PEEL_LOOP_HEADERS is false, avoid threading edges through loop
    2577              :    headers if it does not simplify the loop.
    2578              : 
    2579              :    Returns true if one or more edges were threaded.  */
    2580              : 
    2581              : bool
    2582      8337306 : jt_path_registry::thread_through_all_blocks (bool peel_loop_headers)
    2583              : {
    2584      8392648 :   if (m_paths.length () == 0)
    2585              :     return false;
    2586              : 
    2587       443482 :   m_num_threaded_edges = 0;
    2588              : 
    2589       443482 :   bool retval = update_cfg (peel_loop_headers);
    2590              : 
    2591       443482 :   statistics_counter_event (cfun, "Jumps threaded", m_num_threaded_edges);
    2592              : 
    2593       443482 :   if (retval)
    2594              :     {
    2595       388140 :       loops_state_set (LOOPS_NEED_FIXUP);
    2596       388140 :       return true;
    2597              :     }
    2598              :   return false;
    2599              : }
    2600              : 
    2601              : /* This is the backward threader version of thread_through_all_blocks
    2602              :    using a generic BB copier.  */
    2603              : 
    2604              : bool
    2605       289173 : back_jt_path_registry::update_cfg (bool /*peel_loop_headers*/)
    2606              : {
    2607       289173 :   bool retval = false;
    2608       289173 :   hash_set<edge> visited_starting_edges;
    2609              : 
    2610      1555716 :   while (m_paths.length ())
    2611              :     {
    2612      1266543 :       vec<jump_thread_edge *> *path = m_paths[0];
    2613      1266543 :       edge entry = (*path)[0]->e;
    2614              : 
    2615              :       /* Do not jump-thread twice from the same starting edge.
    2616              : 
    2617              :          Previously we only checked that we weren't threading twice
    2618              :          from the same BB, but that was too restrictive.  Imagine a
    2619              :          path that starts from GIMPLE_COND(x_123 == 0,...), where both
    2620              :          edges out of this conditional yield paths that can be
    2621              :          threaded (for example, both lead to an x_123==0 or x_123!=0
    2622              :          conditional further down the line.  */
    2623      1266543 :       if (visited_starting_edges.contains (entry)
    2624              :           /* We may not want to realize this jump thread path for
    2625              :              various reasons.  So check it first.  */
    2626      1266543 :           || !valid_jump_thread_path (path))
    2627              :         {
    2628              :           /* Remove invalid jump-thread paths.  */
    2629        32879 :           cancel_thread (path, "Avoiding threading twice from same edge");
    2630        32879 :           m_paths.unordered_remove (0);
    2631        32879 :           continue;
    2632              :         }
    2633              : 
    2634      1233664 :       unsigned len = path->length ();
    2635      1233664 :       edge exit = (*path)[len - 1]->e;
    2636      1233664 :       basic_block *region = XNEWVEC (basic_block, len - 1);
    2637              : 
    2638      2924606 :       for (unsigned int j = 0; j < len - 1; j++)
    2639      1690942 :         region[j] = (*path)[j]->e->dest;
    2640              : 
    2641      1233664 :       if (duplicate_thread_path (entry, exit, region, len - 1, 0))
    2642              :         {
    2643              :           /* We do not update dominance info.  */
    2644      1233638 :           free_dominance_info (CDI_DOMINATORS);
    2645      1233638 :           visited_starting_edges.add (entry);
    2646      1233638 :           retval = true;
    2647      1233638 :           m_num_threaded_edges++;
    2648              :         }
    2649              : 
    2650      1233664 :       path->release ();
    2651      1233664 :       m_paths.unordered_remove (0);
    2652      1233664 :       free (region);
    2653              :     }
    2654       289173 :   return retval;
    2655       289173 : }
    2656              : 
    2657              : /* This is the forward threader version of thread_through_all_blocks,
    2658              :    using a custom BB copier.  */
    2659              : 
    2660              : bool
    2661       154309 : fwd_jt_path_registry::update_cfg (bool may_peel_loop_headers)
    2662              : {
    2663       154309 :   bool retval = false;
    2664              : 
    2665              :   /* Remove any paths that referenced removed edges.  */
    2666       154309 :   if (m_removed_edges)
    2667       901672 :     for (unsigned i = 0; i < m_paths.length (); )
    2668              :       {
    2669       747363 :         unsigned int j;
    2670       747363 :         vec<jump_thread_edge *> *path = m_paths[i];
    2671              : 
    2672      2459816 :         for (j = 0; j < path->length (); j++)
    2673              :           {
    2674      1812870 :             edge e = (*path)[j]->e;
    2675      1812870 :             if (m_removed_edges->find_slot (e, NO_INSERT)
    2676      3525323 :                 || (((*path)[j]->type == EDGE_COPY_SRC_BLOCK
    2677      1156649 :                      || (*path)[j]->type == EDGE_COPY_SRC_JOINER_BLOCK)
    2678       881403 :                     && !can_duplicate_block_p (e->src)))
    2679              :               break;
    2680              :           }
    2681              : 
    2682      1494726 :         if (j != path->length ())
    2683              :           {
    2684       100417 :             cancel_thread (path, "Thread references removed edge");
    2685       100417 :             m_paths.unordered_remove (i);
    2686       100417 :             continue;
    2687              :           }
    2688       646946 :         i++;
    2689              :       }
    2690              : 
    2691       154309 :   auto_bitmap threaded_blocks;
    2692       154309 :   mark_threaded_blocks (threaded_blocks);
    2693              : 
    2694       154309 :   initialize_original_copy_tables ();
    2695              : 
    2696              :   /* The order in which we process jump threads can be important.
    2697              : 
    2698              :      Consider if we have two jump threading paths A and B.  If the
    2699              :      target edge of A is the starting edge of B and we thread path A
    2700              :      first, then we create an additional incoming edge into B->dest that
    2701              :      we cannot discover as a jump threading path on this iteration.
    2702              : 
    2703              :      If we instead thread B first, then the edge into B->dest will have
    2704              :      already been redirected before we process path A and path A will
    2705              :      natually, with no further work, target the redirected path for B.
    2706              : 
    2707              :      An post-order is sufficient here.  Compute the ordering first, then
    2708              :      process the blocks.  */
    2709       154309 :   if (!bitmap_empty_p (threaded_blocks))
    2710              :     {
    2711       141193 :       int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
    2712       141193 :       unsigned int postorder_num = post_order_compute (postorder, false, false);
    2713      8092754 :       for (unsigned int i = 0; i < postorder_num; i++)
    2714              :         {
    2715      7951561 :           unsigned int indx = postorder[i];
    2716      7951561 :           if (bitmap_bit_p (threaded_blocks, indx))
    2717              :             {
    2718       360822 :               basic_block bb = BASIC_BLOCK_FOR_FN (cfun, indx);
    2719       360822 :               retval |= thread_block (bb, true);
    2720              :             }
    2721              :         }
    2722       141193 :       free (postorder);
    2723              :     }
    2724              : 
    2725              :   /* Then perform the threading through loop headers.  We start with the
    2726              :      innermost loop, so that the changes in cfg we perform won't affect
    2727              :      further threading.  */
    2728      1058167 :   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    2729              :     {
    2730      1053495 :       if (!loop->header
    2731       595240 :           || !bitmap_bit_p (threaded_blocks, loop->header->index))
    2732       458255 :         continue;
    2733              : 
    2734       136985 :       retval |= thread_through_loop_header (loop, may_peel_loop_headers);
    2735       154309 :     }
    2736              : 
    2737              :   /* All jump threading paths should have been resolved at this
    2738              :      point.  Verify that is the case.  */
    2739       154309 :   basic_block bb;
    2740      9040421 :   FOR_EACH_BB_FN (bb, cfun)
    2741              :     {
    2742      8886112 :       edge_iterator ei;
    2743      8886112 :       edge e;
    2744     21864924 :       FOR_EACH_EDGE (e, ei, bb->preds)
    2745     12978812 :         gcc_assert (e->aux == NULL);
    2746              :     }
    2747              : 
    2748       154309 :   free_original_copy_tables ();
    2749              : 
    2750       154309 :   return retval;
    2751       154309 : }
    2752              : 
    2753              : bool
    2754      2644809 : jt_path_registry::cancel_invalid_paths (vec<jump_thread_edge *> &path)
    2755              : {
    2756      2644809 :   gcc_checking_assert (!path.is_empty ());
    2757      2644809 :   edge entry = path[0]->e;
    2758      2644809 :   edge exit = path[path.length () - 1]->e;
    2759      2644809 :   bool seen_latch = false;
    2760      2644809 :   int loops_crossed = 0;
    2761      2644809 :   bool crossed_latch = false;
    2762      2644809 :   bool crossed_loop_header = false;
    2763              :   // Use ->dest here instead of ->src to ignore the first block.  The
    2764              :   // first block is allowed to be in a different loop, since it'll be
    2765              :   // redirected.  See similar comment in profitable_path_p: "we don't
    2766              :   // care about that block...".
    2767      2644809 :   loop_p loop = entry->dest->loop_father;
    2768      2644809 :   loop_p curr_loop = loop;
    2769              : 
    2770      9232547 :   for (unsigned int i = 0; i < path.length (); i++)
    2771              :     {
    2772      6587738 :       edge e = path[i]->e;
    2773              : 
    2774      6587738 :       if (e == NULL)
    2775              :         {
    2776              :           // NULL outgoing edges on a path can happen for jumping to a
    2777              :           // constant address.
    2778            0 :           cancel_thread (&path, "Found NULL edge in jump threading path");
    2779            0 :           return true;
    2780              :         }
    2781              : 
    2782      6587738 :       if (loop->latch == e->src || loop->latch == e->dest)
    2783              :         {
    2784       524391 :           seen_latch = true;
    2785              :           // Like seen_latch, but excludes the first block.
    2786       524391 :           if (e->src != entry->src)
    2787       492825 :             crossed_latch = true;
    2788              :         }
    2789              : 
    2790      6587738 :       if (e->dest->loop_father != curr_loop)
    2791              :         {
    2792       373523 :           curr_loop = e->dest->loop_father;
    2793       373523 :           ++loops_crossed;
    2794              :         }
    2795              : 
    2796              :       // ?? Avoid threading through loop headers that remain in the
    2797              :       // loop, as such threadings tend to create sub-loops which
    2798              :       // _might_ be OK ??.
    2799      6587738 :       if (e->dest->loop_father->header == e->dest
    2800      6587738 :           && !flow_loop_nested_p (exit->dest->loop_father,
    2801              :                                   e->dest->loop_father))
    2802              :         crossed_loop_header = true;
    2803              : 
    2804      6587738 :       if (flag_checking && !m_backedge_threads)
    2805      3191916 :         gcc_assert ((path[i]->e->flags & EDGE_DFS_BACK) == 0);
    2806              :     }
    2807              : 
    2808              :   // If we crossed a loop into an outer loop without crossing the
    2809              :   // latch, this is just an early exit from the loop.
    2810      2644809 :   if (loops_crossed == 1
    2811      2644809 :       && !crossed_latch
    2812      2644809 :       && flow_loop_nested_p (exit->dest->loop_father, exit->src->loop_father))
    2813              :     return false;
    2814              : 
    2815      2557452 :   if (cfun->curr_properties & PROP_loop_opts_done)
    2816              :     return false;
    2817              : 
    2818      1912098 :   if (seen_latch && empty_block_p (loop->latch))
    2819              :     {
    2820       232871 :       cancel_thread (&path, "Threading through latch before loop opts "
    2821              :                      "would create non-empty latch");
    2822       232871 :       return true;
    2823              :     }
    2824      1679227 :   if (loops_crossed)
    2825              :     {
    2826       163380 :       cancel_thread (&path, "Path crosses loops");
    2827       163380 :       return true;
    2828              :     }
    2829              :   // The path should either start and end in the same loop or exit the
    2830              :   // loop it starts in but never enter a loop.  This also catches
    2831              :   // creating irreducible loops, not only rotation.
    2832      1515847 :   if (entry->src->loop_father != exit->dest->loop_father
    2833      1670049 :       && !flow_loop_nested_p (exit->src->loop_father,
    2834       154202 :                               entry->dest->loop_father))
    2835              :     {
    2836       154202 :       cancel_thread (&path, "Path rotates loop");
    2837       154202 :       return true;
    2838              :     }
    2839      1361645 :   if (crossed_loop_header)
    2840              :     {
    2841        14285 :       cancel_thread (&path, "Path crosses loop header but does not exit it");
    2842        14285 :       return true;
    2843              :     }
    2844              :   return false;
    2845              : }
    2846              : 
    2847              : /* Register a jump threading opportunity.  We queue up all the jump
    2848              :    threading opportunities discovered by a pass and update the CFG
    2849              :    and SSA form all at once.
    2850              : 
    2851              :    E is the edge we can thread, E2 is the new target edge, i.e., we
    2852              :    are effectively recording that E->dest can be changed to E2->dest
    2853              :    after fixing the SSA graph.
    2854              : 
    2855              :    Return TRUE if PATH was successfully threaded.  */
    2856              : 
    2857              : bool
    2858      2644809 : jt_path_registry::register_jump_thread (vec<jump_thread_edge *> *path)
    2859              : {
    2860      2644809 :   gcc_checking_assert (flag_thread_jumps);
    2861              : 
    2862      2644809 :   if (!dbg_cnt (registered_jump_thread))
    2863              :     {
    2864            0 :       path->release ();
    2865            0 :       return false;
    2866              :     }
    2867              : 
    2868      2644809 :   if (cancel_invalid_paths (*path))
    2869              :     return false;
    2870              : 
    2871      2080071 :   if (dump_file && (dump_flags & TDF_DETAILS))
    2872          124 :     dump_jump_thread_path (dump_file, *path, true);
    2873              : 
    2874      2080071 :   m_paths.safe_push (path);
    2875      2080071 :   return true;
    2876              : }
    2877              : 
    2878              : /* Return how many uses of T there are within BB, as long as there
    2879              :    aren't any uses outside BB.  If there are any uses outside BB,
    2880              :    return -1 if there's at most one use within BB, or -2 if there is
    2881              :    more than one use within BB.  */
    2882              : 
    2883              : static int
    2884      1800743 : uses_in_bb (tree t, basic_block bb)
    2885              : {
    2886      1800743 :   int uses = 0;
    2887      1800743 :   bool outside_bb = false;
    2888              : 
    2889      1800743 :   imm_use_iterator iter;
    2890      1800743 :   use_operand_p use_p;
    2891      7320796 :   FOR_EACH_IMM_USE_FAST (use_p, iter, t)
    2892              :     {
    2893      3828772 :       if (is_gimple_debug (USE_STMT (use_p)))
    2894       643695 :         continue;
    2895              : 
    2896      3185077 :       if (gimple_bb (USE_STMT (use_p)) != bb)
    2897              :         outside_bb = true;
    2898              :       else
    2899      2105043 :         uses++;
    2900              : 
    2901      3185077 :       if (outside_bb && uses > 1)
    2902       109462 :         return -2;
    2903       109462 :     }
    2904              : 
    2905      1691281 :   if (outside_bb)
    2906       493623 :     return -1;
    2907              : 
    2908              :   return uses;
    2909              : }
    2910              : 
    2911              : /* Starting from the final control flow stmt in BB, assuming it will
    2912              :    be removed, follow uses in to-be-removed stmts back to their defs
    2913              :    and count how many defs are to become dead and be removed as
    2914              :    well.  */
    2915              : 
    2916              : unsigned int
    2917      1616682 : estimate_threading_killed_stmts (basic_block bb)
    2918              : {
    2919      1616682 :   int killed_stmts = 0;
    2920      1616682 :   hash_map<tree, int> ssa_remaining_uses;
    2921      1616682 :   auto_vec<gimple *, 4> dead_worklist;
    2922              : 
    2923              :   /* If the block has only two predecessors, threading will turn phi
    2924              :      dsts into either src, so count them as dead stmts.  */
    2925      1616682 :   bool drop_all_phis = EDGE_COUNT (bb->preds) == 2;
    2926              : 
    2927      1616682 :   if (drop_all_phis)
    2928       718847 :     for (gphi_iterator gsi = gsi_start_phis (bb);
    2929      2419877 :          !gsi_end_p (gsi); gsi_next (&gsi))
    2930              :       {
    2931      1701030 :         gphi *phi = gsi.phi ();
    2932      1701030 :         tree dst = gimple_phi_result (phi);
    2933              : 
    2934              :         /* We don't count virtual PHIs as stmts in
    2935              :            record_temporary_equivalences_from_phis.  */
    2936      3402060 :         if (virtual_operand_p (dst))
    2937       505954 :           continue;
    2938              : 
    2939      1195076 :         killed_stmts++;
    2940              :       }
    2941              : 
    2942      3233364 :   if (gsi_end_p (gsi_last_bb (bb)))
    2943            0 :     return killed_stmts;
    2944              : 
    2945      1616682 :   gimple *stmt = gsi_stmt (gsi_last_bb (bb));
    2946      1616682 :   if (gimple_code (stmt) != GIMPLE_COND
    2947              :       && gimple_code (stmt) != GIMPLE_GOTO
    2948              :       && gimple_code (stmt) != GIMPLE_SWITCH)
    2949       484437 :     return killed_stmts;
    2950              : 
    2951              :   /* The control statement is always dead.  */
    2952      1132245 :   killed_stmts++;
    2953      1132245 :   dead_worklist.quick_push (stmt);
    2954      3303323 :   while (!dead_worklist.is_empty ())
    2955              :     {
    2956      2171078 :       stmt = dead_worklist.pop ();
    2957              : 
    2958      2171078 :       ssa_op_iter iter;
    2959      2171078 :       use_operand_p use_p;
    2960      4744963 :       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
    2961              :         {
    2962      2573885 :           tree t = USE_FROM_PTR (use_p);
    2963      2573885 :           gimple *def = SSA_NAME_DEF_STMT (t);
    2964              : 
    2965      2573885 :           if (gimple_bb (def) == bb
    2966      2020353 :               && (gimple_code (def) != GIMPLE_PHI
    2967       134405 :                   || !drop_all_phis)
    2968      4504858 :               && !gimple_has_side_effects (def))
    2969              :             {
    2970      1856543 :               int *usesp = ssa_remaining_uses.get (t);
    2971      1856543 :               int uses;
    2972              : 
    2973      1856543 :               if (usesp)
    2974        55800 :                 uses = *usesp;
    2975              :               else
    2976      1800743 :                 uses = uses_in_bb (t, bb);
    2977              : 
    2978      1856543 :               gcc_assert (uses);
    2979              : 
    2980              :               /* Don't bother recording the expected use count if we
    2981              :                  won't find any further uses within BB.  */
    2982      1856543 :               if (!usesp && (uses < -1 || uses > 1))
    2983              :                 {
    2984       273429 :                   usesp = &ssa_remaining_uses.get_or_insert (t);
    2985       273429 :                   *usesp = uses;
    2986              :                 }
    2987              : 
    2988      1856543 :               if (uses < 0)
    2989       636301 :                 continue;
    2990              : 
    2991      1220242 :               --uses;
    2992      1220242 :               if (usesp)
    2993       186551 :                 *usesp = uses;
    2994              : 
    2995      1220242 :               if (!uses)
    2996              :                 {
    2997      1051252 :                   killed_stmts++;
    2998      1051252 :                   if (usesp)
    2999        17561 :                     ssa_remaining_uses.remove (t);
    3000      1051252 :                   if (gimple_code (def) != GIMPLE_PHI)
    3001      1038833 :                     dead_worklist.safe_push (def);
    3002              :                 }
    3003              :             }
    3004              :         }
    3005              :     }
    3006              : 
    3007      1132245 :   if (dump_file)
    3008           25 :     fprintf (dump_file, "threading bb %i kills %i stmts\n",
    3009              :              bb->index, killed_stmts);
    3010              : 
    3011      1132245 :   return killed_stmts;
    3012      1616682 : }
        

Generated by: LCOV version 2.4-beta

LCOV profile is generated on x86_64 machine using following configure options: configure --disable-bootstrap --enable-coverage=opt --enable-languages=c,c++,fortran,go,jit,lto,rust,m2 --enable-host-shared. GCC test suite is run with the built compiler.