Line data Source code
1 : /* Thread edges through blocks and update the control flow and SSA graphs.
2 : Copyright (C) 2004-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify
7 : it under the terms of the GNU General Public License as published by
8 : the Free Software Foundation; either version 3, or (at your option)
9 : any later version.
10 :
11 : GCC is distributed in the hope that it will be useful,
12 : but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : GNU General Public License for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "backend.h"
24 : #include "tree.h"
25 : #include "gimple.h"
26 : #include "cfghooks.h"
27 : #include "tree-pass.h"
28 : #include "ssa.h"
29 : #include "fold-const.h"
30 : #include "cfganal.h"
31 : #include "gimple-iterator.h"
32 : #include "tree-ssa.h"
33 : #include "tree-ssa-threadupdate.h"
34 : #include "cfgloop.h"
35 : #include "dbgcnt.h"
36 : #include "tree-cfg.h"
37 : #include "tree-vectorizer.h"
38 : #include "tree-pass.h"
39 :
40 : /* Given a block B, update the CFG and SSA graph to reflect redirecting
41 : one or more in-edges to B to instead reach the destination of an
42 : out-edge from B while preserving any side effects in B.
43 :
44 : i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
45 : side effects of executing B.
46 :
47 : 1. Make a copy of B (including its outgoing edges and statements). Call
48 : the copy B'. Note B' has no incoming edges or PHIs at this time.
49 :
50 : 2. Remove the control statement at the end of B' and all outgoing edges
51 : except B'->C.
52 :
53 : 3. Add a new argument to each PHI in C with the same value as the existing
54 : argument associated with edge B->C. Associate the new PHI arguments
55 : with the edge B'->C.
56 :
57 : 4. For each PHI in B, find or create a PHI in B' with an identical
58 : PHI_RESULT. Add an argument to the PHI in B' which has the same
59 : value as the PHI in B associated with the edge A->B. Associate
60 : the new argument in the PHI in B' with the edge A->B.
61 :
62 : 5. Change the edge A->B to A->B'.
63 :
64 : 5a. This automatically deletes any PHI arguments associated with the
65 : edge A->B in B.
66 :
67 : 5b. This automatically associates each new argument added in step 4
68 : with the edge A->B'.
69 :
70 : 6. Repeat for other incoming edges into B.
71 :
72 : 7. Put the duplicated resources in B and all the B' blocks into SSA form.
73 :
74 : Note that block duplication can be minimized by first collecting the
75 : set of unique destination blocks that the incoming edges should
76 : be threaded to.
77 :
78 : We reduce the number of edges and statements we create by not copying all
79 : the outgoing edges and the control statement in step #1. We instead create
80 : a template block without the outgoing edges and duplicate the template.
81 :
82 : Another case this code handles is threading through a "joiner" block. In
83 : this case, we do not know the destination of the joiner block, but one
84 : of the outgoing edges from the joiner block leads to a threadable path. This
85 : case largely works as outlined above, except the duplicate of the joiner
86 : block still contains a full set of outgoing edges and its control statement.
87 : We just redirect one of its outgoing edges to our jump threading path. */
88 :
89 :
90 : /* Steps #5 and #6 of the above algorithm are best implemented by walking
91 : all the incoming edges which thread to the same destination edge at
92 : the same time. That avoids lots of table lookups to get information
93 : for the destination edge.
94 :
95 : To realize that implementation we create a list of incoming edges
96 : which thread to the same outgoing edge. Thus to implement steps
97 : #5 and #6 we traverse our hash table of outgoing edge information.
98 : For each entry we walk the list of incoming edges which thread to
99 : the current outgoing edge. */
100 :
struct el
{
  /* One incoming edge that threads to this entry's outgoing edge.  */
  edge e;

  /* Next incoming edge threading to the same outgoing edge, or NULL.  */
  struct el *next;
};
106 :
107 : /* Main data structure recording information regarding B's duplicate
108 : blocks. */
109 :
110 : /* We need to efficiently record the unique thread destinations of this
111 : block and specific information associated with those destinations. We
112 : may have many incoming edges threaded to the same outgoing edge. This
113 : can be naturally implemented with a hash table. */
114 :
struct redirection_data : free_ptr_hash<redirection_data>
{
  /* We support wiring up two block duplicates in a jump threading path.

     One is a normal block copy where we remove the control statement
     and wire up its single remaining outgoing edge to the thread path.

     The other is a joiner block where we leave the control statement
     in place, but wire one of the outgoing edges to a thread path.

     In theory we could have multiple block duplicates in a jump
     threading path, but I haven't tried that.

     The duplicate blocks appear in this array in the same order in
     which they appear in the jump thread path.  */
  basic_block dup_blocks[2];

  /* The jump threading path this entry describes.  Entries are hashed
     on the index of the final edge's destination block (see hash below),
     so all incoming edges threading to the same place share a slot.  */
  vec<jump_thread_edge *> *path;

  /* A list of incoming edges which we want to thread to the
     same path.  */
  struct el *incoming_edges;

  /* hash_table support.  */
  static inline hashval_t hash (const redirection_data *);
  static inline int equal (const redirection_data *, const redirection_data *);
};
142 :
jump_thread_path_allocator::jump_thread_path_allocator ()
{
  /* All jump_thread_edge objects and path vectors handed out by this
     allocator are carved from this obstack.  */
  obstack_init (&m_obstack);
}
147 :
jump_thread_path_allocator::~jump_thread_path_allocator ()
{
  /* Release every edge and path allocated from the obstack in one shot.  */
  obstack_free (&m_obstack, NULL);
}
152 :
153 : jump_thread_edge *
154 25495055 : jump_thread_path_allocator::allocate_thread_edge (edge e,
155 : jump_thread_edge_type type)
156 : {
157 25495055 : void *r = obstack_alloc (&m_obstack, sizeof (jump_thread_edge));
158 25495055 : return new (r) jump_thread_edge (e, type);
159 : }
160 :
161 : vec<jump_thread_edge *> *
162 18251164 : jump_thread_path_allocator::allocate_thread_path ()
163 : {
164 : // ?? Since the paths live in an obstack, we should be able to remove all
165 : // references to path->release() throughout the code.
166 18251164 : void *r = obstack_alloc (&m_obstack, sizeof (vec <jump_thread_edge *>));
167 18251164 : return new (r) vec<jump_thread_edge *> ();
168 : }
169 :
170 8351647 : jt_path_registry::jt_path_registry (bool backedge_threads)
171 : {
172 8351647 : m_paths.create (5);
173 8351647 : m_num_threaded_edges = 0;
174 8351647 : m_backedge_threads = backedge_threads;
175 8351647 : }
176 :
jt_path_registry::~jt_path_registry ()
{
  /* Release the vector of candidate paths.  The path objects themselves
     live in m_allocator's obstack and die with the allocator.  */
  m_paths.release ();
}
181 :
182 2083233 : fwd_jt_path_registry::fwd_jt_path_registry ()
183 2083233 : : jt_path_registry (/*backedge_threads=*/false)
184 : {
185 2083233 : m_removed_edges = new hash_table<struct removed_edges> (17);
186 2083233 : m_redirection_data = NULL;
187 2083233 : }
188 :
fwd_jt_path_registry::~fwd_jt_path_registry ()
{
  /* NOTE(review): only m_removed_edges is freed here; m_redirection_data
     is set to NULL in the constructor and appears to be managed elsewhere
     during CFG updates -- confirm against the rest of the file.  */
  delete m_removed_edges;
}
193 :
back_jt_path_registry::back_jt_path_registry ()
  : jt_path_registry (/*backedge_threads=*/true)
{
  /* Nothing beyond flagging the base registry as handling back edges.  */
}
198 :
199 : void
200 25495055 : jt_path_registry::push_edge (vec<jump_thread_edge *> *path,
201 : edge e, jump_thread_edge_type type)
202 : {
203 25495055 : jump_thread_edge *x = m_allocator.allocate_thread_edge (e, type);
204 25495055 : path->safe_push (x);
205 25495055 : }
206 :
vec<jump_thread_edge *> *
jt_path_registry::allocate_thread_path ()
{
  /* Forward to the obstack-backed allocator owned by this registry.  */
  return m_allocator.allocate_thread_path ();
}
212 :
213 : /* Dump a jump threading path, including annotations about each
214 : edge in the path. */
215 :
static void
dump_jump_thread_path (FILE *dump_file,
		       const vec<jump_thread_edge *> &path,
		       bool registering)
{
  /* Element zero of PATH is the incoming edge; print it first, along with
     the debug counter value when registering so threads can be bisected
     with -fdbg-cnt.  */
  if (registering)
    fprintf (dump_file,
	     "  [%u] Registering jump thread: (%d, %d) incoming edge; ",
	     dbg_cnt_counter (registered_jump_thread),
	     path[0]->e->src->index, path[0]->e->dest->index);
  else
    fprintf (dump_file,
	     "  Cancelling jump thread: (%d, %d) incoming edge; ",
	     path[0]->e->src->index, path[0]->e->dest->index);

  for (unsigned int i = 1; i < path.length (); i++)
    {
      /* We can get paths with a NULL edge when the final destination
	 of a jump thread turns out to be a constant address.  We dump
	 those paths when debugging, so we have to be prepared for that
	 possibility here.  */
      if (path[i]->e == NULL)
	continue;

      fprintf (dump_file, " (%d, %d) ",
	       path[i]->e->src->index, path[i]->e->dest->index);
      /* Annotate each edge with how its source block participates in
	 the thread (joiner copy, normal copy, or no copy needed).  */
      switch (path[i]->type)
	{
	case EDGE_COPY_SRC_JOINER_BLOCK:
	  fprintf (dump_file, "joiner");
	  break;
	case EDGE_COPY_SRC_BLOCK:
	  fprintf (dump_file, "normal");
	  break;
	case EDGE_NO_COPY_SRC_BLOCK:
	  fprintf (dump_file, "nocopy");
	  break;
	default:
	  gcc_unreachable ();
	}

      /* Flag edges the DFS walk classified as back edges.  */
      if ((path[i]->e->flags & EDGE_DFS_BACK) != 0)
	fprintf (dump_file, " (back)");
    }
  fprintf (dump_file, "; \n");
}
262 :
DEBUG_FUNCTION void
debug (const vec<jump_thread_edge *> &path)
{
  /* Dump PATH to stderr as though it were being registered.  */
  dump_jump_thread_path (stderr, path, true);
}
268 :
DEBUG_FUNCTION void
debug (const vec<jump_thread_edge *> *path)
{
  /* Convenience overload for a pointer to a path.  */
  debug (*path);
}
274 :
275 : /* Release the memory associated with PATH, and if dumping is enabled,
276 : dump out the reason why the thread was canceled. */
277 :
278 : static void
279 1068193 : cancel_thread (vec<jump_thread_edge *> *path, const char *reason = NULL)
280 : {
281 1068193 : if (dump_file && (dump_flags & TDF_DETAILS))
282 : {
283 67 : if (reason)
284 59 : fprintf (dump_file, "%s: ", reason);
285 :
286 67 : dump_jump_thread_path (dump_file, *path, false);
287 67 : fprintf (dump_file, "\n");
288 : }
289 1068193 : path->release ();
290 1068193 : }
291 :
292 : /* Simple hashing function. For any given incoming edge E, we're going
293 : to be most concerned with the final destination of its jump thread
294 : path. So hash on the block index of the final edge in the path. */
295 :
296 : inline hashval_t
297 516147 : redirection_data::hash (const redirection_data *p)
298 : {
299 516147 : vec<jump_thread_edge *> *path = p->path;
300 516147 : return path->last ()->e->dest->index;
301 : }
302 :
303 : /* Given two hash table entries, return true if they have the same
304 : jump threading path. */
305 : inline int
306 158351 : redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
307 : {
308 158351 : vec<jump_thread_edge *> *path1 = p1->path;
309 158351 : vec<jump_thread_edge *> *path2 = p2->path;
310 :
311 475053 : if (path1->length () != path2->length ())
312 : return false;
313 :
314 260110 : for (unsigned int i = 1; i < path1->length (); i++)
315 : {
316 189518 : if ((*path1)[i]->type != (*path2)[i]->type
317 189518 : || (*path1)[i]->e != (*path2)[i]->e)
318 : return false;
319 : }
320 :
321 : return true;
322 : }
323 :
324 : /* Data structure of information to pass to hash table traversal routines. */
struct ssa_local_info_t
{
  /* The current block we are working on.  */
  basic_block bb;

  /* We only create a template block for the first duplicated block in a
     jump threading path as we may need many duplicates of that block.

     The second duplicate block in a path is specific to that path.  Creating
     and sharing a template for that block is considerably more difficult.  */
  basic_block template_block;

  /* If we append debug stmts to the template block after creating it,
     this iterator won't be the last one in the block, and further
     copies of the template block shouldn't get debug stmts after
     it.  */
  gimple_stmt_iterator template_last_to_copy;

  /* Blocks duplicated for the thread.  */
  bitmap duplicate_blocks;

  /* TRUE if we thread one or more jumps, FALSE otherwise.  */
  bool jumps_threaded;

  /* When we have multiple paths through a joiner which reach different
     final destinations, then we may need to correct for potential
     profile insanities.  */
  bool need_profile_correction;

  // Jump threading statistics: number of edges threaded so far.
  unsigned long num_threaded_edges;
};
357 :
358 : /* When we start updating the CFG for threading, data necessary for jump
359 : threading is attached to the AUX field for the incoming edge. Use these
360 : macros to access the underlying structure attached to the AUX field. */
361 : #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
362 :
363 : /* Remove the last statement in block BB if it is a control statement
364 : Also remove all outgoing edges except the edge which reaches DEST_BB.
365 : If DEST_BB is NULL, then remove all outgoing edges. */
366 :
static void
remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
{
  gimple_stmt_iterator gsi;
  edge e;
  edge_iterator ei;

  gsi = gsi_last_bb (bb);

  /* If the duplicate ends with a control statement, then remove it.

     Note that if we are duplicating the template block rather than the
     original basic block, then the duplicate might not have any real
     statements in it.  */
  if (!gsi_end_p (gsi)
      && gsi_stmt (gsi)
      && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
	  || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
	  || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
    gsi_remove (&gsi, true);

  /* Walk the successor edges; ei_safe_edge tolerates removal of the
     current edge, which is why ei_next is only called on the keep path.  */
  for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
    {
      if (e->dest != dest_bb)
	{
	  free_dom_edge_info (e);
	  remove_edge (e);
	}
      else
	{
	  /* The surviving edge is now BB's only exit, so it is taken
	     unconditionally.  */
	  e->probability = profile_probability::always ();
	  ei_next (&ei);
	}
    }

  /* If the remaining edge is a loop exit, there must have
     a removed edge that was not a loop exit.

     In that case BB and possibly other blocks were previously
     in the loop, but are now outside the loop.  Thus, we need
     to update the loop structures.  */
  if (single_succ_p (bb)
      && loop_outer (bb->loop_father)
      && loop_exit_edge_p (bb->loop_father, single_succ_edge (bb)))
    loops_state_set (LOOPS_NEED_FIXUP);
}
413 :
414 : /* Create a duplicate of BB. Record the duplicate block in an array
415 : indexed by COUNT stored in RD. */
416 :
static void
create_block_for_threading (basic_block bb,
			    struct redirection_data *rd,
			    unsigned int count,
			    bitmap *duplicate_blocks)
{
  edge_iterator ei;
  edge e;

  /* We can use the generic block duplication code and simply remove
     the stuff we do not need.  */
  rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);

  FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
    {
      /* Duplicated outgoing edges must not inherit thread-path AUX data.  */
      e->aux = NULL;

      /* If we duplicate a block with an outgoing edge marked as
	 EDGE_IGNORE, we must clear EDGE_IGNORE so that it doesn't
	 leak out of the current pass.

	 It would be better to simplify switch statements and remove
	 the edges before we get here, but the sequencing is nontrivial.  */
      e->flags &= ~EDGE_IGNORE;
    }

  /* Zero out the profile, since the block is unreachable for now.  */
  rd->dup_blocks[count]->count = profile_count::uninitialized ();
  if (duplicate_blocks)
    bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
}
448 :
449 : /* Given an outgoing edge E lookup and return its entry in our hash table.
450 :
451 : If INSERT is true, then we insert the entry into the hash table if
452 : it is not already present. INCOMING_EDGE is added to the list of incoming
453 : edges associated with E in the hash table. */
454 :
redirection_data *
fwd_jt_path_registry::lookup_redirection_data (edge e, insert_option insert)
{
  struct redirection_data **slot;
  struct redirection_data *elt;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);

  /* Build a hash table element so we can see if E is already
     in the table.  */
  elt = XNEW (struct redirection_data);
  elt->path = path;
  elt->dup_blocks[0] = NULL;
  elt->dup_blocks[1] = NULL;
  elt->incoming_edges = NULL;

  slot = m_redirection_data->find_slot (elt, insert);

  /* This will only happen if INSERT is false and the entry is not
     in the hash table.  */
  if (slot == NULL)
    {
      free (elt);
      return NULL;
    }

  /* This will only happen if E was not in the hash table and
     INSERT is true.  */
  if (*slot == NULL)
    {
      /* The table takes ownership of ELT; seed its incoming-edge
	 list with E.  */
      *slot = elt;
      elt->incoming_edges = XNEW (struct el);
      elt->incoming_edges->e = e;
      elt->incoming_edges->next = NULL;
      return elt;
    }
  /* E was in the hash table.  */
  else
    {
      /* Free ELT as we do not need it anymore, we will extract the
	 relevant entry from the hash table itself.  */
      free (elt);

      /* Get the entry stored in the hash table.  */
      elt = *slot;

      /* If insertion was requested, then we need to add INCOMING_EDGE
	 to the list of incoming edges associated with E.  */
      if (insert)
	{
	  /* Push E onto the front of the existing incoming-edge list.  */
	  struct el *el = XNEW (struct el);
	  el->next = elt->incoming_edges;
	  el->e = e;
	  elt->incoming_edges = el;
	}

      return elt;
    }
}
513 :
514 : /* Given ssa_name DEF, backtrack jump threading PATH from node IDX
515 : to see if it has constant value in a flow sensitive manner. Set
516 : LOCUS to location of the constant phi arg and return the value.
517 : Return DEF directly if either PATH or idx is ZERO. */
518 :
static tree
get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
			 basic_block bb, int idx, location_t *locus)
{
  tree arg;
  gphi *def_phi;
  basic_block def_bb;

  /* Nothing to backtrack through.  */
  if (path == NULL || idx == 0)
    return def;

  /* Only PHI-defined names can be resolved by walking the path.  */
  def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
  if (!def_phi)
    return def;

  def_bb = gimple_bb (def_phi);
  /* Don't propagate loop invariants into deeper loops.  */
  if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
    return def;

  /* Backtrack jump threading path from IDX to see if def has constant
     value.  */
  for (int j = idx - 1; j >= 0; j--)
    {
      edge e = (*path)[j]->e;
      if (e->dest == def_bb)
	{
	  /* The path enters DEF_BB via E, so the PHI argument on E is
	     the value DEF takes along this path.  */
	  arg = gimple_phi_arg_def (def_phi, e->dest_idx);
	  if (is_gimple_min_invariant (arg))
	    {
	      *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
	      return arg;
	    }
	  break;
	}
    }

  return def;
}
558 :
559 : /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
560 : Try to backtrack jump threading PATH from node IDX to see if the arg
561 : has constant value, copy constant value instead of argument itself
562 : if yes. */
563 :
564 : static void
565 441791 : copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
566 : vec<jump_thread_edge *> *path, int idx)
567 : {
568 441791 : gphi_iterator gsi;
569 441791 : int src_indx = src_e->dest_idx;
570 :
571 739848 : for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
572 : {
573 298057 : gphi *phi = gsi.phi ();
574 298057 : tree def = gimple_phi_arg_def (phi, src_indx);
575 298057 : location_t locus = gimple_phi_arg_location (phi, src_indx);
576 :
577 298057 : if (TREE_CODE (def) == SSA_NAME
578 528101 : && !virtual_operand_p (gimple_phi_result (phi)))
579 129300 : def = get_value_locus_in_path (def, path, bb, idx, &locus);
580 :
581 298057 : add_phi_arg (phi, def, tgt_e, locus);
582 : }
583 441791 : }
584 :
585 : /* We have recently made a copy of ORIG_BB, including its outgoing
586 : edges. The copy is NEW_BB. Every PHI node in every direct successor of
587 : ORIG_BB has a new argument associated with edge from NEW_BB to the
588 : successor. Initialize the PHI argument so that it is equal to the PHI
589 : argument associated with the edge from ORIG_BB to the successor.
590 : PATH and IDX are used to check if the new PHI argument has constant
591 : value in a flow sensitive manner. */
592 :
593 : static void
594 78943 : update_destination_phis (basic_block orig_bb, basic_block new_bb,
595 : vec<jump_thread_edge *> *path, int idx)
596 : {
597 78943 : edge_iterator ei;
598 78943 : edge e;
599 :
600 249727 : FOR_EACH_EDGE (e, ei, orig_bb->succs)
601 : {
602 170784 : edge e2 = find_edge (new_bb, e->dest);
603 170784 : copy_phi_args (e->dest, e, e2, path, idx);
604 : }
605 78943 : }
606 :
607 : /* Given a duplicate block and its single destination (both stored
608 : in RD). Create an edge between the duplicate and its single
609 : destination.
610 :
611 : Add an additional argument to any PHI nodes at the single
612 : destination. IDX is the start node in jump threading path
613 : we start to check to see if the new PHI argument has constant
614 : value along the jump threading path. */
615 :
616 : static void
617 254876 : create_edge_and_update_destination_phis (struct redirection_data *rd,
618 : basic_block bb, int idx)
619 : {
620 254876 : edge e = make_single_succ_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
621 :
622 254876 : rescan_loop_exit (e, true, false);
623 :
624 : /* We used to copy the thread path here. That was added in 2007
625 : and dutifully updated through the representation changes in 2013.
626 :
627 : In 2013 we added code to thread from an interior node through
628 : the backedge to another interior node. That runs after the code
629 : to thread through loop headers from outside the loop.
630 :
631 : The latter may delete edges in the CFG, including those
632 : which appeared in the jump threading path we copied here. Thus
633 : we'd end up using a dangling pointer.
634 :
635 : After reviewing the 2007/2011 code, I can't see how anything
636 : depended on copying the AUX field and clearly copying the jump
637 : threading path is problematical due to embedded edge pointers.
638 : It has been removed. */
639 254876 : e->aux = NULL;
640 :
641 : /* If there are any PHI nodes at the destination of the outgoing edge
642 : from the duplicate block, then we will need to add a new argument
643 : to them. The argument should have the same value as the argument
644 : associated with the outgoing edge stored in RD. */
645 254876 : copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
646 254876 : }
647 :
648 : /* Look through PATH beginning at START and return TRUE if there are
649 : any additional blocks that need to be duplicated. Otherwise,
650 : return FALSE. */
651 : static bool
652 78943 : any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
653 : unsigned int start)
654 : {
655 100245 : for (unsigned int i = start + 1; i < path->length (); i++)
656 : {
657 64337 : if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
658 64337 : || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
659 : return true;
660 : }
661 : return false;
662 : }
663 :
664 :
665 : /* Compute the amount of profile count coming into the jump threading
666 : path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
667 : PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
668 : duplicated path, returned in PATH_OUT_COUNT_PTR. LOCAL_INFO is used to
669 : identify blocks duplicated for jump threading, which have duplicated
670 : edges that need to be ignored in the analysis. Return true if path contains
671 : a joiner, false otherwise.
672 :
673 : In the non-joiner case, this is straightforward - all the counts
674 : flowing into the jump threading path should flow through the duplicated
675 : block and out of the duplicated path.
676 :
677 : In the joiner case, it is very tricky. Some of the counts flowing into
678 : the original path go offpath at the joiner. The problem is that while
679 : we know how much total count goes off-path in the original control flow,
680 : we don't know how many of the counts corresponding to just the jump
681 : threading path go offpath at the joiner.
682 :
683 : For example, assume we have the following control flow and identified
684 : jump threading paths:
685 :
686 : A B C
687 : \ | /
688 : Ea \ |Eb / Ec
689 : \ | /
690 : v v v
691 : J <-- Joiner
692 : / \
693 : Eoff/ \Eon
694 : / \
695 : v v
696 : Soff Son <--- Normal
697 : /\
698 : Ed/ \ Ee
699 : / \
700 : v v
701 : D E
702 :
703 : Jump threading paths: A -> J -> Son -> D (path 1)
704 : C -> J -> Son -> E (path 2)
705 :
706 : Note that the control flow could be more complicated:
707 : - Each jump threading path may have more than one incoming edge. I.e. A and
708 : Ea could represent multiple incoming blocks/edges that are included in
709 : path 1.
710 : - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
711 : before or after the "normal" copy block). These are not duplicated onto
712 : the jump threading path, as they are single-successor.
713 : - Any of the blocks along the path may have other incoming edges that
714 : are not part of any jump threading path, but add profile counts along
715 : the path.
716 :
717 : In the above example, after all jump threading is complete, we will
718 : end up with the following control flow:
719 :
720 : A B C
721 : | | |
722 : Ea| |Eb |Ec
723 : | | |
724 : v v v
725 : Ja J Jc
726 : / \ / \Eon' / \
727 : Eona/ \ ---/---\-------- \Eonc
728 : / \ / / \ \
729 : v v v v v
730 : Sona Soff Son Sonc
731 : \ /\ /
732 : \___________ / \ _____/
733 : \ / \/
734 : vv v
735 : D E
736 :
737 : The main issue to notice here is that when we are processing path 1
738 : (A->J->Son->D) we need to figure out the outgoing edge weights to
739 : the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
740 : sum of the incoming weights to D remain Ed. The problem with simply
741 : assuming that Ja (and Jc when processing path 2) has the same outgoing
742 : probabilities to its successors as the original block J, is that after
743 : all paths are processed and other edges/counts removed (e.g. none
744 : of Ec will reach D after processing path 2), we may end up with not
745 : enough count flowing along duplicated edge Sona->D.
746 :
747 : Therefore, in the case of a joiner, we keep track of all counts
748 : coming in along the current path, as well as from predecessors not
749 : on any jump threading path (Eb in the above example). While we
750 : first assume that the duplicated Eona for Ja->Sona has the same
751 : probability as the original, we later compensate for other jump
752 : threading paths that may eliminate edges. We do that by keep track
753 : of all counts coming into the original path that are not in a jump
754 : thread (Eb in the above example, but as noted earlier, there could
755 : be other predecessors incoming to the path at various points, such
756 : as at Son). Call this cumulative non-path count coming into the path
757 : before D as Enonpath. We then ensure that the count from Sona->D is as at
758 : least as big as (Ed - Enonpath), but no bigger than the minimum
759 : weight along the jump threading path. The probabilities of both the
760 : original and duplicated joiner block J and Ja will be adjusted
761 : accordingly after the updates. */
762 :
763 : static bool
764 290784 : compute_path_counts (struct redirection_data *rd,
765 : ssa_local_info_t *local_info,
766 : profile_count *path_in_count_ptr,
767 : profile_count *path_out_count_ptr)
768 : {
769 290784 : edge e = rd->incoming_edges->e;
770 290784 : vec<jump_thread_edge *> *path = THREAD_PATH (e);
771 290784 : edge elast = path->last ()->e;
772 290784 : profile_count nonpath_count = profile_count::zero ();
773 290784 : bool has_joiner = false;
774 290784 : profile_count path_in_count = profile_count::zero ();
775 :
776 : /* Start by accumulating incoming edge counts to the path's first bb
777 : into a couple buckets:
778 : path_in_count: total count of incoming edges that flow into the
779 : current path.
780 : nonpath_count: total count of incoming edges that are not
781 : flowing along *any* path. These are the counts
782 : that will still flow along the original path after
783 : all path duplication is done by potentially multiple
784 : calls to this routine.
785 : (any other incoming edge counts are for a different jump threading
786 : path that will be handled by a later call to this routine.)
787 : To make this easier, start by recording all incoming edges that flow into
788 : the current path in a bitmap. We could add up the path's incoming edge
789 : counts here, but we still need to walk all the first bb's incoming edges
790 : below to add up the counts of the other edges not included in this jump
791 : threading path. */
792 290784 : struct el *next, *el;
793 290784 : auto_bitmap in_edge_srcs;
794 652160 : for (el = rd->incoming_edges; el; el = next)
795 : {
796 361376 : next = el->next;
797 361376 : bitmap_set_bit (in_edge_srcs, el->e->src->index);
798 : }
799 290784 : edge ein;
800 290784 : edge_iterator ei;
801 1068276 : FOR_EACH_EDGE (ein, ei, e->dest->preds)
802 : {
803 777492 : vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
804 : /* Simply check the incoming edge src against the set captured above. */
805 777492 : if (ein_path
806 1302147 : && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
807 : {
808 : /* It is necessary but not sufficient that the last path edges
809 : are identical. There may be different paths that share the
810 : same last path edge in the case where the last edge has a nocopy
811 : source block. */
812 361376 : gcc_assert (ein_path->last ()->e == elast);
813 361376 : path_in_count += ein->count ();
814 : }
815 416116 : else if (!ein_path)
816 : {
817 : /* Keep track of the incoming edges that are not on any jump-threading
818 : path. These counts will still flow out of original path after all
819 : jump threading is complete. */
820 252837 : nonpath_count += ein->count ();
821 : }
822 : }
823 :
824 : /* Now compute the fraction of the total count coming into the first
825 : path bb that is from the current threading path. */
826 290784 : profile_count total_count = e->dest->count;
827 : /* Handle incoming profile insanities. */
828 290784 : if (total_count < path_in_count)
829 22205 : path_in_count = total_count;
830 290784 : profile_probability onpath_scale = path_in_count.probability_in (total_count);
831 :
832 : /* Walk the entire path to do some more computation in order to estimate
833 : how much of the path_in_count will flow out of the duplicated threading
834 : path. In the non-joiner case this is straightforward (it should be
835 : the same as path_in_count, although we will handle incoming profile
836 : insanities by setting it equal to the minimum count along the path).
837 :
838 : In the joiner case, we need to estimate how much of the path_in_count
839 : will stay on the threading path after the joiner's conditional branch.
840 : We don't really know for sure how much of the counts
841 : associated with this path go to each successor of the joiner, but we'll
842 : estimate based on the fraction of the total count coming into the path
843 : bb was from the threading paths (computed above in onpath_scale).
844 : Afterwards, we will need to do some fixup to account for other threading
845 : paths and possible profile insanities.
846 :
847 : In order to estimate the joiner case's counts we also need to update
848 : nonpath_count with any additional counts coming into the path. Other
849 : blocks along the path may have additional predecessors from outside
850 : the path. */
851 290784 : profile_count path_out_count = path_in_count;
852 290784 : profile_count min_path_count = path_in_count;
853 665743 : for (unsigned int i = 1; i < path->length (); i++)
854 : {
855 374959 : edge epath = (*path)[i]->e;
856 374959 : profile_count cur_count = epath->count ();
857 374959 : if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
858 : {
859 78943 : has_joiner = true;
860 78943 : cur_count = cur_count.apply_probability (onpath_scale);
861 : }
862 : /* In the joiner case we need to update nonpath_count for any edges
863 : coming into the path that will contribute to the count flowing
864 : into the path successor. */
865 374959 : if (has_joiner && epath != elast)
866 : {
867 : /* Look for other incoming edges after joiner. */
868 251227 : FOR_EACH_EDGE (ein, ei, epath->dest->preds)
869 : {
870 184762 : if (ein != epath
871 : /* Ignore in edges from blocks we have duplicated for a
872 : threading path, which have duplicated edge counts until
873 : they are redirected by an invocation of this routine. */
874 303059 : && !bitmap_bit_p (local_info->duplicate_blocks,
875 118297 : ein->src->index))
876 42113 : nonpath_count += ein->count ();
877 : }
878 : }
879 374959 : if (cur_count < path_out_count)
880 161163 : path_out_count = cur_count;
881 374959 : if (epath->count () < min_path_count)
882 139113 : min_path_count = epath->count ();
883 : }
884 :
885 : /* We computed path_out_count above assuming that this path targeted
886 : the joiner's on-path successor with the same likelihood as it
887 : reached the joiner. However, other thread paths through the joiner
888 : may take a different path through the normal copy source block
889 : (i.e. they have a different elast), meaning that they do not
890 : contribute any counts to this path's elast. As a result, it may
891 : turn out that this path must have more count flowing to the on-path
892 : successor of the joiner. Essentially, all of this path's elast
893 : count must be contributed by this path and any nonpath counts
894 : (since any path through the joiner with a different elast will not
895 : include a copy of this elast in its duplicated path).
896 : So ensure that this path's path_out_count is at least the
897 : difference between elast->count () and nonpath_count. Otherwise the edge
898 : counts after threading will not be sane. */
899 290784 : if (local_info->need_profile_correction
900 319308 : && has_joiner && path_out_count < elast->count () - nonpath_count)
901 : {
902 9554 : path_out_count = elast->count () - nonpath_count;
903 : /* But neither can we go above the minimum count along the path
904 : we are duplicating. This can be an issue due to profile
905 : insanities coming in to this pass. */
906 9554 : if (path_out_count > min_path_count)
907 6038 : path_out_count = min_path_count;
908 : }
909 :
910 290784 : *path_in_count_ptr = path_in_count;
911 290784 : *path_out_count_ptr = path_out_count;
912 290784 : return has_joiner;
913 290784 : }
914 :
915 :
916 : /* Update the counts and frequencies for both an original path
917 : edge EPATH and its duplicate EDUP. The duplicate source block
918 : will get a count of PATH_IN_COUNT and PATH_IN_FREQ,
919 : and the duplicate edge EDUP will have a count of PATH_OUT_COUNT. */
920 : static void
921 374959 : update_profile (edge epath, edge edup, profile_count path_in_count,
922 : profile_count path_out_count)
923 : {
924 :
925 : /* First update the duplicated block's count. */
926 374959 : if (edup)
927 : {
928 333819 : basic_block dup_block = edup->src;
929 :
930 : /* Edup's count is reduced by path_out_count. We need to redistribute
931 : probabilities to the remaining edges. */
932 :
933 333819 : edge esucc;
934 333819 : edge_iterator ei;
935 333819 : profile_probability edup_prob
936 333819 : = path_out_count.probability_in (path_in_count);
937 :
938 : /* Either scale up or down the remaining edges.
939 : probabilities are always in range <0,1> and thus we can't do
940 : both by same loop. */
941 333819 : if (edup->probability > edup_prob)
942 : {
943 26628 : profile_probability rev_scale
944 26628 : = (profile_probability::always () - edup->probability)
945 53256 : / (profile_probability::always () - edup_prob);
946 86440 : FOR_EACH_EDGE (esucc, ei, dup_block->succs)
947 59812 : if (esucc != edup)
948 33184 : esucc->probability /= rev_scale;
949 : }
950 307191 : else if (edup->probability < edup_prob)
951 : {
952 20867 : profile_probability scale
953 20867 : = (profile_probability::always () - edup_prob)
954 41734 : / (profile_probability::always () - edup->probability);
955 65924 : FOR_EACH_EDGE (esucc, ei, dup_block->succs)
956 45057 : if (esucc != edup)
957 24190 : esucc->probability *= scale;
958 : }
959 333819 : if (edup_prob.initialized_p ())
960 311855 : edup->probability = edup_prob;
961 :
962 333819 : gcc_assert (!dup_block->count.initialized_p ());
963 333819 : dup_block->count = path_in_count;
964 : }
965 :
966 374959 : if (path_in_count == profile_count::zero ())
967 10821 : return;
968 :
969 364138 : profile_count final_count = epath->count () - path_out_count;
970 :
971 : /* Now update the original block's count in the
972 : opposite manner - remove the counts/freq that will flow
973 : into the duplicated block. Handle underflow due to precision/
974 : rounding issues. */
975 364138 : epath->src->count -= path_in_count;
976 :
977 : /* Next update this path edge's original and duplicated counts. We know
978 : that the duplicated path will have path_out_count flowing
979 : out of it (in the joiner case this is the count along the duplicated path
980 : out of the duplicated joiner). This count can then be removed from the
981 : original path edge. */
982 :
983 364138 : edge esucc;
984 364138 : edge_iterator ei;
985 364138 : profile_probability epath_prob = final_count.probability_in (epath->src->count);
986 :
987 364138 : if (epath->probability > epath_prob)
988 : {
989 225571 : profile_probability rev_scale
990 225571 : = (profile_probability::always () - epath->probability)
991 451142 : / (profile_probability::always () - epath_prob);
992 679382 : FOR_EACH_EDGE (esucc, ei, epath->src->succs)
993 453811 : if (esucc != epath)
994 228240 : esucc->probability /= rev_scale;
995 : }
996 138567 : else if (epath->probability < epath_prob)
997 : {
998 20241 : profile_probability scale
999 20241 : = (profile_probability::always () - epath_prob)
1000 40482 : / (profile_probability::always () - epath->probability);
1001 67583 : FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1002 47342 : if (esucc != epath)
1003 27101 : esucc->probability *= scale;
1004 : }
1005 364138 : if (epath_prob.initialized_p ())
1006 333580 : epath->probability = epath_prob;
1007 : }
1008 :
/* Wire up the outgoing edges from the duplicate blocks and
   update any PHIs as needed.  Also update the profile counts
   on the original and duplicate blocks and edges.  RD describes the
   jump threading path being instantiated; LOCAL_INFO carries
   per-invocation state shared by the hash table traversals.  */
void
ssa_fix_duplicate_block_edges (struct redirection_data *rd,
			       ssa_local_info_t *local_info)
{
  /* Whether more than one incoming edge was threaded to this same
     duplicate; affects which path index is used for PHI argument
     lookups below.  */
  bool multi_incomings = (rd->incoming_edges->next != NULL);
  edge e = rd->incoming_edges->e;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);
  /* The last edge on the path, i.e. the edge reaching the final
     threading destination.  */
  edge elast = path->last ()->e;
  profile_count path_in_count = profile_count::zero ();
  profile_count path_out_count = profile_count::zero ();

  /* First determine how much profile count to move from original
     path to the duplicate path.  This is tricky in the presence of
     a joiner (see comments for compute_path_counts), where some portion
     of the path's counts will flow off-path from the joiner.  In the
     non-joiner case the path_in_count and path_out_count should be the
     same.  */
  bool has_joiner = compute_path_counts (rd, local_info,
					 &path_in_count, &path_out_count);

  /* COUNT indexes into rd->dup_blocks[] and only advances when the
     path element actually has a duplicate block (see the bottom of the
     loop).  */
  for (unsigned int count = 0, i = 1; i < path->length (); i++)
    {
      edge epath = (*path)[i]->e;

      /* If we were threading through a joiner block, then we want
	 to keep its control statement and redirect an outgoing edge.
	 Else we want to remove the control statement & edges, then create
	 a new outgoing edge.  In both cases we may need to update PHIs.  */
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	{
	  edge victim;
	  edge e2;

	  /* compute_path_counts must have seen the joiner too.  */
	  gcc_assert (has_joiner);

	  /* This updates the PHIs at the destination of the duplicate
	     block.  Pass 0 instead of i if we are threading a path which
	     has multiple incoming edges.  */
	  update_destination_phis (local_info->bb, rd->dup_blocks[count],
				   path, multi_incomings ? 0 : i);

	  /* Find the edge from the duplicate block to the block we're
	     threading through.  That's the edge we want to redirect.  */
	  victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);

	  /* If there are no remaining blocks on the path to duplicate,
	     then redirect VICTIM to the final destination of the jump
	     threading path.  */
	  if (!any_remaining_duplicated_blocks (path, i))
	    {
	      if (victim->dest != elast->dest)
		{
		  e2 = redirect_edge_and_branch (victim, elast->dest);
		  /* If we redirected the edge, then we need to copy PHI arguments
		     at the target.  If the edge already existed (e2 != victim
		     case), then the PHIs in the target already have the correct
		     arguments.  */
		  if (e2 == victim)
		    copy_phi_args (e2->dest, elast, e2,
				   path, multi_incomings ? 0 : i);
		}
	      else
		e2 = victim;
	    }
	  else
	    {
	      /* Redirect VICTIM to the next duplicated block in the path.  */
	      e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);

	      /* We need to update the PHIs in the next duplicated block.  We
		 want the new PHI args to have the same value as they had
		 in the source of the next duplicate block.

		 Thus, we need to know which edge we traversed into the
		 source of the duplicate.  Furthermore, we may have
		 traversed many edges to reach the source of the duplicate.

		 Walk through the path starting at element I until we
		 hit an edge marked with EDGE_COPY_SRC_BLOCK.  We want
		 the edge from the prior element.  */
	      for (unsigned int j = i + 1; j < path->length (); j++)
		{
		  if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
		    {
		      copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
		      break;
		    }
		}
	    }

	  /* Update the counts of both the original block
	     and path edge, and the duplicates.  The path duplicate's
	     incoming count are the totals for all edges
	     incoming to this jump threading path computed earlier.
	     And we know that the duplicated path will have path_out_count
	     flowing out of it (i.e. along the duplicated path out of the
	     duplicated joiner).  */
	  update_profile (epath, e2, path_in_count, path_out_count);
	}
      else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
	{
	  /* Normal copy block: drop its control statement and dead
	     outgoing edges, then give it a single new outgoing edge.  */
	  remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
	  create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
						   multi_incomings ? 0 : i);
	  /* The second duplicate's aux was used during creation; clear it
	     so it does not confuse later processing.  */
	  if (count == 1)
	    single_succ_edge (rd->dup_blocks[1])->aux = NULL;

	  /* Update the counts of both the original block
	     and path edge, and the duplicates.  Since we are now after
	     any joiner that may have existed on the path, the count
	     flowing along the duplicated threaded path is path_out_count.
	     If we didn't have a joiner, then cur_path_freq was the sum
	     of the total frequencies along all incoming edges to the
	     thread path (path_in_freq).  If we had a joiner, it would have
	     been updated at the end of that handling to the edge frequency
	     along the duplicated joiner path edge.  */
	  update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
			  path_out_count, path_out_count);
	}
      else
	{
	  /* No copy case.  In this case we don't have an equivalent block
	     on the duplicated thread path to update, but we do need
	     to remove the portion of the counts/freqs that were moved
	     to the duplicated path from the counts/freqs flowing through
	     this block on the original path.  Since all the no-copy edges
	     are after any joiner, the removed count is the same as
	     path_out_count.

	     If we didn't have a joiner, then cur_path_freq was the sum
	     of the total frequencies along all incoming edges to the
	     thread path (path_in_freq).  If we had a joiner, it would have
	     been updated at the end of that handling to the edge frequency
	     along the duplicated joiner path edge.  */
	  update_profile (epath, NULL, path_out_count, path_out_count);
	}

      /* Increment the index into the duplicated path when we processed
	 a duplicated block.  */
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
	  || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
	{
	  count++;
	}
    }
}
1158 :
/* Hash table traversal callback routine to create duplicate blocks.
   SLOT points at the redirection_data entry for one unique jump
   threading destination; LOCAL_INFO carries the shared template block
   state.  Always returns 1 so the traversal visits every entry.  */

int
ssa_create_duplicates (struct redirection_data **slot,
		       ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;

  /* The second duplicated block in a jump threading path is specific
     to the path.  So it gets stored in RD rather than in LOCAL_DATA.

     Each time we're called, we have to look through the path and see
     if a second block needs to be duplicated.

     Note the search starts with the third edge on the path.  The first
     edge is the incoming edge, the second edge always has its source
     duplicated.  Thus we start our search with the third edge.  */
  vec<jump_thread_edge *> *path = rd->path;
  for (unsigned int i = 2; i < path->length (); i++)
    {
      if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
	  || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	{
	  /* At most one extra block is copied; stop at the first.  */
	  create_block_for_threading ((*path)[i]->e->src, rd, 1,
				      &local_info->duplicate_blocks);
	  break;
	}
    }

  /* Create a template block if we have not done so already.  Otherwise
     use the template to create a new block.  */
  if (local_info->template_block == NULL)
    {
      create_block_for_threading ((*path)[1]->e->src, rd, 0,
				  &local_info->duplicate_blocks);
      local_info->template_block = rd->dup_blocks[0];
      /* Remember where the template currently ends so later copies can
	 detect statements appended afterwards (e.g. debug stmts).  */
      local_info->template_last_to_copy
	= gsi_last_bb (local_info->template_block);

      /* We do not create any outgoing edges for the template.  We will
	 take care of that in a later traversal.  That way we do not
	 create edges that are going to just be deleted.  */
    }
  else
    {
      /* Statements added to the template after template_last_to_copy
	 must not be copied into further duplicates; temporarily split
	 them off into SEQ and reattach after copying.  */
      gimple_seq seq = NULL;
      if (gsi_stmt (local_info->template_last_to_copy)
	  != gsi_stmt (gsi_last_bb (local_info->template_block)))
	{
	  if (gsi_end_p (local_info->template_last_to_copy))
	    {
	      /* The template was originally empty; detach everything.  */
	      seq = bb_seq (local_info->template_block);
	      set_bb_seq (local_info->template_block, NULL);
	    }
	  else
	    seq = gsi_split_seq_after (local_info->template_last_to_copy);
	}
      create_block_for_threading (local_info->template_block, rd, 0,
				  &local_info->duplicate_blocks);
      /* Reattach the split-off tail to the template.  */
      if (seq)
	{
	  if (gsi_end_p (local_info->template_last_to_copy))
	    set_bb_seq (local_info->template_block, seq);
	  else
	    gsi_insert_seq_after (&local_info->template_last_to_copy,
				  seq, GSI_SAME_STMT);
	}

      /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
	 block.  */
      ssa_fix_duplicate_block_edges (rd, local_info);
    }

  if (MAY_HAVE_DEBUG_STMTS)
    {
      /* Copy debug stmts from each NO_COPY src block to the block
	 that would have been its predecessor, if we can append to it
	 (we can't add stmts after a block-ending stmt), or prepending
	 to the duplicate of the successor, if there is one.  If
	 there's no duplicate successor, we'll mostly drop the blocks
	 on the floor; propagate_threaded_block_debug_into, called
	 elsewhere, will consolidate and preserve the effects of the
	 binds, but none of the markers.  */
      gimple_stmt_iterator copy_to = gsi_last_bb (rd->dup_blocks[0]);
      if (!gsi_end_p (copy_to))
	{
	  if (stmt_ends_bb_p (gsi_stmt (copy_to)))
	    {
	      if (rd->dup_blocks[1])
		copy_to = gsi_after_labels (rd->dup_blocks[1]);
	      else
		/* No place to append; signal "drop" with a null iterator.  */
		copy_to = gsi_none ();
	    }
	  else
	    gsi_next (&copy_to);
	}
      /* J tracks which dup block COPY_TO currently points into.  */
      for (unsigned int i = 2, j = 0; i < path->length (); i++)
	if ((*path)[i]->type == EDGE_NO_COPY_SRC_BLOCK
	    && gsi_bb (copy_to))
	  {
	    /* Copy only the debug statements of the no-copy block.  */
	    for (gimple_stmt_iterator gsi = gsi_start_bb ((*path)[i]->e->src);
		 !gsi_end_p (gsi); gsi_next (&gsi))
	      {
		if (!is_gimple_debug (gsi_stmt (gsi)))
		  continue;
		gimple *stmt = gsi_stmt (gsi);
		gimple *copy = gimple_copy (stmt);
		gsi_insert_before (&copy_to, copy, GSI_SAME_STMT);
	      }
	  }
	else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
		 || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	  {
	    /* Advance to the next duplicate block's append point.  */
	    j++;
	    /* Only two dup blocks exist (dup_blocks[0..1]).  */
	    gcc_assert (j < 2);
	    copy_to = gsi_last_bb (rd->dup_blocks[j]);
	    if (!gsi_end_p (copy_to))
	      {
		if (stmt_ends_bb_p (gsi_stmt (copy_to)))
		  copy_to = gsi_none ();
		else
		  gsi_next (&copy_to);
	      }
	  }
    }

  /* Keep walking the hash table.  */
  return 1;
}
1288 :
1289 : /* We did not create any outgoing edges for the template block during
1290 : block creation. This hash table traversal callback creates the
1291 : outgoing edge for the template block. */
1292 :
1293 : inline int
1294 233098 : ssa_fixup_template_block (struct redirection_data **slot,
1295 : ssa_local_info_t *local_info)
1296 : {
1297 233098 : struct redirection_data *rd = *slot;
1298 :
1299 : /* If this is the template block halt the traversal after updating
1300 : it appropriately.
1301 :
1302 : If we were threading through an joiner block, then we want
1303 : to keep its control statement and redirect an outgoing edge.
1304 : Else we want to remove the control statement & edges, then create
1305 : a new outgoing edge. In both cases we may need to update PHIs. */
1306 233098 : if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
1307 : {
1308 233098 : ssa_fix_duplicate_block_edges (rd, local_info);
1309 233098 : return 0;
1310 : }
1311 :
1312 : return 1;
1313 : }
1314 :
/* Hash table traversal callback to redirect each incoming edge
   associated with this hash table element to its new destination.
   Also frees the incoming-edge list and clears each edge's AUX field.
   Always returns 1 so the traversal visits every entry.  */

static int
ssa_redirect_edges (struct redirection_data **slot,
		    ssa_local_info_t *local_info)
{
  struct redirection_data *rd = *slot;
  struct el *next, *el;

  /* Walk over all the incoming edges associated with this hash table
     entry.  */
  for (el = rd->incoming_edges; el; el = next)
    {
      edge e = el->e;
      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      /* Go ahead and free this element from the list.  Doing this now
	 avoids the need for another list walk when we destroy the hash
	 table.  */
      next = el->next;
      free (el);

      local_info->num_threaded_edges++;

      if (rd->dup_blocks[0])
	{
	  edge e2;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  Threaded jump %d --> %d to %d\n",
		     e->src->index, e->dest->index, rd->dup_blocks[0]->index);

	  /* Redirect the incoming edge (possibly to the joiner block) to the
	     appropriate duplicate block.  */
	  e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
	  /* E already pointed into this function's CFG region, so the
	     redirect must reuse E itself rather than create a new edge.  */
	  gcc_assert (e == e2);
	  flush_pending_stmts (e2);
	}

      /* Go ahead and clear E->aux.  It's not needed anymore and failure
	 to clear it will cause all kinds of unpleasant problems later.  */
      path->release ();
      e->aux = NULL;

    }

  /* Indicate that we actually threaded one or more jumps.  */
  if (rd->incoming_edges)
    local_info->jumps_threaded = true;

  return 1;
}
1368 :
1369 : /* Return true if this block has no executable statements other than
1370 : a simple ctrl flow instruction. When the number of outgoing edges
1371 : is one, this is equivalent to a "forwarder" block. */
1372 :
1373 : static bool
1374 160118 : redirection_block_p (basic_block bb)
1375 : {
1376 160118 : gimple_stmt_iterator gsi;
1377 :
1378 : /* Advance to the first executable statement. */
1379 160118 : gsi = gsi_start_bb (bb);
1380 160118 : while (!gsi_end_p (gsi)
1381 357556 : && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
1382 : || is_gimple_debug (gsi_stmt (gsi))
1383 : || gimple_nop_p (gsi_stmt (gsi))
1384 159437 : || gimple_clobber_p (gsi_stmt (gsi))))
1385 197438 : gsi_next (&gsi);
1386 :
1387 : /* Check if this is an empty block. */
1388 160118 : if (gsi_end_p (gsi))
1389 : return true;
1390 :
1391 : /* Test that we've reached the terminating control statement. */
1392 159128 : return gsi_stmt (gsi)
1393 159128 : && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
1394 : || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
1395 : || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
1396 : }
1397 :
/* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
   is reached via one or more specific incoming edges, we know which
   outgoing edge from BB will be traversed.

   We want to redirect those incoming edges to the target of the
   appropriate outgoing edge.  Doing so avoids a conditional branch
   and may expose new optimization opportunities.  Note that we have
   to update dominator tree and SSA graph after such changes.

   The key to keeping the SSA graph update manageable is to duplicate
   the side effects occurring in BB so that those side effects still
   occur on the paths which bypass BB after redirecting edges.

   We accomplish this by creating duplicates of BB and arranging for
   the duplicates to unconditionally pass control to one specific
   successor of BB.  We then revector the incoming edges into BB to
   the appropriate duplicate of BB.

   If NOLOOP_ONLY is true, we only perform the threading as long as it
   does not affect the structure of the loops in a nontrivial way.

   If JOINERS is true, then thread through joiner blocks as well.

   Returns true if any jumps were actually threaded.  */

bool
fwd_jt_path_registry::thread_block_1 (basic_block bb,
				      bool noloop_only,
				      bool joiners)
{
  /* E is an incoming edge into BB that we may or may not want to
     redirect to a duplicate of BB.  */
  edge e, e2;
  edge_iterator ei;
  ssa_local_info_t local_info;

  local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
  local_info.need_profile_correction = false;
  local_info.num_threaded_edges = 0;

  /* To avoid scanning a linear array for the element we need we instead
     use a hash table.  For normal code there should be no noticeable
     difference.  However, if we have a block with a large number of
     incoming and outgoing edges such linear searches can get expensive.  */
  m_redirection_data
    = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));

  /* Record each unique threaded destination into a hash table for
     efficient lookups.  LAST remembers the first joiner path's final
     edge so differing final destinations can be detected below.  */
  edge last = NULL;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      /* Only edges carrying a registered jump threading path matter.  */
      if (e->aux == NULL)
	continue;

      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      /* Joiner paths and non-joiner paths are processed in separate
	 invocations; skip the kind we are not handling this time.  */
      if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
	  || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
	continue;

      /* When a NO_COPY_SRC block became non-empty cancel the path.  */
      if (path->last ()->type == EDGE_NO_COPY_SRC_BLOCK)
	{
	  auto gsi = gsi_start_nondebug_bb (path->last ()->e->src);
	  if (!gsi_end_p (gsi)
	      && !is_ctrl_stmt (gsi_stmt (gsi)))
	    {
	      cancel_thread (path, "Non-empty EDGE_NO_COPY_SRC_BLOCK");
	      e->aux = NULL;
	      continue;
	    }
	}

      e2 = path->last ()->e;
      if (!e2 || noloop_only)
	{
	  /* If NOLOOP_ONLY is true, we only allow threading through the
	     header of a loop to exit edges.  */

	  /* One case occurs when there was loop header buried in a jump
	     threading path that crosses loop boundaries.  We do not try
	     and thread this elsewhere, so just cancel the jump threading
	     request by clearing the AUX field now.  */
	  if (bb->loop_father != e2->src->loop_father
	      && (!loop_exit_edge_p (e2->src->loop_father, e2)
		  || flow_loop_nested_p (bb->loop_father,
					 e2->dest->loop_father)))
	    {
	      /* Since this case is not handled by our special code
		 to thread through a loop header, we must explicitly
		 cancel the threading request here.  */
	      cancel_thread (path, "Threading through unhandled loop header");
	      e->aux = NULL;
	      continue;
	    }

	  /* Another case occurs when trying to thread through our
	     own loop header, possibly from inside the loop.  We will
	     thread these later.  */
	  unsigned int i;
	  for (i = 1; i < path->length (); i++)
	    {
	      if ((*path)[i]->e->src == bb->loop_father->header
		  && (!loop_exit_edge_p (bb->loop_father, e2)
		      || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
		break;
	    }

	  /* The loop above broke out early iff the path goes through
	     our own loop header; defer such paths.  */
	  if (i != path->length ())
	    continue;

	  /* Loop parallelization can be confused by the result of
	     threading through the loop exit test back into the loop.
	     However, threading those jumps seems to help other codes.

	     I have been unable to find anything related to the shape of
	     the CFG, the contents of the affected blocks, etc which would
	     allow a more sensible test than what we're using below which
	     merely avoids the optimization when parallelizing loops.  */
	  if (flag_tree_parallelize_loops > 1)
	    {
	      for (i = 1; i < path->length (); i++)
		if (bb->loop_father == e2->src->loop_father
		    && loop_exits_from_bb_p (bb->loop_father,
					     (*path)[i]->e->src)
		    && !loop_exit_edge_p (bb->loop_father, e2))
		  break;

	      if (i != path->length ())
		{
		  cancel_thread (path, "Threading through loop exit");
		  e->aux = NULL;
		  continue;
		}
	    }
	}

      /* Insert the outgoing edge into the hash table if it is not
	 already in the hash table.  */
      lookup_redirection_data (e, INSERT);

      /* When we have thread paths through a common joiner with different
	 final destinations, then we may need corrections to deal with
	 profile insanities.  See the big comment before compute_path_counts.  */
      if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	{
	  if (!last)
	    last = e2;
	  else if (e2 != last)
	    local_info.need_profile_correction = true;
	}
    }

  /* We do not update dominance info.  */
  free_dominance_info (CDI_DOMINATORS);

  /* We know we only thread through the loop header to loop exits.
     Let the basic block duplication hook know we are not creating
     a multiple entry loop.  */
  if (noloop_only
      && bb == bb->loop_father->header)
    set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));

  /* Now create duplicates of BB.

     Note that for a block with a high outgoing degree we can waste
     a lot of time and memory creating and destroying useless edges.

     So we first duplicate BB and remove the control structure at the
     tail of the duplicate as well as all outgoing edges from the
     duplicate.  We then use that duplicate block as a template for
     the rest of the duplicates.  */
  local_info.template_block = NULL;
  local_info.bb = bb;
  local_info.jumps_threaded = false;
  m_redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
			    (&local_info);

  /* The template does not have an outgoing edge.  Create that outgoing
     edge and update PHI nodes as the edge's target as necessary.

     We do this after creating all the duplicates to avoid creating
     unnecessary edges.  */
  m_redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
			    (&local_info);

  /* The hash table traversals above created the duplicate blocks (and the
     statements within the duplicate blocks).  This loop creates PHI nodes for
     the duplicated blocks and redirects the incoming edges into BB to reach
     the duplicates of BB.  */
  m_redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
			    (&local_info);

  /* Done with this block.  Clear REDIRECTION_DATA.  */
  delete m_redirection_data;
  m_redirection_data = NULL;

  if (noloop_only
      && bb == bb->loop_father->header)
    set_loop_copy (bb->loop_father, NULL);

  BITMAP_FREE (local_info.duplicate_blocks);
  local_info.duplicate_blocks = NULL;

  m_num_threaded_edges += local_info.num_threaded_edges;

  /* Indicate to our caller whether or not any jumps were threaded.  */
  return local_info.jumps_threaded;
}
1606 :
1607 : /* Wrapper for thread_block_1 so that we can first handle jump
1608 : thread paths which do not involve copying joiner blocks, then
1609 : handle jump thread paths which have joiner blocks.
1610 :
1611 : By doing things this way we can be as aggressive as possible and
1612 : not worry that copying a joiner block will create a jump threading
1613 : opportunity. */
1614 :
1615 : bool
1616 368828 : fwd_jt_path_registry::thread_block (basic_block bb, bool noloop_only)
1617 : {
1618 368828 : bool retval;
1619 368828 : retval = thread_block_1 (bb, noloop_only, false);
1620 368828 : retval |= thread_block_1 (bb, noloop_only, true);
1621 368828 : return retval;
1622 : }
1623 :
1624 : /* Callback for dfs_enumerate_from. Returns true if BB is different
1625 : from STOP and DBDS_CE_STOP. */
1626 :
1627 : static basic_block dbds_ce_stop;
1628 : static bool
1629 1541557 : dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1630 : {
1631 1541557 : return (bb != (const_basic_block) stop
1632 1541557 : && bb != dbds_ce_stop);
1633 : }
1634 :
1635 : /* Evaluates the dominance relationship of latch of the LOOP and BB, and
1636 : returns the state. */
1637 :
enum bb_dom_status
determine_bb_domination_status (class loop *loop, basic_block bb)
{
  basic_block *bblocks;
  unsigned nblocks, i;
  bool bb_reachable = false;
  edge_iterator ei;
  edge e;

  /* This function assumes BB is a successor of LOOP->header.
     If that is not the case return DOMST_NONDOMINATING which
     is always safe.  */
  {
    bool ok = false;

    FOR_EACH_EDGE (e, ei, bb->preds)
      {
	if (e->src == loop->header)
	  {
	    ok = true;
	    break;
	  }
      }

    if (!ok)
      return DOMST_NONDOMINATING;
  }

  /* The latch trivially dominates itself.  */
  if (bb == loop->latch)
    return DOMST_DOMINATING;

  /* Check that BB dominates LOOP->latch, and that it is back-reachable
     from it.  */

  /* Enumerate the blocks backward-reachable from the latch, stopping the
     walk at BB (the STOP argument) and at LOOP->header (via the global
     DBDS_CE_STOP used by the callback).  */
  bblocks = XCNEWVEC (basic_block, loop->num_nodes);
  dbds_ce_stop = loop->header;
  nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
				bblocks, loop->num_nodes, bb);
  for (i = 0; i < nblocks; i++)
    FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
      {
	/* If the header reaches the latch without passing through BB,
	   then BB does not dominate the latch.  */
	if (e->src == loop->header)
	  {
	    free (bblocks);
	    return DOMST_NONDOMINATING;
	  }
	/* An edge from BB into the enumerated set means the latch is
	   back-reachable from BB.  */
	if (e->src == bb)
	  bb_reachable = true;
      }

  free (bblocks);
  /* BB dominates the latch, but if the latch is not back-reachable from
     BB the loop's backedge structure has been broken.  */
  return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
}
1691 :
1692 : /* Thread jumps through the header of LOOP. Returns true if cfg changes.
1693 : If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1694 : to the inside of the loop. */
1695 :
bool
fwd_jt_path_registry::thread_through_loop_header (class loop *loop,
						  bool may_peel_loop_headers)
{
  basic_block header = loop->header;
  edge e, tgt_edge, latch = loop_latch_edge (loop);
  edge_iterator ei;
  basic_block tgt_bb, atgt_bb;
  enum bb_dom_status domst;

  /* We have already threaded through headers to exits, so all the threading
     requests now are to the inside of the loop.  We need to avoid creating
     irreducible regions (i.e., loops with more than one entry block), and
     also loop with several latch edges, or new subloops of the loop (although
     there are cases where it might be appropriate, it is difficult to decide,
     and doing it wrongly may confuse other optimizers).

     We could handle more general cases here.  However, the intention is to
     preserve some information about the loop, which is impossible if its
     structure changes significantly, in a way that is not well understood.
     Thus we only handle few important special cases, in which also updating
     of the loop-carried information should be feasible:

     1) Propagation of latch edge to a block that dominates the latch block
	of a loop.  This aims to handle the following idiom:

	first = 1;
	while (1)
	  {
	    if (first)
	      initialize;
	    first = 0;
	    body;
	  }

	After threading the latch edge, this becomes

	first = 1;
	if (first)
	  initialize;
	while (1)
	  {
	    first = 0;
	    body;
	  }

	The original header of the loop is moved out of it, and we may thread
	the remaining edges through it without further constraints.

     2) All entry edges are propagated to a single basic block that dominates
	the latch block of the loop.  This aims to handle the following idiom
	(normally created for "for" loops):

	i = 0;
	while (1)
	  {
	    if (i >= 100)
	      break;
	    body;
	    i++;
	  }

	This becomes

	i = 0;
	while (1)
	  {
	    body;
	    i++;
	    if (i >= 100)
	      break;
	  }
     */

  /* Threading through the header won't improve the code if the header has just
     one successor.  */
  if (single_succ_p (header))
    goto fail;

  if (!may_peel_loop_headers && !redirection_block_p (loop->header))
    goto fail;
  else
    {
      tgt_bb = NULL;
      tgt_edge = NULL;
      FOR_EACH_EDGE (e, ei, header->preds)
	{
	  if (!e->aux)
	    {
	      /* The latch edge is allowed to remain unthreaded.  */
	      if (e == latch)
		continue;

	      /* If latch is not threaded, and there is a header
		 edge that is not threaded, we would create loop
		 with multiple entries.  */
	      goto fail;
	    }

	  vec<jump_thread_edge *> *path = THREAD_PATH (e);

	  /* path[0] is the incoming edge E itself; path[1] is the edge
	     leaving the header along the thread.  */
	  if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	    goto fail;
	  tgt_edge = (*path)[1]->e;
	  atgt_bb = tgt_edge->dest;
	  if (!tgt_bb)
	    tgt_bb = atgt_bb;
	  /* Two targets of threading would make us create loop
	     with multiple entries.  */
	  else if (tgt_bb != atgt_bb)
	    goto fail;
	}

      if (!tgt_bb)
	{
	  /* There are no threading requests.  */
	  return false;
	}

      /* Redirecting to empty loop latch is useless.  */
      if (tgt_bb == loop->latch
	  && empty_block_p (loop->latch))
	goto fail;
    }

  /* The target block must dominate the loop latch, otherwise we would be
     creating a subloop.  */
  domst = determine_bb_domination_status (loop, tgt_bb);
  if (domst == DOMST_NONDOMINATING)
    goto fail;
  if (domst == DOMST_LOOP_BROKEN)
    {
      /* If the loop ceased to exist, mark it as such, and thread through its
	 original header.  */
      mark_loop_for_removal (loop);
      return thread_block (header, false);
    }

  if (tgt_bb->loop_father->header == tgt_bb)
    {
      /* If the target of the threading is a header of a subloop, we need
	 to create a preheader for it, so that the headers of the two loops
	 do not merge.  */
      if (EDGE_COUNT (tgt_bb->preds) > 2)
	{
	  tgt_bb = create_preheader (tgt_bb->loop_father, 0);
	  gcc_assert (tgt_bb != NULL);
	}
      else
	tgt_bb = split_edge (tgt_edge);
    }

  basic_block new_preheader;

  /* Now consider the case entry edges are redirected to the new entry
     block.  Remember one entry edge, so that we can find the new
     preheader (its destination after threading).  */
  FOR_EACH_EDGE (e, ei, header->preds)
    {
      if (e->aux)
	break;
    }

  /* The duplicate of the header is the new preheader of the loop.  Ensure
     that it is placed correctly in the loop hierarchy.  */
  set_loop_copy (loop, loop_outer (loop));

  thread_block (header, false);
  set_loop_copy (loop, NULL);
  new_preheader = e->dest;

  /* Create the new latch block.  This is always necessary, as the latch
     must have only a single successor, but the original header had at
     least two successors.  */
  loop->latch = NULL;
  mfb_kj_edge = single_succ_edge (new_preheader);
  loop->header = mfb_kj_edge->dest;
  latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
  loop->header = latch->dest;
  loop->latch = latch->src;
  return true;

fail:
  /* We failed to thread anything.  Cancel the requests.  */
  FOR_EACH_EDGE (e, ei, header->preds)
    {
      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      if (path)
	{
	  cancel_thread (path, "Failure in thread_through_loop_header");
	  e->aux = NULL;
	}
    }
  return false;
}
1891 :
1892 : /* E1 and E2 are edges into the same basic block. Return TRUE if the
1893 : PHI arguments associated with those edges are equal or there are no
1894 : PHI arguments, otherwise return FALSE. */
1895 :
1896 : static bool
1897 28799 : phi_args_equal_on_edges (edge e1, edge e2)
1898 : {
1899 28799 : gphi_iterator gsi;
1900 28799 : int indx1 = e1->dest_idx;
1901 28799 : int indx2 = e2->dest_idx;
1902 :
1903 45506 : for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1904 : {
1905 18008 : gphi *phi = gsi.phi ();
1906 :
1907 18008 : if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
1908 18008 : gimple_phi_arg_def (phi, indx2), 0))
1909 : return false;
1910 : }
1911 : return true;
1912 : }
1913 :
1914 : /* Return the number of non-debug statements and non-virtual PHIs in a
1915 : block. */
1916 :
1917 : static unsigned int
1918 9418 : count_stmts_and_phis_in_block (basic_block bb)
1919 : {
1920 9418 : unsigned int num_stmts = 0;
1921 :
1922 9418 : gphi_iterator gpi;
1923 24580 : for (gpi = gsi_start_phis (bb); !gsi_end_p (gpi); gsi_next (&gpi))
1924 30324 : if (!virtual_operand_p (PHI_RESULT (gpi.phi ())))
1925 9193 : num_stmts++;
1926 :
1927 9418 : gimple_stmt_iterator gsi;
1928 78211 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1929 : {
1930 59375 : gimple *stmt = gsi_stmt (gsi);
1931 59375 : if (!is_gimple_debug (stmt))
1932 53117 : num_stmts++;
1933 : }
1934 :
1935 9418 : return num_stmts;
1936 : }
1937 :
1938 :
1939 : /* Walk through the registered jump threads and convert them into a
1940 : form convenient for this pass.
1941 :
1942 : Any block which has incoming edges threaded to outgoing edges
1943 : will have its entry in THREADED_BLOCK set.
1944 :
1945 : Any threaded edge will have its new outgoing edge stored in the
1946 : original edge's AUX field.
1947 :
1948 : This form avoids the need to walk all the edges in the CFG to
1949 : discover blocks which need processing and avoids unnecessary
1950 : hash table lookups to map from threaded edge to new target. */
1951 :
void
fwd_jt_path_registry::mark_threaded_blocks (bitmap threaded_blocks)
{
  unsigned int i;
  bitmap_iterator bi;
  auto_bitmap tmp;
  basic_block bb;
  edge e;
  edge_iterator ei;

  /* It is possible to have jump threads in which one is a subpath
     of the other.  ie, (A, B), (B, C), (C, D) where B is a joiner
     block and (B, C), (C, D) where no joiner block exists.

     When this occurs ignore the jump thread request with the joiner
     block.  It's totally subsumed by the simpler jump thread request.

     This results in less block copying, simpler CFGs.  More importantly,
     when we duplicate the joiner block, B, in this case we will create
     a new threading opportunity that we wouldn't be able to optimize
     until the next jump threading iteration.

     So first convert the jump thread requests which do not require a
     joiner block.  */
  for (i = 0; i < m_paths.length (); i++)
    {
      vec<jump_thread_edge *> *path = m_paths[i];

      if (path->length () > 1
	  && (*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
	{
	  /* Record the path on its starting edge and remember the
	     destination block as potentially threaded.  */
	  edge e = (*path)[0]->e;
	  e->aux = (void *)path;
	  bitmap_set_bit (tmp, e->dest->index);
	}
    }

  /* Now iterate again, converting cases where we want to thread
     through a joiner block, but only if no other edge on the path
     already has a jump thread attached to it.  We do this in two passes,
     to avoid situations where the order in the paths vec can hide overlapping
     threads (the path is recorded on the incoming edge, so we would miss
     cases where the second path starts at a downstream edge on the same
     path).  First record all joiner paths, deleting any in the unexpected
     case where there is already a path for that incoming edge.  */
  for (i = 0; i < m_paths.length ();)
    {
      vec<jump_thread_edge *> *path = m_paths[i];

      if (path->length () > 1
	  && (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	{
	  /* Attach the path to the starting edge if none is yet recorded.  */
	  if ((*path)[0]->e->aux == NULL)
	    {
	      (*path)[0]->e->aux = path;
	      i++;
	    }
	  else
	    {
	      /* Note I is deliberately not incremented: unordered_remove
		 moved a new element into slot I.  */
	      m_paths.unordered_remove (i);
	      cancel_thread (path);
	    }
	}
      else
	{
	  i++;
	}
    }

  /* Second, look for paths that have any other jump thread attached to
     them, and either finish converting them or cancel them.  */
  for (i = 0; i < m_paths.length ();)
    {
      vec<jump_thread_edge *> *path = m_paths[i];
      edge e = (*path)[0]->e;

      if (path->length () > 1
	  && (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
	{
	  /* Scan the rest of the path for an edge that already carries
	     another thread request.  */
	  unsigned int j;
	  for (j = 1; j < path->length (); j++)
	    if ((*path)[j]->e->aux != NULL)
	      break;

	  /* If we iterated through the entire path without exiting the loop,
	     then we are good to go, record it.  */
	  if (j == path->length ())
	    {
	      bitmap_set_bit (tmp, e->dest->index);
	      i++;
	    }
	  else
	    {
	      e->aux = NULL;
	      m_paths.unordered_remove (i);
	      cancel_thread (path);
	    }
	}
      else
	{
	  i++;
	}
    }

  /* When optimizing for size, prune all thread paths where statement
     duplication is necessary.

     We walk the jump thread path looking for copied blocks.  There's
     two types of copied blocks.

       EDGE_COPY_SRC_JOINER_BLOCK is always copied and thus we will
       cancel the jump threading request when optimizing for size.

       EDGE_COPY_SRC_BLOCK which is copied, but some of its statements
       will be killed by threading.  If threading does not kill all of
       its statements, then we should cancel the jump threading request
       when optimizing for size.  */
  if (optimize_function_for_size_p (cfun))
    {
      EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
	{
	  FOR_EACH_EDGE (e, ei, BASIC_BLOCK_FOR_FN (cfun, i)->preds)
	    if (e->aux)
	      {
		vec<jump_thread_edge *> *path = THREAD_PATH (e);

		unsigned int j;
		for (j = 1; j < path->length (); j++)
		  {
		    bb = (*path)[j]->e->src;
		    /* Pure forwarding blocks cost nothing to copy.  */
		    if (redirection_block_p (bb))
		      ;
		    else if ((*path)[j]->type == EDGE_COPY_SRC_JOINER_BLOCK
			     || ((*path)[j]->type == EDGE_COPY_SRC_BLOCK
				 && (count_stmts_and_phis_in_block (bb)
				     != estimate_threading_killed_stmts (bb))))
		      break;
		  }

		/* An early exit from the loop above means copying BB
		   would grow the function; cancel the request.  */
		if (j != path->length ())
		  {
		    cancel_thread (path);
		    e->aux = NULL;
		  }
		else
		  bitmap_set_bit (threaded_blocks, i);
	      }
	}
    }
  else
    bitmap_copy (threaded_blocks, tmp);

  /* If we have a joiner block (J) which has two successors S1 and S2 and
     we are threading though S1 and the final destination of the thread
     is S2, then we must verify that any PHI nodes in S2 have the same
     PHI arguments for the edge J->S2 and J->S1->...->S2.

     We used to detect this prior to registering the jump thread, but
     that prohibits propagation of edge equivalences into non-dominated
     PHI nodes as the equivalency test might occur before propagation.

     This must also occur after we truncate any jump threading paths
     as this scenario may only show up after truncation.

     This works for now, but will need improvement as part of the FSA
     optimization.

     Note since we've moved the thread request data to the edges,
     we have to iterate on those rather than the threaded_edges vector.  */
  EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  if (e->aux)
	    {
	      vec<jump_thread_edge *> *path = THREAD_PATH (e);
	      bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);

	      if (have_joiner)
		{
		  basic_block joiner = e->dest;
		  edge final_edge = path->last ()->e;
		  basic_block final_dest = final_edge->dest;
		  edge e2 = find_edge (joiner, final_dest);

		  if (e2 && !phi_args_equal_on_edges (e2, final_edge))
		    {
		      cancel_thread (path);
		      e->aux = NULL;
		    }
		}
	    }
	}
    }

  /* Look for jump threading paths which cross multiple loop headers.

     The code to thread through loop headers will change the CFG in ways
     that invalidate the cached loop iteration information.  So we must
     detect that case and wipe the cached information.  */
  EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  if (e->aux)
	    {
	      gcc_assert (loops_state_satisfies_p
			  (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS));
	      vec<jump_thread_edge *> *path = THREAD_PATH (e);

	      /* NOTE(review): this inner I shadows the outer bitmap
		 index I; harmless here since the outer I is not used
		 inside, but easy to trip over when editing.  */
	      for (unsigned int i = 0, crossed_headers = 0;
		   i < path->length ();
		   i++)
		{
		  basic_block dest = (*path)[i]->e->dest;
		  basic_block src = (*path)[i]->e->src;
		  /* If we enter a loop.  */
		  if (flow_loop_nested_p (src->loop_father, dest->loop_father))
		    ++crossed_headers;
		  /* If we step from a block outside an irreducible region
		     to a block inside an irreducible region, then we have
		     crossed into a loop.  */
		  else if (! (src->flags & BB_IRREDUCIBLE_LOOP)
			   && (dest->flags & BB_IRREDUCIBLE_LOOP))
		    ++crossed_headers;
		  if (crossed_headers > 1)
		    {
		      vect_free_loop_info_assumptions
			((*path)[path->length () - 1]->e->dest->loop_father);
		      break;
		    }
		}
	    }
	}
    }
}
2191 :
2192 :
2193 : /* Verify that the REGION is a valid jump thread. A jump thread is a special
2194 : case of SEME Single Entry Multiple Exits region in which all nodes in the
2195 : REGION have exactly one incoming edge. The only exception is the first block
2196 : that may not have been connected to the rest of the cfg yet. */
2197 :
2198 : DEBUG_FUNCTION void
2199 1260359 : verify_jump_thread (basic_block *region, unsigned n_region)
2200 : {
2201 3020964 : for (unsigned i = 0; i < n_region; i++)
2202 1760605 : gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
2203 1260359 : }
2204 :
2205 : /* Return true when BB is one of the first N items in BBS. */
2206 :
2207 : static inline bool
2208 2532883 : bb_in_bbs (basic_block bb, basic_block *bbs, int n)
2209 : {
2210 6030971 : for (int i = 0; i < n; i++)
2211 3514075 : if (bb == bbs[i])
2212 : return true;
2213 :
2214 : return false;
2215 : }
2216 :
2217 : void
2218 23 : jt_path_registry::debug_path (FILE *dump_file, int pathno)
2219 : {
2220 23 : vec<jump_thread_edge *> *p = m_paths[pathno];
2221 23 : fprintf (dump_file, "path: ");
2222 136 : for (unsigned i = 0; i < p->length (); ++i)
2223 113 : fprintf (dump_file, "%d -> %d, ",
2224 113 : (*p)[i]->e->src->index, (*p)[i]->e->dest->index);
2225 23 : fprintf (dump_file, "\n");
2226 23 : }
2227 :
2228 : void
2229 0 : jt_path_registry::debug ()
2230 : {
2231 0 : for (unsigned i = 0; i < m_paths.length (); ++i)
2232 0 : debug_path (stderr, i);
2233 0 : }
2234 :
2235 : /* Rewire a jump_thread_edge so that the source block is now a
2236 : threaded source block.
2237 :
2238 : PATH_NUM is an index into the global path table PATHS.
2239 : EDGE_NUM is the jump thread edge number into said path.
2240 :
2241 : Returns TRUE if we were able to successfully rewire the edge. */
2242 :
bool
back_jt_path_registry::rewire_first_differing_edge (unsigned path_num,
						    unsigned edge_num)
{
  vec<jump_thread_edge *> *path = m_paths[path_num];
  /* E is a reference into the path, so assigning to it below rewires
     the stored jump_thread_edge in place.  */
  edge &e = (*path)[edge_num]->e;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "rewiring edge candidate: %d -> %d\n",
	     e->src->index, e->dest->index);
  /* Look up the duplicate of the source block created by the recent
     thread duplication (recorded in the original/copy tables).  */
  basic_block src_copy = get_bb_copy (e->src);
  if (src_copy == NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "ignoring candidate: there is no src COPY\n");
      return false;
    }
  edge new_edge = find_edge (src_copy, e->dest);
  /* If the previously threaded paths created a flow graph where we
     can no longer figure out where to go, give up.  */
  if (new_edge == NULL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "ignoring candidate: we lost our way\n");
      return false;
    }
  /* Success: make the path start from the duplicated block.  */
  e = new_edge;
  return true;
}
2271 :
2272 : /* After a path has been jump threaded, adjust the remaining paths
2273 : that are subsets of this path, so these paths can be safely
2274 : threaded within the context of the new threaded path.
2275 :
2276 : For example, suppose we have just threaded:
2277 :
2278 : 5 -> 6 -> 7 -> 8 -> 12 => 5 -> 6' -> 7' -> 8' -> 12'
2279 :
2280 : And we have an upcoming threading candidate:
2281 : 5 -> 6 -> 7 -> 8 -> 15 -> 20
2282 :
2283 : This function adjusts the upcoming path into:
2284 : 8' -> 15 -> 20
2285 :
2286 : CURR_PATH_NUM is an index into the global paths table. It
2287 : specifies the path that was just threaded. */
2288 :
void
back_jt_path_registry::adjust_paths_after_duplication (unsigned curr_path_num)
{
  vec<jump_thread_edge *> *curr_path = m_paths[curr_path_num];

  /* Iterate through all the other paths and adjust them.  */
  for (unsigned cand_path_num = 0; cand_path_num < m_paths.length (); )
    {
      if (cand_path_num == curr_path_num)
	{
	  ++cand_path_num;
	  continue;
	}
      /* Make sure the candidate to adjust starts with the same path
	 as the recently threaded path.  */
      vec<jump_thread_edge *> *cand_path = m_paths[cand_path_num];
      if ((*cand_path)[0]->e != (*curr_path)[0]->e)
	{
	  ++cand_path_num;
	  continue;
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "adjusting candidate: ");
	  debug_path (dump_file, cand_path_num);
	}

      /* Chop off from the candidate path any prefix it shares with
	 the recently threaded path.  */
      unsigned minlength = MIN (curr_path->length (), cand_path->length ());
      unsigned j;
      for (j = 0; j < minlength; ++j)
	{
	  edge cand_edge = (*cand_path)[j]->e;
	  edge curr_edge = (*curr_path)[j]->e;

	  /* Once the prefix no longer matches, adjust the first
	     non-matching edge to point from an adjusted edge to
	     wherever it was going.  */
	  if (cand_edge != curr_edge)
	    {
	      gcc_assert (cand_edge->src == curr_edge->src);
	      if (!rewire_first_differing_edge (cand_path_num, j))
		goto remove_candidate_from_list;
	      break;
	    }
	}
      if (j == minlength)
	{
	  /* If we consumed the max subgraph we could look at, and
	     still didn't find any different edges, it's the
	     last edge after MINLENGTH.  */
	  if (cand_path->length () > minlength)
	    {
	      if (!rewire_first_differing_edge (cand_path_num, j))
		goto remove_candidate_from_list;
	    }
	  else if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "adjusting first edge after MINLENGTH.\n");
	}
      if (j > 0)
	{
	  /* If we are removing everything, delete the entire candidate.  */
	  if (j == cand_path->length ())
	    {
	      /* Also the target of the two gotos above: a candidate whose
		 first differing edge could not be rewired is dropped.  */
	    remove_candidate_from_list:
	      cancel_thread (cand_path, "Adjusted candidate is EMPTY");
	      m_paths.unordered_remove (cand_path_num);
	      /* Do not advance CAND_PATH_NUM: unordered_remove moved a
		 new candidate into this slot.  */
	      continue;
	    }
	  /* Otherwise, just remove the redundant sub-path.  */
	  if (cand_path->length () - j > 1)
	    cand_path->block_remove (0, j);
	  else if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Dropping illformed candidate.\n");
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "adjusted candidate: ");
	  debug_path (dump_file, cand_path_num);
	}
      ++cand_path_num;
    }
}
2373 :
2374 : /* Duplicates a jump-thread path of N_REGION basic blocks.
2375 : The ENTRY edge is redirected to the duplicate of the region.
2376 :
2377 : Remove the last conditional statement in the last basic block in the REGION,
2378 : and create a single fallthru edge pointing to the same destination as the
2379 : EXIT edge.
2380 :
2381 : CURRENT_PATH_NO is an index into the global paths[] table
2382 : specifying the jump-thread path.
2383 :
2384 : Returns false if it is unable to copy the region, true otherwise. */
2385 :
bool
back_jt_path_registry::duplicate_thread_path (edge entry,
					      edge exit,
					      basic_block *region,
					      unsigned n_region,
					      unsigned current_path_no)
{
  unsigned i;
  class loop *loop = entry->dest->loop_father;
  edge exit_copy;
  edge redirected;
  profile_count curr_count;

  if (!can_copy_bbs_p (region, n_region))
    return false;

  /* Some sanity checking.  Note that we do not check for all possible
     misuses of the functions.  I.e. if you ask to copy something weird,
     it will work, but the state of structures probably will not be
     correct.  */
  for (i = 0; i < n_region; i++)
    {
      /* We do not handle subloops, i.e. all the blocks must belong to the
	 same loop.  */
      if (region[i]->loop_father != loop)
	return false;
    }

  initialize_original_copy_tables ();

  set_loop_copy (loop, loop);

  basic_block *region_copy = XNEWVEC (basic_block, n_region);
  copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
	    split_edge_bb_loc (entry), false);

  /* Fix up: copy_bbs redirects all edges pointing to copied blocks.  The
     following code ensures that all the edges exiting the jump-thread path are
     redirected back to the original code: these edges are exceptions
     invalidating the property that is propagated by executing all the blocks of
     the jump-thread path in order.  */

  curr_count = entry->count ();

  for (i = 0; i < n_region; i++)
    {
      edge e;
      edge_iterator ei;
      basic_block bb = region_copy[i];

      /* Watch inconsistent profile.  */
      if (curr_count > region[i]->count)
	curr_count = region[i]->count;
      /* Scale current BB.  */
      if (region[i]->count.nonzero_p () && curr_count.initialized_p ())
	{
	  /* In the middle of the path we only scale the frequencies.
	     In last BB we need to update probabilities of outgoing edges
	     because we know which one is taken at the threaded path.  */
	  if (i + 1 != n_region)
	    scale_bbs_frequencies_profile_count (region + i, 1,
					         region[i]->count - curr_count,
					         region[i]->count);
	  else
	    update_bb_profile_for_threading (region[i],
					     curr_count,
					     exit);
	  scale_bbs_frequencies_profile_count (region_copy + i, 1, curr_count,
					       region_copy[i]->count);
	}

      if (single_succ_p (bb))
	{
	  /* Make sure the successor is the next node in the path.  */
	  gcc_assert (i + 1 == n_region
		      || region_copy[i + 1] == single_succ_edge (bb)->dest);
	  if (i + 1 != n_region)
	    {
	      curr_count = single_succ_edge (bb)->count ();
	    }
	  continue;
	}

      /* Special case the last block on the path: make sure that it does not
	 jump back on the copied path, including back to itself.  */
      if (i + 1 == n_region)
	{
	  FOR_EACH_EDGE (e, ei, bb->succs)
	    if (bb_in_bbs (e->dest, region_copy, n_region))
	      {
		basic_block orig = get_bb_original (e->dest);
		if (orig)
		  redirect_edge_and_branch_force (e, orig);
	      }
	  continue;
	}

      /* Redirect all other edges jumping to non-adjacent blocks back to the
	 original code.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (region_copy[i + 1] != e->dest)
	  {
	    basic_block orig = get_bb_original (e->dest);
	    if (orig)
	      redirect_edge_and_branch_force (e, orig);
	  }
	else
	  {
	    /* The on-path edge; track its count for profile scaling of
	       the next block.  */
	    curr_count = e->count ();
	  }
    }


  if (flag_checking)
    verify_jump_thread (region_copy, n_region);

  /* Remove the last branch in the jump thread path.  */
  remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);

  /* And fixup the flags on the single remaining edge.  */
  edge fix_e = find_edge (region_copy[n_region - 1], exit->dest);
  fix_e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
  fix_e->flags |= EDGE_FALLTHRU;

  /* NOTE(review): since FIX_E already connects these blocks, make_edge
     presumably returns NULL here and the branch below is a safety net
     for the case the edge was missing — confirm against cfg.cc.  */
  edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);

  if (e)
    {
      rescan_loop_exit (e, true, false);
      e->probability = profile_probability::always ();
    }

  /* Redirect the entry and add the phi node arguments.  */
  if (entry->dest == loop->header)
    mark_loop_for_removal (loop);
  redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
  gcc_assert (redirected != NULL);
  flush_pending_stmts (entry);

  /* Add the other PHI node arguments.  */
  add_phi_args_after_copy (region_copy, n_region, NULL);

  free (region_copy);

  /* Fix up any queued paths that overlap the one just threaded.  */
  adjust_paths_after_duplication (current_path_no);

  free_original_copy_tables ();
  return true;
}
2535 :
2536 : /* Return true when PATH is a valid jump-thread path. */
2537 :
2538 : static bool
2539 1280465 : valid_jump_thread_path (vec<jump_thread_edge *> *path)
2540 : {
2541 1280465 : unsigned len = path->length ();
2542 :
2543 : /* Check that the path is connected. */
2544 3070481 : for (unsigned int j = 0; j < len - 1; j++)
2545 : {
2546 1810054 : edge e = (*path)[j]->e;
2547 1810054 : if (e->dest != (*path)[j+1]->e->src)
2548 : return false;
2549 : }
2550 : return true;
2551 : }
2552 :
2553 : /* Remove any queued jump threads that include edge E.
2554 :
2555 : We don't actually remove them here, just record the edges into ax
2556 : hash table. That way we can do the search once per iteration of
2557 : DOM/VRP rather than for every case where DOM optimizes away a COND_EXPR. */
2558 :
2559 : void
2560 370387 : fwd_jt_path_registry::remove_jump_threads_including (edge_def *e)
2561 : {
2562 370387 : if (!m_paths.exists () || !flag_thread_jumps)
2563 : return;
2564 :
2565 370351 : edge *slot = m_removed_edges->find_slot (e, INSERT);
2566 370351 : *slot = e;
2567 : }
2568 :
2569 : /* Thread all paths that have been queued for jump threading, and
2570 : update the CFG accordingly.
2571 :
2572 : It is the caller's responsibility to fix the dominance information
2573 : and rewrite duplicated SSA_NAMEs back into SSA form.
2574 :
2575 : If PEEL_LOOP_HEADERS is false, avoid threading edges through loop
2576 : headers if it does not simplify the loop.
2577 :
2578 : Returns true if one or more edges were threaded. */
2579 :
2580 : bool
2581 8351647 : jt_path_registry::thread_through_all_blocks (bool peel_loop_headers)
2582 : {
2583 8406341 : if (m_paths.length () == 0)
2584 : return false;
2585 :
2586 454242 : m_num_threaded_edges = 0;
2587 :
2588 454242 : bool retval = update_cfg (peel_loop_headers);
2589 :
2590 454242 : statistics_counter_event (cfun, "Jumps threaded", m_num_threaded_edges);
2591 :
2592 454242 : if (retval)
2593 : {
2594 399548 : loops_state_set (LOOPS_NEED_FIXUP);
2595 399548 : return true;
2596 : }
2597 : return false;
2598 : }
2599 :
2600 : /* This is the backward threader version of thread_through_all_blocks
2601 : using a generic BB copier. */
2602 :
2603 : bool
2604 297604 : back_jt_path_registry::update_cfg (bool /*peel_loop_headers*/)
2605 : {
2606 297604 : bool retval = false;
2607 297604 : hash_set<edge> visited_starting_edges;
2608 :
2609 1593080 : while (m_paths.length ())
2610 : {
2611 1295476 : vec<jump_thread_edge *> *path = m_paths[0];
2612 1295476 : edge entry = (*path)[0]->e;
2613 :
2614 : /* Do not jump-thread twice from the same starting edge.
2615 :
2616 : Previously we only checked that we weren't threading twice
2617 : from the same BB, but that was too restrictive. Imagine a
2618 : path that starts from GIMPLE_COND(x_123 == 0,...), where both
2619 : edges out of this conditional yield paths that can be
2620 : threaded (for example, both lead to an x_123==0 or x_123!=0
2621 : conditional further down the line. */
2622 1295476 : if (visited_starting_edges.contains (entry)
2623 : /* We may not want to realize this jump thread path for
2624 : various reasons. So check it first. */
2625 1295476 : || !valid_jump_thread_path (path))
2626 : {
2627 : /* Remove invalid jump-thread paths. */
2628 35049 : cancel_thread (path, "Avoiding threading twice from same edge");
2629 35049 : m_paths.unordered_remove (0);
2630 35049 : continue;
2631 : }
2632 :
2633 1260427 : unsigned len = path->length ();
2634 1260427 : edge exit = (*path)[len - 1]->e;
2635 1260427 : basic_block *region = XNEWVEC (basic_block, len - 1);
2636 :
2637 3021230 : for (unsigned int j = 0; j < len - 1; j++)
2638 1760803 : region[j] = (*path)[j]->e->dest;
2639 :
2640 1260427 : if (duplicate_thread_path (entry, exit, region, len - 1, 0))
2641 : {
2642 : /* We do not update dominance info. */
2643 1260393 : free_dominance_info (CDI_DOMINATORS);
2644 1260393 : visited_starting_edges.add (entry);
2645 1260393 : retval = true;
2646 1260393 : m_num_threaded_edges++;
2647 : }
2648 :
2649 1260427 : path->release ();
2650 1260427 : m_paths.unordered_remove (0);
2651 1260427 : free (region);
2652 : }
2653 297604 : return retval;
2654 297604 : }
2655 :
/* This is the forward threader version of thread_through_all_blocks,
   using a custom BB copier.

   MAY_PEEL_LOOP_HEADERS is forwarded to thread_through_loop_header
   for each threaded loop.  Returns true if anything was threaded.  */

bool
fwd_jt_path_registry::update_cfg (bool may_peel_loop_headers)
{
  bool retval = false;

  /* Remove any paths that referenced removed edges, or that would
     need to copy a block which can_duplicate_block_p rejects.  */
  if (m_removed_edges)
    for (unsigned i = 0; i < m_paths.length (); )
      {
	unsigned int j;
	vec<jump_thread_edge *> *path = m_paths[i];

	/* Scan for a stale edge or an uncopyable source block; J is
	   left at the offending index, or at path->length () if the
	   whole path is fine.  */
	for (j = 0; j < path->length (); j++)
	  {
	    edge e = (*path)[j]->e;
	    if (m_removed_edges->find_slot (e, NO_INSERT)
		|| (((*path)[j]->type == EDGE_COPY_SRC_BLOCK
		     || (*path)[j]->type == EDGE_COPY_SRC_JOINER_BLOCK)
		    && !can_duplicate_block_p (e->src)))
	      break;
	  }

	/* The scan stopped early, so the path is unusable.  I is
	   intentionally not incremented: after the removal a
	   different element occupies slot I and is examined next.  */
	if (j != path->length ())
	  {
	    cancel_thread (path, "Thread references removed edge");
	    m_paths.unordered_remove (i);
	    continue;
	  }
	i++;
      }

  auto_bitmap threaded_blocks;
  mark_threaded_blocks (threaded_blocks);

  initialize_original_copy_tables ();

  /* The order in which we process jump threads can be important.

     Consider if we have two jump threading paths A and B.  If the
     target edge of A is the starting edge of B and we thread path A
     first, then we create an additional incoming edge into B->dest that
     we cannot discover as a jump threading path on this iteration.

     If we instead thread B first, then the edge into B->dest will have
     already been redirected before we process path A and path A will
     naturally, with no further work, target the redirected path for B.

     A post-order is sufficient here.  Compute the ordering first, then
     process the blocks.  */
  if (!bitmap_empty_p (threaded_blocks))
    {
      int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
      unsigned int postorder_num = post_order_compute (postorder, false, false);
      for (unsigned int i = 0; i < postorder_num; i++)
	{
	  unsigned int indx = postorder[i];
	  if (bitmap_bit_p (threaded_blocks, indx))
	    {
	      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, indx);
	      retval |= thread_block (bb, true);
	    }
	}
      free (postorder);
    }

  /* Then perform the threading through loop headers.  We start with the
     innermost loop, so that the changes in cfg we perform won't affect
     further threading.  */
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    {
      if (!loop->header
	  || !bitmap_bit_p (threaded_blocks, loop->header->index))
	continue;

      retval |= thread_through_loop_header (loop, may_peel_loop_headers);
    }

  /* All jump threading paths should have been resolved at this
     point.  Verify that is the case.  */
  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      edge_iterator ei;
      edge e;
      FOR_EACH_EDGE (e, ei, bb->preds)
	gcc_assert (e->aux == NULL);
    }

  free_original_copy_tables ();

  return retval;
}
2751 :
/* Check PATH against conditions under which it must not be realized
   (NULL edges, disallowed latch or loop-header crossings, crossing or
   rotating loops) and cancel it when one applies.  Return true if
   PATH was cancelled, false if it remains a candidate.  */

bool
jt_path_registry::cancel_invalid_paths (vec<jump_thread_edge *> &path)
{
  gcc_checking_assert (!path.is_empty ());
  edge entry = path[0]->e;
  edge exit = path[path.length () - 1]->e;
  bool seen_latch = false;
  int loops_crossed = 0;
  bool crossed_latch = false;
  bool crossed_loop_header = false;
  // Use ->dest here instead of ->src to ignore the first block.  The
  // first block is allowed to be in a different loop, since it'll be
  // redirected.  See similar comment in profitable_path_p: "we don't
  // care about that block...".
  loop_p loop = entry->dest->loop_father;
  loop_p curr_loop = loop;

  // Walk the path, noting latch crossings, transitions between loops,
  // and loop headers the path passes through without leaving the loop.
  for (unsigned int i = 0; i < path.length (); i++)
    {
      edge e = path[i]->e;

      if (e == NULL)
	{
	  // NULL outgoing edges on a path can happen for jumping to a
	  // constant address.
	  cancel_thread (&path, "Found NULL edge in jump threading path");
	  return true;
	}

      if (loop->latch == e->src || loop->latch == e->dest)
	{
	  seen_latch = true;
	  // Like seen_latch, but excludes the first block.
	  if (e->src != entry->src)
	    crossed_latch = true;
	}

      // Count every change of loop along the path.
      if (e->dest->loop_father != curr_loop)
	{
	  curr_loop = e->dest->loop_father;
	  ++loops_crossed;
	}

      // ?? Avoid threading through loop headers that remain in the
      // loop, as such threadings tend to create sub-loops which
      // _might_ be OK ??.
      if (e->dest->loop_father->header == e->dest
	  && !flow_loop_nested_p (exit->dest->loop_father,
				  e->dest->loop_father))
	crossed_loop_header = true;

      // The forward threader must not queue paths over back edges;
      // only registries created for backedge threading may.
      if (flag_checking && !m_backedge_threads)
	gcc_assert ((path[i]->e->flags & EDGE_DFS_BACK) == 0);
    }

  // If we crossed a loop into an outer loop without crossing the
  // latch, this is just an early exit from the loop.
  if (loops_crossed == 1
      && !crossed_latch
      && flow_loop_nested_p (exit->dest->loop_father, exit->src->loop_father))
    return false;

  // The remaining restrictions exist to keep loops intact for the
  // loop optimizers; once those have run they are not enforced.
  if (cfun->curr_properties & PROP_loop_opts_done)
    return false;

  if (seen_latch && empty_block_p (loop->latch))
    {
      cancel_thread (&path, "Threading through latch before loop opts "
		     "would create non-empty latch");
      return true;
    }
  if (loops_crossed)
    {
      cancel_thread (&path, "Path crosses loops");
      return true;
    }
  // The path should either start and end in the same loop or exit the
  // loop it starts in but never enter a loop.  This also catches
  // creating irreducible loops, not only rotation.
  if (entry->src->loop_father != exit->dest->loop_father
      && !flow_loop_nested_p (exit->src->loop_father,
			      entry->dest->loop_father))
    {
      cancel_thread (&path, "Path rotates loop");
      return true;
    }
  if (crossed_loop_header)
    {
      cancel_thread (&path, "Path crosses loop header but does not exit it");
      return true;
    }
  return false;
}
2845 :
2846 : /* Register a jump threading opportunity. We queue up all the jump
2847 : threading opportunities discovered by a pass and update the CFG
2848 : and SSA form all at once.
2849 :
2850 : E is the edge we can thread, E2 is the new target edge, i.e., we
2851 : are effectively recording that E->dest can be changed to E2->dest
2852 : after fixing the SSA graph.
2853 :
2854 : Return TRUE if PATH was successfully threaded. */
2855 :
2856 : bool
2857 2689996 : jt_path_registry::register_jump_thread (vec<jump_thread_edge *> *path)
2858 : {
2859 2689996 : gcc_checking_assert (flag_thread_jumps);
2860 :
2861 2689996 : if (!dbg_cnt (registered_jump_thread))
2862 : {
2863 0 : path->release ();
2864 0 : return false;
2865 : }
2866 :
2867 2689996 : if (cancel_invalid_paths (*path))
2868 : return false;
2869 :
2870 2121980 : if (dump_file && (dump_flags & TDF_DETAILS))
2871 126 : dump_jump_thread_path (dump_file, *path, true);
2872 :
2873 2121980 : m_paths.safe_push (path);
2874 2121980 : return true;
2875 : }
2876 :
2877 : /* Return how many uses of T there are within BB, as long as there
2878 : aren't any uses outside BB. If there are any uses outside BB,
2879 : return -1 if there's at most one use within BB, or -2 if there is
2880 : more than one use within BB. */
2881 :
2882 : static int
2883 1857244 : uses_in_bb (tree t, basic_block bb)
2884 : {
2885 1857244 : int uses = 0;
2886 1857244 : bool outside_bb = false;
2887 :
2888 1857244 : imm_use_iterator iter;
2889 1857244 : use_operand_p use_p;
2890 7604952 : FOR_EACH_IMM_USE_FAST (use_p, iter, t)
2891 : {
2892 4006504 : if (is_gimple_debug (USE_STMT (use_p)))
2893 702430 : continue;
2894 :
2895 3304074 : if (gimple_bb (USE_STMT (use_p)) != bb)
2896 : outside_bb = true;
2897 : else
2898 2168423 : uses++;
2899 :
2900 3304074 : if (outside_bb && uses > 1)
2901 116040 : return -2;
2902 116040 : }
2903 :
2904 1741204 : if (outside_bb)
2905 519972 : return -1;
2906 :
2907 : return uses;
2908 : }
2909 :
/* Starting from the final control flow stmt in BB, assuming it will
   be removed, follow uses in to-be-removed stmts back to their defs
   and count how many defs are to become dead and be removed as
   well.  Returns the estimated number of statements killed by
   threading through BB.  */

unsigned int
estimate_threading_killed_stmts (basic_block bb)
{
  int killed_stmts = 0;
  /* Maps an SSA name to how many of its uses inside BB are still
     expected to survive; entries are kept only while needed.  */
  hash_map<tree, int> ssa_remaining_uses;
  /* Statements known to become dead whose operands still need to be
     followed back to their definitions.  */
  auto_vec<gimple *, 4> dead_worklist;

  /* If the block has only two predecessors, threading will turn phi
     dsts into either src, so count them as dead stmts.  */
  bool drop_all_phis = EDGE_COUNT (bb->preds) == 2;

  if (drop_all_phis)
    for (gphi_iterator gsi = gsi_start_phis (bb);
	 !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gphi *phi = gsi.phi ();
	tree dst = gimple_phi_result (phi);

	/* We don't count virtual PHIs as stmts in
	   record_temporary_equivalences_from_phis.  */
	if (virtual_operand_p (dst))
	  continue;

	killed_stmts++;
      }

  /* An empty block contributes nothing beyond the PHIs counted.  */
  if (gsi_end_p (gsi_last_bb (bb)))
    return killed_stmts;

  /* Only a control transfer at the end of BB is removed by threading;
     with any other final statement there is nothing more to kill.  */
  gimple *stmt = gsi_stmt (gsi_last_bb (bb));
  if (gimple_code (stmt) != GIMPLE_COND
      && gimple_code (stmt) != GIMPLE_GOTO
      && gimple_code (stmt) != GIMPLE_SWITCH)
    return killed_stmts;

  /* The control statement is always dead.  */
  killed_stmts++;
  dead_worklist.quick_push (stmt);
  /* Walk back from known-dead statements to their operand defs,
     decrementing remaining-use counts; a def all of whose uses die
     becomes dead itself and is pushed on the worklist in turn.  */
  while (!dead_worklist.is_empty ())
    {
      stmt = dead_worklist.pop ();

      ssa_op_iter iter;
      use_operand_p use_p;
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
	{
	  tree t = USE_FROM_PTR (use_p);
	  gimple *def = SSA_NAME_DEF_STMT (t);

	  /* Only defs in BB without side effects can die; PHIs are
	     excluded when drop_all_phis already counted them.  */
	  if (gimple_bb (def) == bb
	      && (gimple_code (def) != GIMPLE_PHI
		  || !drop_all_phis)
	      && !gimple_has_side_effects (def))
	    {
	      int *usesp = ssa_remaining_uses.get (t);
	      int uses;

	      if (usesp)
		uses = *usesp;
	      else
		uses = uses_in_bb (t, bb);

	      gcc_assert (uses);

	      /* Don't bother recording the expected use count if we
		 won't find any further uses within BB.  */
	      if (!usesp && (uses < -1 || uses > 1))
		{
		  usesp = &ssa_remaining_uses.get_or_insert (t);
		  *usesp = uses;
		}

	      /* Negative counts from uses_in_bb mean T is also used
		 outside BB, so its def can never become dead here.  */
	      if (uses < 0)
		continue;

	      --uses;
	      if (usesp)
		*usesp = uses;

	      if (!uses)
		{
		  killed_stmts++;
		  if (usesp)
		    ssa_remaining_uses.remove (t);
		  if (gimple_code (def) != GIMPLE_PHI)
		    dead_worklist.safe_push (def);
		}
	    }
	}
    }

  if (dump_file)
    fprintf (dump_file, "threading bb %i kills %i stmts\n",
	     bb->index, killed_stmts);

  return killed_stmts;
}
|