Line data Source code
1 : /* Thread edges through blocks and update the control flow and SSA graphs.
2 : Copyright (C) 2004-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify
7 : it under the terms of the GNU General Public License as published by
8 : the Free Software Foundation; either version 3, or (at your option)
9 : any later version.
10 :
11 : GCC is distributed in the hope that it will be useful,
12 : but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : GNU General Public License for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "backend.h"
24 : #include "tree.h"
25 : #include "gimple.h"
26 : #include "cfghooks.h"
27 : #include "tree-pass.h"
28 : #include "ssa.h"
29 : #include "fold-const.h"
30 : #include "cfganal.h"
31 : #include "gimple-iterator.h"
32 : #include "tree-ssa.h"
33 : #include "tree-ssa-threadupdate.h"
34 : #include "cfgloop.h"
35 : #include "dbgcnt.h"
36 : #include "tree-cfg.h"
37 : #include "tree-vectorizer.h"
38 : #include "tree-pass.h"
39 :
40 : /* Given a block B, update the CFG and SSA graph to reflect redirecting
41 : one or more in-edges to B to instead reach the destination of an
42 : out-edge from B while preserving any side effects in B.
43 :
44 : i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
45 : side effects of executing B.
46 :
47 : 1. Make a copy of B (including its outgoing edges and statements). Call
48 : the copy B'. Note B' has no incoming edges or PHIs at this time.
49 :
50 : 2. Remove the control statement at the end of B' and all outgoing edges
51 : except B'->C.
52 :
53 : 3. Add a new argument to each PHI in C with the same value as the existing
54 : argument associated with edge B->C. Associate the new PHI arguments
55 : with the edge B'->C.
56 :
57 : 4. For each PHI in B, find or create a PHI in B' with an identical
58 : PHI_RESULT. Add an argument to the PHI in B' which has the same
59 : value as the PHI in B associated with the edge A->B. Associate
60 : the new argument in the PHI in B' with the edge A->B.
61 :
62 : 5. Change the edge A->B to A->B'.
63 :
64 : 5a. This automatically deletes any PHI arguments associated with the
65 : edge A->B in B.
66 :
67 : 5b. This automatically associates each new argument added in step 4
68 : with the edge A->B'.
69 :
70 : 6. Repeat for other incoming edges into B.
71 :
72 : 7. Put the duplicated resources in B and all the B' blocks into SSA form.
73 :
74 : Note that block duplication can be minimized by first collecting the
75 : set of unique destination blocks that the incoming edges should
76 : be threaded to.
77 :
78 : We reduce the number of edges and statements we create by not copying all
79 : the outgoing edges and the control statement in step #1. We instead create
80 : a template block without the outgoing edges and duplicate the template.
81 :
82 : Another case this code handles is threading through a "joiner" block. In
83 : this case, we do not know the destination of the joiner block, but one
84 : of the outgoing edges from the joiner block leads to a threadable path. This
85 : case largely works as outlined above, except the duplicate of the joiner
86 : block still contains a full set of outgoing edges and its control statement.
87 : We just redirect one of its outgoing edges to our jump threading path. */
88 :
89 :
90 : /* Steps #5 and #6 of the above algorithm are best implemented by walking
91 : all the incoming edges which thread to the same destination edge at
92 : the same time. That avoids lots of table lookups to get information
93 : for the destination edge.
94 :
95 : To realize that implementation we create a list of incoming edges
96 : which thread to the same outgoing edge. Thus to implement steps
97 : #5 and #6 we traverse our hash table of outgoing edge information.
98 : For each entry we walk the list of incoming edges which thread to
99 : the current outgoing edge. */
100 :
struct el
{
  /* An incoming edge that threads along the path recorded in the
     owning redirection_data entry.  */
  edge e;
  /* Next element in the singly-linked list of such incoming edges.  */
  struct el *next;
};
106 :
/* Main data structure recording information regarding B's duplicate
   blocks.  */

/* We need to efficiently record the unique thread destinations of this
   block and specific information associated with those destinations.  We
   may have many incoming edges threaded to the same outgoing edge.  This
   can be naturally implemented with a hash table.  */

struct redirection_data : free_ptr_hash<redirection_data>
{
  /* We support wiring up two block duplicates in a jump threading path.

     One is a normal block copy where we remove the control statement
     and wire up its single remaining outgoing edge to the thread path.

     The other is a joiner block where we leave the control statement
     in place, but wire one of the outgoing edges to a thread path.

     In theory we could have multiple block duplicates in a jump
     threading path, but I haven't tried that.

     The duplicate blocks appear in this array in the same order in
     which they appear in the jump thread path.  */
  basic_block dup_blocks[2];

  /* The jump threading path this entry represents.  The hash and
     equality functions below key off the edges in this path.  */
  vec<jump_thread_edge *> *path;

  /* A list of incoming edges which we want to thread to the
     same path.  */
  struct el *incoming_edges;

  /* hash_table support.  */
  static inline hashval_t hash (const redirection_data *);
  static inline int equal (const redirection_data *, const redirection_data *);
};
142 :
/* Initialize the obstack from which all jump thread edges and paths
   are allocated.  */

jump_thread_path_allocator::jump_thread_path_allocator ()
{
  obstack_init (&m_obstack);
}
147 :
/* Release every edge and path allocated from the obstack in one
   shot.  */

jump_thread_path_allocator::~jump_thread_path_allocator ()
{
  obstack_free (&m_obstack, NULL);
}
152 :
/* Allocate a jump_thread_edge wrapping edge E with type TYPE.  The
   storage comes from the obstack and is reclaimed all at once by the
   allocator's destructor.  */

jump_thread_edge *
jump_thread_path_allocator::allocate_thread_edge (edge e,
						  jump_thread_edge_type type)
{
  void *r = obstack_alloc (&m_obstack, sizeof (jump_thread_edge));
  return new (r) jump_thread_edge (e, type);
}
160 :
/* Allocate and return an empty jump threading path (a vec of
   jump_thread_edge pointers) from the obstack.  */

vec<jump_thread_edge *> *
jump_thread_path_allocator::allocate_thread_path ()
{
  // ?? Since the paths live in an obstack, we should be able to remove all
  // references to path->release() throughout the code.
  void *r = obstack_alloc (&m_obstack, sizeof (vec <jump_thread_edge *>));
  return new (r) vec<jump_thread_edge *> ();
}
169 :
/* Initialize the registry.  BACKEDGE_THREADS distinguishes the
   registry used by the backward threader (true) from the forward
   threader's registry (false).  */

jt_path_registry::jt_path_registry (bool backedge_threads)
{
  m_paths.create (5);
  m_num_threaded_edges = 0;
  m_backedge_threads = backedge_threads;
}
176 :
/* Release the vector of recorded paths.  The path objects themselves
   live in m_allocator's obstack and are freed by its destructor.  */

jt_path_registry::~jt_path_registry ()
{
  m_paths.release ();
}
181 :
/* The forward threader's registry does not thread across back edges.  */

fwd_jt_path_registry::fwd_jt_path_registry ()
  : jt_path_registry (/*backedge_threads=*/false)
{
  /* 17 is just an initial size hint for the hash table.  */
  m_removed_edges = new hash_table<struct removed_edges> (17);
  m_redirection_data = NULL;
}
188 :
fwd_jt_path_registry::~fwd_jt_path_registry ()
{
  /* NOTE(review): m_redirection_data is not freed here; presumably it
     is created and destroyed around each CFG-update cycle elsewhere in
     this file -- confirm against thread_block/its callers.  */
  delete m_removed_edges;
}
193 :
/* The backward threader's registry is allowed to thread across back
   edges; all other state is the base class's.  */

back_jt_path_registry::back_jt_path_registry ()
  : jt_path_registry (/*backedge_threads=*/true)
{
}
198 :
/* Append a new jump_thread_edge wrapping E with type TYPE to PATH.
   The edge object is obstack-allocated via the registry's
   allocator.  */

void
jt_path_registry::push_edge (vec<jump_thread_edge *> *path,
			     edge e, jump_thread_edge_type type)
{
  jump_thread_edge *x = m_allocator.allocate_thread_edge (e, type);
  path->safe_push (x);
}
206 :
/* Return a new, empty jump threading path allocated from the
   registry's obstack allocator.  */

vec<jump_thread_edge *> *
jt_path_registry::allocate_thread_path ()
{
  return m_allocator.allocate_thread_path ();
}
212 :
/* Dump a jump threading path, including annotations about each
   edge in the path.  REGISTERING selects between the "Registering"
   and "Cancelling" message forms.  The first path entry is the
   incoming edge; subsequent entries are annotated with how their
   source block will be handled (copied, joiner, or not copied).  */

static void
dump_jump_thread_path (FILE *dump_file,
		       const vec<jump_thread_edge *> &path,
		       bool registering)
{
  if (registering)
    fprintf (dump_file,
	     "  [%u] Registering jump thread: (%d, %d) incoming edge; ",
	     dbg_cnt_counter (registered_jump_thread),
	     path[0]->e->src->index, path[0]->e->dest->index);
  else
    fprintf (dump_file,
	     "  Cancelling jump thread: (%d, %d) incoming edge; ",
	     path[0]->e->src->index, path[0]->e->dest->index);

  for (unsigned int i = 1; i < path.length (); i++)
    {
      /* We can get paths with a NULL edge when the final destination
	 of a jump thread turns out to be a constant address.  We dump
	 those paths when debugging, so we have to be prepared for that
	 possibility here.  */
      if (path[i]->e == NULL)
	continue;

      fprintf (dump_file, " (%d, %d) ",
	       path[i]->e->src->index, path[i]->e->dest->index);
      switch (path[i]->type)
	{
	case EDGE_COPY_SRC_JOINER_BLOCK:
	  fprintf (dump_file, "joiner");
	  break;
	case EDGE_COPY_SRC_BLOCK:
	  fprintf (dump_file, "normal");
	  break;
	case EDGE_NO_COPY_SRC_BLOCK:
	  fprintf (dump_file, "nocopy");
	  break;
	default:
	  gcc_unreachable ();
	}

      /* Flag DFS back edges so loop/profile anomalies are easy to
	 spot in dumps.  */
      if ((path[i]->e->flags & EDGE_DFS_BACK) != 0)
	fprintf (dump_file, " (back)");
    }
  fprintf (dump_file, "; \n");
}
262 :
/* Debugger helper: dump PATH to stderr in the "Registering" form.  */

DEBUG_FUNCTION void
debug (const vec<jump_thread_edge *> &path)
{
  dump_jump_thread_path (stderr, path, true);
}
268 :
/* Debugger helper: pointer overload of the above.  */

DEBUG_FUNCTION void
debug (const vec<jump_thread_edge *> *path)
{
  debug (*path);
}
274 :
/* Release the memory associated with PATH, and if dumping is enabled,
   dump out the reason why the thread was canceled.  REASON may be
   NULL, in which case only the path itself is dumped.  */

static void
cancel_thread (vec<jump_thread_edge *> *path, const char *reason = NULL)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (reason)
	fprintf (dump_file, "%s: ", reason);

      dump_jump_thread_path (dump_file, *path, false);
      fprintf (dump_file, "\n");
    }
  path->release ();
}
291 :
292 : /* Simple hashing function. For any given incoming edge E, we're going
293 : to be most concerned with the final destination of its jump thread
294 : path. So hash on the block index of the final edge in the path. */
295 :
296 : inline hashval_t
297 505743 : redirection_data::hash (const redirection_data *p)
298 : {
299 505743 : vec<jump_thread_edge *> *path = p->path;
300 505743 : return path->last ()->e->dest->index;
301 : }
302 :
303 : /* Given two hash table entries, return true if they have the same
304 : jump threading path. */
305 : inline int
306 155387 : redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
307 : {
308 155387 : vec<jump_thread_edge *> *path1 = p1->path;
309 155387 : vec<jump_thread_edge *> *path2 = p2->path;
310 :
311 466161 : if (path1->length () != path2->length ())
312 : return false;
313 :
314 251962 : for (unsigned int i = 1; i < path1->length (); i++)
315 : {
316 183511 : if ((*path1)[i]->type != (*path2)[i]->type
317 183511 : || (*path1)[i]->e != (*path2)[i]->e)
318 : return false;
319 : }
320 :
321 : return true;
322 : }
323 :
/* Data structure of information to pass to hash table traversal routines.  */
struct ssa_local_info_t
{
  /* The current block we are working on.  */
  basic_block bb;

  /* We only create a template block for the first duplicated block in a
     jump threading path as we may need many duplicates of that block.

     The second duplicate block in a path is specific to that path.  Creating
     and sharing a template for that block is considerably more difficult.  */
  basic_block template_block;

  /* If we append debug stmts to the template block after creating it,
     this iterator won't be the last one in the block, and further
     copies of the template block shouldn't get debug stmts after
     it.  */
  gimple_stmt_iterator template_last_to_copy;

  /* Blocks duplicated for the thread; indexed by block number.  */
  bitmap duplicate_blocks;

  /* TRUE if we thread one or more jumps, FALSE otherwise.  */
  bool jumps_threaded;

  /* When we have multiple paths through a joiner which reach different
     final destinations, then we may need to correct for potential
     profile insanities.  */
  bool need_profile_correction;

  // Jump threading statistics.
  unsigned long num_threaded_edges;
};
357 :
358 : /* When we start updating the CFG for threading, data necessary for jump
359 : threading is attached to the AUX field for the incoming edge. Use these
360 : macros to access the underlying structure attached to the AUX field. */
361 : #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
362 :
/* Remove the last statement in block BB if it is a control statement.
   Also remove all outgoing edges except the edge which reaches DEST_BB.
   If DEST_BB is NULL, then remove all outgoing edges.  */

static void
remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
{
  gimple_stmt_iterator gsi;
  edge e;
  edge_iterator ei;

  gsi = gsi_last_bb (bb);

  /* If the duplicate ends with a control statement, then remove it.

     Note that if we are duplicating the template block rather than the
     original basic block, then the duplicate might not have any real
     statements in it.  */
  if (!gsi_end_p (gsi)
      && gsi_stmt (gsi)
      && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
	  || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
	  || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
    gsi_remove (&gsi, true);

  for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
    {
      if (e->dest != dest_bb)
	{
	  free_dom_edge_info (e);
	  remove_edge (e);
	}
      else
	{
	  /* The kept edge is now the sole way out of BB, so it is
	     taken unconditionally.  */
	  e->probability = profile_probability::always ();
	  ei_next (&ei);
	}
    }

  /* If the remaining edge is a loop exit, there must have been a
     removed edge that was not a loop exit.

     In that case BB and possibly other blocks were previously
     in the loop, but are now outside the loop.  Thus, we need
     to update the loop structures.  */
  if (single_succ_p (bb)
      && loop_outer (bb->loop_father)
      && loop_exit_edge_p (bb->loop_father, single_succ_edge (bb)))
    loops_state_set (LOOPS_NEED_FIXUP);
}
413 :
/* Create a duplicate of BB.  Record the duplicate block in an array
   indexed by COUNT stored in RD.  If DUPLICATE_BLOCKS is non-null,
   also record the new block's index in the bitmap it points to.  */

static void
create_block_for_threading (basic_block bb,
			    struct redirection_data *rd,
			    unsigned int count,
			    bitmap *duplicate_blocks)
{
  edge_iterator ei;
  edge e;

  /* We can use the generic block duplication code and simply remove
     the stuff we do not need.  */
  rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);

  FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
    {
      /* The duplicated edges must not inherit jump threading AUX
	 data from the originals.  */
      e->aux = NULL;

      /* If we duplicate a block with an outgoing edge marked as
	 EDGE_IGNORE, we must clear EDGE_IGNORE so that it doesn't
	 leak out of the current pass.

	 It would be better to simplify switch statements and remove
	 the edges before we get here, but the sequencing is nontrivial.  */
      e->flags &= ~EDGE_IGNORE;
    }

  /* Zero out the profile, since the block is unreachable for now.  */
  rd->dup_blocks[count]->count = profile_count::uninitialized ();
  if (duplicate_blocks)
    bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
}
448 :
/* Given an outgoing edge E lookup and return its entry in our hash table.

   If INSERT is true, then we insert the entry into the hash table if
   it is not already present.  INCOMING_EDGE is added to the list of incoming
   edges associated with E in the hash table.  */

redirection_data *
fwd_jt_path_registry::lookup_redirection_data (edge e, insert_option insert)
{
  struct redirection_data **slot;
  struct redirection_data *elt;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);

  /* Build a hash table element so we can see if E is already
     in the table.  */
  elt = XNEW (struct redirection_data);
  elt->path = path;
  elt->dup_blocks[0] = NULL;
  elt->dup_blocks[1] = NULL;
  elt->incoming_edges = NULL;

  slot = m_redirection_data->find_slot (elt, insert);

  /* This will only happen if INSERT is false and the entry is not
     in the hash table.  */
  if (slot == NULL)
    {
      free (elt);
      return NULL;
    }

  /* This will only happen if E was not in the hash table and
     INSERT is true.  The table takes ownership of ELT, and E becomes
     the head (and only member) of the incoming edge list.  */
  if (*slot == NULL)
    {
      *slot = elt;
      elt->incoming_edges = XNEW (struct el);
      elt->incoming_edges->e = e;
      elt->incoming_edges->next = NULL;
      return elt;
    }
  /* E was in the hash table.  */
  else
    {
      /* Free ELT as we do not need it anymore, we will extract the
	 relevant entry from the hash table itself.  */
      free (elt);

      /* Get the entry stored in the hash table.  */
      elt = *slot;

      /* If insertion was requested, then we need to add INCOMING_EDGE
	 to the list of incoming edges associated with E.  */
      if (insert)
	{
	  struct el *el = XNEW (struct el);
	  el->next = elt->incoming_edges;
	  el->e = e;
	  elt->incoming_edges = el;
	}

      return elt;
    }
}
513 :
/* Given ssa_name DEF, backtrack jump threading PATH from node IDX
   to see if it has constant value in a flow sensitive manner.  Set
   LOCUS to location of the constant phi arg and return the value.
   Return DEF directly if either PATH or IDX is ZERO, if DEF is not
   defined by a PHI, or if no constant is found.  */

static tree
get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
			 basic_block bb, int idx, location_t *locus)
{
  tree arg;
  gphi *def_phi;
  basic_block def_bb;

  if (path == NULL || idx == 0)
    return def;

  def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
  if (!def_phi)
    return def;

  def_bb = gimple_bb (def_phi);
  /* Don't propagate loop invariants into deeper loops.  */
  if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
    return def;

  /* Backtrack jump threading path from IDX to see if def has constant
     value.  Only the first path edge reaching DEF_BB is consulted.  */
  for (int j = idx - 1; j >= 0; j--)
    {
      edge e = (*path)[j]->e;
      if (e->dest == def_bb)
	{
	  arg = gimple_phi_arg_def (def_phi, e->dest_idx);
	  if (is_gimple_min_invariant (arg))
	    {
	      *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
	      return arg;
	    }
	  break;
	}
    }

  return def;
}
558 :
/* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
   Try to backtrack jump threading PATH from node IDX to see if the arg
   has constant value, copy constant value instead of argument itself
   if yes.  */

static void
copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
	       vec<jump_thread_edge *> *path, int idx)
{
  gphi_iterator gsi;
  int src_indx = src_e->dest_idx;

  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree def = gimple_phi_arg_def (phi, src_indx);
      location_t locus = gimple_phi_arg_location (phi, src_indx);

      /* Virtual PHIs never carry constants, so only real SSA names
	 are worth backtracking through the path.  */
      if (TREE_CODE (def) == SSA_NAME
	  && !virtual_operand_p (gimple_phi_result (phi)))
	def = get_value_locus_in_path (def, path, bb, idx, &locus);

      add_phi_arg (phi, def, tgt_e, locus);
    }
}
584 :
585 : /* We have recently made a copy of ORIG_BB, including its outgoing
586 : edges. The copy is NEW_BB. Every PHI node in every direct successor of
587 : ORIG_BB has a new argument associated with edge from NEW_BB to the
588 : successor. Initialize the PHI argument so that it is equal to the PHI
589 : argument associated with the edge from ORIG_BB to the successor.
590 : PATH and IDX are used to check if the new PHI argument has constant
591 : value in a flow sensitive manner. */
592 :
593 : static void
594 75706 : update_destination_phis (basic_block orig_bb, basic_block new_bb,
595 : vec<jump_thread_edge *> *path, int idx)
596 : {
597 75706 : edge_iterator ei;
598 75706 : edge e;
599 :
600 234367 : FOR_EACH_EDGE (e, ei, orig_bb->succs)
601 : {
602 158661 : edge e2 = find_edge (new_bb, e->dest);
603 158661 : copy_phi_args (e->dest, e, e2, path, idx);
604 : }
605 75706 : }
606 :
/* Given a duplicate block and its single destination (both stored
   in RD).  Create an edge between the duplicate and its single
   destination.

   Add an additional argument to any PHI nodes at the single
   destination.  IDX is the start node in jump threading path
   we start to check to see if the new PHI argument has constant
   value along the jump threading path.  */

static void
create_edge_and_update_destination_phis (struct redirection_data *rd,
					 basic_block bb, int idx)
{
  edge e = make_single_succ_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);

  /* The new edge may be a (new) loop exit; keep the loop exit lists
     consistent.  */
  rescan_loop_exit (e, true, false);

  /* We used to copy the thread path here.  That was added in 2007
     and dutifully updated through the representation changes in 2013.

     In 2013 we added code to thread from an interior node through
     the backedge to another interior node.  That runs after the code
     to thread through loop headers from outside the loop.

     The latter may delete edges in the CFG, including those
     which appeared in the jump threading path we copied here.  Thus
     we'd end up using a dangling pointer.

     After reviewing the 2007/2011 code, I can't see how anything
     depended on copying the AUX field and clearly copying the jump
     threading path is problematical due to embedded edge pointers.
     It has been removed.  */
  e->aux = NULL;

  /* If there are any PHI nodes at the destination of the outgoing edge
     from the duplicate block, then we will need to add a new argument
     to them.  The argument should have the same value as the argument
     associated with the outgoing edge stored in RD.  */
  copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
}
647 :
648 : /* Look through PATH beginning at START and return TRUE if there are
649 : any additional blocks that need to be duplicated. Otherwise,
650 : return FALSE. */
651 : static bool
652 75706 : any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
653 : unsigned int start)
654 : {
655 96107 : for (unsigned int i = start + 1; i < path->length (); i++)
656 : {
657 60819 : if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
658 60819 : || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
659 : return true;
660 : }
661 : return false;
662 : }
663 :
664 :
665 : /* Compute the amount of profile count coming into the jump threading
666 : path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
667 : PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
668 : duplicated path, returned in PATH_OUT_COUNT_PTR. LOCAL_INFO is used to
669 : identify blocks duplicated for jump threading, which have duplicated
670 : edges that need to be ignored in the analysis. Return true if path contains
671 : a joiner, false otherwise.
672 :
673 : In the non-joiner case, this is straightforward - all the counts
674 : flowing into the jump threading path should flow through the duplicated
675 : block and out of the duplicated path.
676 :
677 : In the joiner case, it is very tricky. Some of the counts flowing into
678 : the original path go offpath at the joiner. The problem is that while
679 : we know how much total count goes off-path in the original control flow,
680 : we don't know how many of the counts corresponding to just the jump
681 : threading path go offpath at the joiner.
682 :
683 : For example, assume we have the following control flow and identified
684 : jump threading paths:
685 :
686 : A B C
687 : \ | /
688 : Ea \ |Eb / Ec
689 : \ | /
690 : v v v
691 : J <-- Joiner
692 : / \
693 : Eoff/ \Eon
694 : / \
695 : v v
696 : Soff Son <--- Normal
697 : /\
698 : Ed/ \ Ee
699 : / \
700 : v v
701 : D E
702 :
703 : Jump threading paths: A -> J -> Son -> D (path 1)
704 : C -> J -> Son -> E (path 2)
705 :
706 : Note that the control flow could be more complicated:
707 : - Each jump threading path may have more than one incoming edge. I.e. A and
708 : Ea could represent multiple incoming blocks/edges that are included in
709 : path 1.
710 : - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
711 : before or after the "normal" copy block). These are not duplicated onto
712 : the jump threading path, as they are single-successor.
713 : - Any of the blocks along the path may have other incoming edges that
714 : are not part of any jump threading path, but add profile counts along
715 : the path.
716 :
717 : In the above example, after all jump threading is complete, we will
718 : end up with the following control flow:
719 :
720 : A B C
721 : | | |
722 : Ea| |Eb |Ec
723 : | | |
724 : v v v
725 : Ja J Jc
726 : / \ / \Eon' / \
727 : Eona/ \ ---/---\-------- \Eonc
728 : / \ / / \ \
729 : v v v v v
730 : Sona Soff Son Sonc
731 : \ /\ /
732 : \___________ / \ _____/
733 : \ / \/
734 : vv v
735 : D E
736 :
737 : The main issue to notice here is that when we are processing path 1
738 : (A->J->Son->D) we need to figure out the outgoing edge weights to
739 : the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
740 : sum of the incoming weights to D remain Ed. The problem with simply
741 : assuming that Ja (and Jc when processing path 2) has the same outgoing
742 : probabilities to its successors as the original block J, is that after
743 : all paths are processed and other edges/counts removed (e.g. none
744 : of Ec will reach D after processing path 2), we may end up with not
745 : enough count flowing along duplicated edge Sona->D.
746 :
747 : Therefore, in the case of a joiner, we keep track of all counts
748 : coming in along the current path, as well as from predecessors not
749 : on any jump threading path (Eb in the above example). While we
750 : first assume that the duplicated Eona for Ja->Sona has the same
751 : probability as the original, we later compensate for other jump
752 : threading paths that may eliminate edges. We do that by keep track
753 : of all counts coming into the original path that are not in a jump
754 : thread (Eb in the above example, but as noted earlier, there could
755 : be other predecessors incoming to the path at various points, such
756 : as at Son). Call this cumulative non-path count coming into the path
757 : before D as Enonpath. We then ensure that the count from Sona->D is as at
758 : least as big as (Ed - Enonpath), but no bigger than the minimum
759 : weight along the jump threading path. The probabilities of both the
760 : original and duplicated joiner block J and Ja will be adjusted
761 : accordingly after the updates. */
762 :
763 : static bool
764 285459 : compute_path_counts (struct redirection_data *rd,
765 : ssa_local_info_t *local_info,
766 : profile_count *path_in_count_ptr,
767 : profile_count *path_out_count_ptr)
768 : {
769 285459 : edge e = rd->incoming_edges->e;
770 285459 : vec<jump_thread_edge *> *path = THREAD_PATH (e);
771 285459 : edge elast = path->last ()->e;
772 285459 : profile_count nonpath_count = profile_count::zero ();
773 285459 : bool has_joiner = false;
774 285459 : profile_count path_in_count = profile_count::zero ();
775 :
776 : /* Start by accumulating incoming edge counts to the path's first bb
777 : into a couple buckets:
778 : path_in_count: total count of incoming edges that flow into the
779 : current path.
780 : nonpath_count: total count of incoming edges that are not
781 : flowing along *any* path. These are the counts
782 : that will still flow along the original path after
783 : all path duplication is done by potentially multiple
784 : calls to this routine.
785 : (any other incoming edge counts are for a different jump threading
786 : path that will be handled by a later call to this routine.)
787 : To make this easier, start by recording all incoming edges that flow into
788 : the current path in a bitmap. We could add up the path's incoming edge
789 : counts here, but we still need to walk all the first bb's incoming edges
790 : below to add up the counts of the other edges not included in this jump
791 : threading path. */
792 285459 : struct el *next, *el;
793 285459 : auto_bitmap in_edge_srcs;
794 639369 : for (el = rd->incoming_edges; el; el = next)
795 : {
796 353910 : next = el->next;
797 353910 : bitmap_set_bit (in_edge_srcs, el->e->src->index);
798 : }
799 285459 : edge ein;
800 285459 : edge_iterator ei;
801 1045783 : FOR_EACH_EDGE (ein, ei, e->dest->preds)
802 : {
803 760324 : vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
804 : /* Simply check the incoming edge src against the set captured above. */
805 760324 : if (ein_path
806 1275433 : && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
807 : {
808 : /* It is necessary but not sufficient that the last path edges
809 : are identical. There may be different paths that share the
810 : same last path edge in the case where the last edge has a nocopy
811 : source block. */
812 353910 : gcc_assert (ein_path->last ()->e == elast);
813 353910 : path_in_count += ein->count ();
814 : }
815 406414 : else if (!ein_path)
816 : {
817 : /* Keep track of the incoming edges that are not on any jump-threading
818 : path. These counts will still flow out of original path after all
819 : jump threading is complete. */
820 245215 : nonpath_count += ein->count ();
821 : }
822 : }
823 :
824 : /* Now compute the fraction of the total count coming into the first
825 : path bb that is from the current threading path. */
826 285459 : profile_count total_count = e->dest->count;
827 : /* Handle incoming profile insanities. */
828 285459 : if (total_count < path_in_count)
829 21931 : path_in_count = total_count;
830 285459 : profile_probability onpath_scale = path_in_count.probability_in (total_count);
831 :
832 : /* Walk the entire path to do some more computation in order to estimate
833 : how much of the path_in_count will flow out of the duplicated threading
834 : path. In the non-joiner case this is straightforward (it should be
835 : the same as path_in_count, although we will handle incoming profile
836 : insanities by setting it equal to the minimum count along the path).
837 :
838 : In the joiner case, we need to estimate how much of the path_in_count
839 : will stay on the threading path after the joiner's conditional branch.
840 : We don't really know for sure how much of the counts
841 : associated with this path go to each successor of the joiner, but we'll
842 : estimate based on the fraction of the total count coming into the path
843 : bb was from the threading paths (computed above in onpath_scale).
844 : Afterwards, we will need to do some fixup to account for other threading
845 : paths and possible profile insanities.
846 :
847 : In order to estimate the joiner case's counts we also need to update
848 : nonpath_count with any additional counts coming into the path. Other
849 : blocks along the path may have additional predecessors from outside
850 : the path. */
851 285459 : profile_count path_out_count = path_in_count;
852 285459 : profile_count min_path_count = path_in_count;
853 651055 : for (unsigned int i = 1; i < path->length (); i++)
854 : {
855 365596 : edge epath = (*path)[i]->e;
856 365596 : profile_count cur_count = epath->count ();
857 365596 : if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
858 : {
859 75706 : has_joiner = true;
860 75706 : cur_count = cur_count.apply_probability (onpath_scale);
861 : }
862 : /* In the joiner case we need to update nonpath_count for any edges
863 : coming into the path that will contribute to the count flowing
864 : into the path successor. */
865 365596 : if (has_joiner && epath != elast)
866 : {
867 : /* Look for other incoming edges after joiner. */
868 237247 : FOR_EACH_EDGE (ein, ei, epath->dest->preds)
869 : {
870 174343 : if (ein != epath
871 : /* Ignore in edges from blocks we have duplicated for a
872 : threading path, which have duplicated edge counts until
873 : they are redirected by an invocation of this routine. */
874 285782 : && !bitmap_bit_p (local_info->duplicate_blocks,
875 111439 : ein->src->index))
876 38980 : nonpath_count += ein->count ();
877 : }
878 : }
879 365596 : if (cur_count < path_out_count)
880 156159 : path_out_count = cur_count;
881 365596 : if (epath->count () < min_path_count)
882 134353 : min_path_count = epath->count ();
883 : }
884 :
885 : /* We computed path_out_count above assuming that this path targeted
886 : the joiner's on-path successor with the same likelihood as it
887 : reached the joiner. However, other thread paths through the joiner
888 : may take a different path through the normal copy source block
889 : (i.e. they have a different elast), meaning that they do not
890 : contribute any counts to this path's elast. As a result, it may
891 : turn out that this path must have more count flowing to the on-path
892 : successor of the joiner. Essentially, all of this path's elast
893 : count must be contributed by this path and any nonpath counts
894 : (since any path through the joiner with a different elast will not
895 : include a copy of this elast in its duplicated path).
896 : So ensure that this path's path_out_count is at least the
897 : difference between elast->count () and nonpath_count. Otherwise the edge
898 : counts after threading will not be sane. */
899 285459 : if (local_info->need_profile_correction
900 313074 : && has_joiner && path_out_count < elast->count () - nonpath_count)
901 : {
902 8991 : path_out_count = elast->count () - nonpath_count;
903 : /* But neither can we go above the minimum count along the path
904 : we are duplicating. This can be an issue due to profile
905 : insanities coming in to this pass. */
906 8991 : if (path_out_count > min_path_count)
907 5670 : path_out_count = min_path_count;
908 : }
909 :
910 285459 : *path_in_count_ptr = path_in_count;
911 285459 : *path_out_count_ptr = path_out_count;
912 285459 : return has_joiner;
913 285459 : }
914 :
915 :
916 : /* Update the counts and frequencies for both an original path
917 : edge EPATH and its duplicate EDUP. The duplicate source block
918 : will get a count of PATH_IN_COUNT and PATH_IN_FREQ,
919 : and the duplicate edge EDUP will have a count of PATH_OUT_COUNT. */
920 : static void
921 365596 : update_profile (edge epath, edge edup, profile_count path_in_count,
922 : profile_count path_out_count)
923 : {
924 :
925 : /* First update the duplicated block's count. */
926 365596 : if (edup)
927 : {
928 325877 : basic_block dup_block = edup->src;
929 :
930 : /* Edup's count is reduced by path_out_count. We need to redistribute
931 : probabilities to the remaining edges. */
932 :
933 325877 : edge esucc;
934 325877 : edge_iterator ei;
935 325877 : profile_probability edup_prob
936 325877 : = path_out_count.probability_in (path_in_count);
937 :
938 : /* Either scale up or down the remaining edges.
939 : probabilities are always in range <0,1> and thus we can't do
940 : both by same loop. */
941 325877 : if (edup->probability > edup_prob)
942 : {
943 25665 : profile_probability rev_scale
944 25665 : = (profile_probability::always () - edup->probability)
945 51330 : / (profile_probability::always () - edup_prob);
946 80592 : FOR_EACH_EDGE (esucc, ei, dup_block->succs)
947 54927 : if (esucc != edup)
948 29262 : esucc->probability /= rev_scale;
949 : }
950 300212 : else if (edup->probability < edup_prob)
951 : {
952 20756 : profile_probability scale
953 20756 : = (profile_probability::always () - edup_prob)
954 41512 : / (profile_probability::always () - edup->probability);
955 63630 : FOR_EACH_EDGE (esucc, ei, dup_block->succs)
956 42874 : if (esucc != edup)
957 22118 : esucc->probability *= scale;
958 : }
959 325877 : if (edup_prob.initialized_p ())
960 304404 : edup->probability = edup_prob;
961 :
962 325877 : gcc_assert (!dup_block->count.initialized_p ());
963 325877 : dup_block->count = path_in_count;
964 : }
965 :
966 365596 : if (path_in_count == profile_count::zero ())
967 9893 : return;
968 :
969 355703 : profile_count final_count = epath->count () - path_out_count;
970 :
971 : /* Now update the original block's count in the
972 : opposite manner - remove the counts/freq that will flow
973 : into the duplicated block. Handle underflow due to precision/
974 : rounding issues. */
975 355703 : epath->src->count -= path_in_count;
976 :
977 : /* Next update this path edge's original and duplicated counts. We know
978 : that the duplicated path will have path_out_count flowing
979 : out of it (in the joiner case this is the count along the duplicated path
980 : out of the duplicated joiner). This count can then be removed from the
981 : original path edge. */
982 :
983 355703 : edge esucc;
984 355703 : edge_iterator ei;
985 355703 : profile_probability epath_prob = final_count.probability_in (epath->src->count);
986 :
987 355703 : if (epath->probability > epath_prob)
988 : {
989 221172 : profile_probability rev_scale
990 221172 : = (profile_probability::always () - epath->probability)
991 442344 : / (profile_probability::always () - epath_prob);
992 665756 : FOR_EACH_EDGE (esucc, ei, epath->src->succs)
993 444584 : if (esucc != epath)
994 223412 : esucc->probability /= rev_scale;
995 : }
996 134531 : else if (epath->probability < epath_prob)
997 : {
998 18503 : profile_probability scale
999 18503 : = (profile_probability::always () - epath_prob)
1000 37006 : / (profile_probability::always () - epath->probability);
1001 59289 : FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1002 40786 : if (esucc != epath)
1003 22283 : esucc->probability *= scale;
1004 : }
1005 355703 : if (epath_prob.initialized_p ())
1006 324756 : epath->probability = epath_prob;
1007 : }
1008 :
/* Wire up the outgoing edges from the duplicate blocks and
   update any PHIs as needed.  Also update the profile counts
   on the original and duplicate blocks and edges.

   RD describes one unique jump threading destination: the incoming
   edge(s), the path, and the duplicate blocks created for it.
   LOCAL_INFO carries per-invocation state (the threaded block BB,
   the set of duplicated blocks, etc.).  */
void
ssa_fix_duplicate_block_edges (struct redirection_data *rd,
			       ssa_local_info_t *local_info)
{
  bool multi_incomings = (rd->incoming_edges->next != NULL);
  edge e = rd->incoming_edges->e;
  vec<jump_thread_edge *> *path = THREAD_PATH (e);
  /* ELAST is the final edge of the jump threading path.  */
  edge elast = path->last ()->e;
  profile_count path_in_count = profile_count::zero ();
  profile_count path_out_count = profile_count::zero ();

  /* First determine how much profile count to move from original
     path to the duplicate path.  This is tricky in the presence of
     a joiner (see comments for compute_path_counts), where some portion
     of the path's counts will flow off-path from the joiner.  In the
     non-joiner case the path_in_count and path_out_count should be the
     same.  */
  bool has_joiner = compute_path_counts (rd, local_info,
					 &path_in_count, &path_out_count);

  /* I walks the path edges; COUNT indexes rd->dup_blocks[] and is only
     advanced when the path element actually has a duplicated source
     block (see the bottom of the loop).  */
  for (unsigned int count = 0, i = 1; i < path->length (); i++)
    {
      edge epath = (*path)[i]->e;

      /* If we were threading through an joiner block, then we want
	 to keep its control statement and redirect an outgoing edge.
	 Else we want to remove the control statement & edges, then create
	 a new outgoing edge.  In both cases we may need to update PHIs.  */
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	{
	  edge victim;
	  edge e2;

	  gcc_assert (has_joiner);

	  /* This updates the PHIs at the destination of the duplicate
	     block.  Pass 0 instead of i if we are threading a path which
	     has multiple incoming edges.  */
	  update_destination_phis (local_info->bb, rd->dup_blocks[count],
				   path, multi_incomings ? 0 : i);

	  /* Find the edge from the duplicate block to the block we're
	     threading through.  That's the edge we want to redirect.  */
	  victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);

	  /* If there are no remaining blocks on the path to duplicate,
	     then redirect VICTIM to the final destination of the jump
	     threading path.  */
	  if (!any_remaining_duplicated_blocks (path, i))
	    {
	      if (victim->dest != elast->dest)
		{
		  e2 = redirect_edge_and_branch (victim, elast->dest);
		  /* If we redirected the edge, then we need to copy PHI arguments
		     at the target.  If the edge already existed (e2 != victim
		     case), then the PHIs in the target already have the correct
		     arguments.  */
		  if (e2 == victim)
		    copy_phi_args (e2->dest, elast, e2,
				   path, multi_incomings ? 0 : i);
		}
	      else
		e2 = victim;
	    }
	  else
	    {
	      /* Redirect VICTIM to the next duplicated block in the path.  */
	      e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);

	      /* We need to update the PHIs in the next duplicated block.  We
		 want the new PHI args to have the same value as they had
		 in the source of the next duplicate block.

		 Thus, we need to know which edge we traversed into the
		 source of the duplicate.  Furthermore, we may have
		 traversed many edges to reach the source of the duplicate.

		 Walk through the path starting at element I until we
		 hit an edge marked with EDGE_COPY_SRC_BLOCK.  We want
		 the edge from the prior element.  */
	      for (unsigned int j = i + 1; j < path->length (); j++)
		{
		  if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
		    {
		      copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
		      break;
		    }
		}
	    }

	  /* Update the counts of both the original block
	     and path edge, and the duplicates.  The path duplicate's
	     incoming count are the totals for all edges
	     incoming to this jump threading path computed earlier.
	     And we know that the duplicated path will have path_out_count
	     flowing out of it (i.e. along the duplicated path out of the
	     duplicated joiner).  */
	  update_profile (epath, e2, path_in_count, path_out_count);
	}
      else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
	{
	  remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
	  create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
						   multi_incomings ? 0 : i);
	  /* COUNT == 1 means this is the second duplicated block; clear
	     the aux field of its single successor edge so stale path
	     data does not leak downstream.  */
	  if (count == 1)
	    single_succ_edge (rd->dup_blocks[1])->aux = NULL;

	  /* Update the counts of both the original block
	     and path edge, and the duplicates.  Since we are now after
	     any joiner that may have existed on the path, the count
	     flowing along the duplicated threaded path is path_out_count.
	     If we didn't have a joiner, then cur_path_freq was the sum
	     of the total frequencies along all incoming edges to the
	     thread path (path_in_freq).  If we had a joiner, it would have
	     been updated at the end of that handling to the edge frequency
	     along the duplicated joiner path edge.  */
	  update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
			  path_out_count, path_out_count);
	}
      else
	{
	  /* No copy case.  In this case we don't have an equivalent block
	     on the duplicated thread path to update, but we do need
	     to remove the portion of the counts/freqs that were moved
	     to the duplicated path from the counts/freqs flowing through
	     this block on the original path.  Since all the no-copy edges
	     are after any joiner, the removed count is the same as
	     path_out_count.

	     If we didn't have a joiner, then cur_path_freq was the sum
	     of the total frequencies along all incoming edges to the
	     thread path (path_in_freq).  If we had a joiner, it would have
	     been updated at the end of that handling to the edge frequency
	     along the duplicated joiner path edge.  */
	  update_profile (epath, NULL, path_out_count, path_out_count);
	}

      /* Increment the index into the duplicated path when we processed
	 a duplicated block.  */
      if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
	  || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
	{
	  count++;
	}
    }
}
1158 :
1159 : /* Hash table traversal callback routine to create duplicate blocks. */
1160 :
1161 : int
1162 285459 : ssa_create_duplicates (struct redirection_data **slot,
1163 : ssa_local_info_t *local_info)
1164 : {
1165 285459 : struct redirection_data *rd = *slot;
1166 :
1167 : /* The second duplicated block in a jump threading path is specific
1168 : to the path. So it gets stored in RD rather than in LOCAL_DATA.
1169 :
1170 : Each time we're called, we have to look through the path and see
1171 : if a second block needs to be duplicated.
1172 :
1173 : Note the search starts with the third edge on the path. The first
1174 : edge is the incoming edge, the second edge always has its source
1175 : duplicated. Thus we start our search with the third edge. */
1176 285459 : vec<jump_thread_edge *> *path = rd->path;
1177 323093 : for (unsigned int i = 2; i < path->length (); i++)
1178 : {
1179 78052 : if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
1180 78052 : || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1181 : {
1182 40418 : create_block_for_threading ((*path)[i]->e->src, rd, 1,
1183 : &local_info->duplicate_blocks);
1184 40418 : break;
1185 : }
1186 : }
1187 :
1188 : /* Create a template block if we have not done so already. Otherwise
1189 : use the template to create a new block. */
1190 285459 : if (local_info->template_block == NULL)
1191 : {
1192 227967 : create_block_for_threading ((*path)[1]->e->src, rd, 0,
1193 : &local_info->duplicate_blocks);
1194 227967 : local_info->template_block = rd->dup_blocks[0];
1195 227967 : local_info->template_last_to_copy
1196 455934 : = gsi_last_bb (local_info->template_block);
1197 :
1198 : /* We do not create any outgoing edges for the template. We will
1199 : take care of that in a later traversal. That way we do not
1200 : create edges that are going to just be deleted. */
1201 : }
1202 : else
1203 : {
1204 57492 : gimple_seq seq = NULL;
1205 57492 : if (gsi_stmt (local_info->template_last_to_copy)
1206 114984 : != gsi_stmt (gsi_last_bb (local_info->template_block)))
1207 : {
1208 0 : if (gsi_end_p (local_info->template_last_to_copy))
1209 : {
1210 0 : seq = bb_seq (local_info->template_block);
1211 0 : set_bb_seq (local_info->template_block, NULL);
1212 : }
1213 : else
1214 0 : seq = gsi_split_seq_after (local_info->template_last_to_copy);
1215 : }
1216 57492 : create_block_for_threading (local_info->template_block, rd, 0,
1217 : &local_info->duplicate_blocks);
1218 57492 : if (seq)
1219 : {
1220 0 : if (gsi_end_p (local_info->template_last_to_copy))
1221 0 : set_bb_seq (local_info->template_block, seq);
1222 : else
1223 0 : gsi_insert_seq_after (&local_info->template_last_to_copy,
1224 : seq, GSI_SAME_STMT);
1225 : }
1226 :
1227 : /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
1228 : block. */
1229 57492 : ssa_fix_duplicate_block_edges (rd, local_info);
1230 : }
1231 :
1232 285459 : if (MAY_HAVE_DEBUG_STMTS)
1233 : {
1234 : /* Copy debug stmts from each NO_COPY src block to the block
1235 : that would have been its predecessor, if we can append to it
1236 : (we can't add stmts after a block-ending stmt), or prepending
1237 : to the duplicate of the successor, if there is one. If
1238 : there's no duplicate successor, we'll mostly drop the blocks
1239 : on the floor; propagate_threaded_block_debug_into, called
1240 : elsewhere, will consolidate and preserve the effects of the
1241 : binds, but none of the markers. */
1242 212918 : gimple_stmt_iterator copy_to = gsi_last_bb (rd->dup_blocks[0]);
1243 212918 : if (!gsi_end_p (copy_to))
1244 : {
1245 210820 : if (stmt_ends_bb_p (gsi_stmt (copy_to)))
1246 : {
1247 188057 : if (rd->dup_blocks[1])
1248 32697 : copy_to = gsi_after_labels (rd->dup_blocks[1]);
1249 : else
1250 155360 : copy_to = gsi_none ();
1251 : }
1252 : else
1253 22763 : gsi_next (©_to);
1254 : }
1255 277073 : for (unsigned int i = 2, j = 0; i < path->length (); i++)
1256 64155 : if ((*path)[i]->type == EDGE_NO_COPY_SRC_BLOCK
1257 64155 : && gsi_bb (copy_to))
1258 : {
1259 7780 : for (gimple_stmt_iterator gsi = gsi_start_bb ((*path)[i]->e->src);
1260 10729 : !gsi_end_p (gsi); gsi_next (&gsi))
1261 : {
1262 6839 : if (!is_gimple_debug (gsi_stmt (gsi)))
1263 1930 : continue;
1264 4909 : gimple *stmt = gsi_stmt (gsi);
1265 4909 : gimple *copy = gimple_copy (stmt);
1266 4909 : gsi_insert_before (©_to, copy, GSI_SAME_STMT);
1267 : }
1268 : }
1269 60265 : else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
1270 60265 : || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1271 : {
1272 33082 : j++;
1273 33082 : gcc_assert (j < 2);
1274 33082 : copy_to = gsi_last_bb (rd->dup_blocks[j]);
1275 33082 : if (!gsi_end_p (copy_to))
1276 : {
1277 32987 : if (stmt_ends_bb_p (gsi_stmt (copy_to)))
1278 27690 : copy_to = gsi_none ();
1279 : else
1280 5297 : gsi_next (©_to);
1281 : }
1282 : }
1283 : }
1284 :
1285 : /* Keep walking the hash table. */
1286 285459 : return 1;
1287 : }
1288 :
1289 : /* We did not create any outgoing edges for the template block during
1290 : block creation. This hash table traversal callback creates the
1291 : outgoing edge for the template block. */
1292 :
1293 : inline int
1294 227967 : ssa_fixup_template_block (struct redirection_data **slot,
1295 : ssa_local_info_t *local_info)
1296 : {
1297 227967 : struct redirection_data *rd = *slot;
1298 :
1299 : /* If this is the template block halt the traversal after updating
1300 : it appropriately.
1301 :
1302 : If we were threading through an joiner block, then we want
1303 : to keep its control statement and redirect an outgoing edge.
1304 : Else we want to remove the control statement & edges, then create
1305 : a new outgoing edge. In both cases we may need to update PHIs. */
1306 227967 : if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
1307 : {
1308 227967 : ssa_fix_duplicate_block_edges (rd, local_info);
1309 227967 : return 0;
1310 : }
1311 :
1312 : return 1;
1313 : }
1314 :
1315 : /* Hash table traversal callback to redirect each incoming edge
1316 : associated with this hash table element to its new destination. */
1317 :
1318 : static int
1319 285459 : ssa_redirect_edges (struct redirection_data **slot,
1320 : ssa_local_info_t *local_info)
1321 : {
1322 285459 : struct redirection_data *rd = *slot;
1323 285459 : struct el *next, *el;
1324 :
1325 : /* Walk over all the incoming edges associated with this hash table
1326 : entry. */
1327 639369 : for (el = rd->incoming_edges; el; el = next)
1328 : {
1329 353910 : edge e = el->e;
1330 353910 : vec<jump_thread_edge *> *path = THREAD_PATH (e);
1331 :
1332 : /* Go ahead and free this element from the list. Doing this now
1333 : avoids the need for another list walk when we destroy the hash
1334 : table. */
1335 353910 : next = el->next;
1336 353910 : free (el);
1337 :
1338 353910 : local_info->num_threaded_edges++;
1339 :
1340 353910 : if (rd->dup_blocks[0])
1341 : {
1342 353910 : edge e2;
1343 :
1344 353910 : if (dump_file && (dump_flags & TDF_DETAILS))
1345 25 : fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
1346 25 : e->src->index, e->dest->index, rd->dup_blocks[0]->index);
1347 :
1348 : /* Redirect the incoming edge (possibly to the joiner block) to the
1349 : appropriate duplicate block. */
1350 353910 : e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
1351 353910 : gcc_assert (e == e2);
1352 353910 : flush_pending_stmts (e2);
1353 : }
1354 :
1355 : /* Go ahead and clear E->aux. It's not needed anymore and failure
1356 : to clear it will cause all kinds of unpleasant problems later. */
1357 353910 : path->release ();
1358 353910 : e->aux = NULL;
1359 :
1360 : }
1361 :
1362 : /* Indicate that we actually threaded one or more jumps. */
1363 285459 : if (rd->incoming_edges)
1364 285459 : local_info->jumps_threaded = true;
1365 :
1366 285459 : return 1;
1367 : }
1368 :
1369 : /* Return true if this block has no executable statements other than
1370 : a simple ctrl flow instruction. When the number of outgoing edges
1371 : is one, this is equivalent to a "forwarder" block. */
1372 :
1373 : static bool
1374 160037 : redirection_block_p (basic_block bb)
1375 : {
1376 160037 : gimple_stmt_iterator gsi;
1377 :
1378 : /* Advance to the first executable statement. */
1379 160037 : gsi = gsi_start_bb (bb);
1380 160037 : while (!gsi_end_p (gsi)
1381 356548 : && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
1382 : || is_gimple_debug (gsi_stmt (gsi))
1383 : || gimple_nop_p (gsi_stmt (gsi))
1384 159371 : || gimple_clobber_p (gsi_stmt (gsi))))
1385 196511 : gsi_next (&gsi);
1386 :
1387 : /* Check if this is an empty block. */
1388 160037 : if (gsi_end_p (gsi))
1389 : return true;
1390 :
1391 : /* Test that we've reached the terminating control statement. */
1392 159051 : return gsi_stmt (gsi)
1393 159051 : && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
1394 : || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
1395 : || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
1396 : }
1397 :
/* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
   is reached via one or more specific incoming edges, we know which
   outgoing edge from BB will be traversed.

   We want to redirect those incoming edges to the target of the
   appropriate outgoing edge.  Doing so avoids a conditional branch
   and may expose new optimization opportunities.  Note that we have
   to update dominator tree and SSA graph after such changes.

   The key to keeping the SSA graph update manageable is to duplicate
   the side effects occurring in BB so that those side effects still
   occur on the paths which bypass BB after redirecting edges.

   We accomplish this by creating duplicates of BB and arranging for
   the duplicates to unconditionally pass control to one specific
   successor of BB.  We then revector the incoming edges into BB to
   the appropriate duplicate of BB.

   If NOLOOP_ONLY is true, we only perform the threading as long as it
   does not affect the structure of the loops in a nontrivial way.

   If JOINERS is true, then thread through joiner blocks as well.

   Returns true if any jump was actually threaded.  */

bool
fwd_jt_path_registry::thread_block_1 (basic_block bb,
				      bool noloop_only,
				      bool joiners)
{
  /* E is an incoming edge into BB that we may or may not want to
     redirect to a duplicate of BB.  */
  edge e, e2;
  edge_iterator ei;
  ssa_local_info_t local_info;

  local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
  local_info.need_profile_correction = false;
  local_info.num_threaded_edges = 0;

  /* To avoid scanning a linear array for the element we need we instead
     use a hash table.  For normal code there should be no noticeable
     difference.  However, if we have a block with a large number of
     incoming and outgoing edges such linear searches can get expensive.  */
  m_redirection_data
    = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));

  /* Record each unique threaded destination into a hash table for
     efficient lookups.  LAST remembers the final destination of the
     first joiner path seen, to detect joiner paths that diverge.  */
  edge last = NULL;
  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      /* E->aux non-NULL means a jump threading path was registered
	 for this incoming edge.  */
      if (e->aux == NULL)
	continue;

      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      /* Process joiner paths and non-joiner paths in separate
	 invocations, selected by JOINERS.  */
      if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
	  || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
	continue;

      /* When a NO_COPY_SRC block became non-empty cancel the path.  */
      if (path->last ()->type == EDGE_NO_COPY_SRC_BLOCK)
	{
	  auto gsi = gsi_start_nondebug_bb (path->last ()->e->src);
	  if (!gsi_end_p (gsi)
	      && !is_ctrl_stmt (gsi_stmt (gsi)))
	    {
	      cancel_thread (path, "Non-empty EDGE_NO_COPY_SRC_BLOCK");
	      e->aux = NULL;
	      continue;
	    }
	}

      e2 = path->last ()->e;
      /* NOTE(review): if E2 were ever NULL the first condition below
	 would be entered and E2->src dereferenced; presumably the last
	 path edge is never NULL in practice — confirm before relying
	 on the !e2 test.  */
      if (!e2 || noloop_only)
	{
	  /* If NOLOOP_ONLY is true, we only allow threading through the
	     header of a loop to exit edges.  */

	  /* One case occurs when there was loop header buried in a jump
	     threading path that crosses loop boundaries.  We do not try
	     and thread this elsewhere, so just cancel the jump threading
	     request by clearing the AUX field now.  */
	  if (bb->loop_father != e2->src->loop_father
	      && (!loop_exit_edge_p (e2->src->loop_father, e2)
		  || flow_loop_nested_p (bb->loop_father,
					 e2->dest->loop_father)))
	    {
	      /* Since this case is not handled by our special code
		 to thread through a loop header, we must explicitly
		 cancel the threading request here.  */
	      cancel_thread (path, "Threading through unhandled loop header");
	      e->aux = NULL;
	      continue;
	    }

	  /* Another case occurs when trying to thread through our
	     own loop header, possibly from inside the loop.  We will
	     thread these later.  */
	  unsigned int i;
	  for (i = 1; i < path->length (); i++)
	    {
	      if ((*path)[i]->e->src == bb->loop_father->header
		  && (!loop_exit_edge_p (bb->loop_father, e2)
		      || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
		break;
	    }

	  if (i != path->length ())
	    continue;

	  /* Loop parallelization can be confused by the result of
	     threading through the loop exit test back into the loop.
	     However, threading those jumps seems to help other codes.

	     I have been unable to find anything related to the shape of
	     the CFG, the contents of the affected blocks, etc which would
	     allow a more sensible test than what we're using below which
	     merely avoids the optimization when parallelizing loops.  */
	  if (flag_tree_parallelize_loops > 1)
	    {
	      for (i = 1; i < path->length (); i++)
		if (bb->loop_father == e2->src->loop_father
		    && loop_exits_from_bb_p (bb->loop_father,
					     (*path)[i]->e->src)
		    && !loop_exit_edge_p (bb->loop_father, e2))
		  break;

	      if (i != path->length ())
		{
		  cancel_thread (path, "Threading through loop exit");
		  e->aux = NULL;
		  continue;
		}
	    }
	}

      /* Insert the outgoing edge into the hash table if it is not
	 already in the hash table.  */
      lookup_redirection_data (e, INSERT);

      /* When we have thread paths through a common joiner with different
	 final destinations, then we may need corrections to deal with
	 profile insanities.  See the big comment before compute_path_counts.  */
      if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	{
	  if (!last)
	    last = e2;
	  else if (e2 != last)
	    local_info.need_profile_correction = true;
	}
    }

  /* We do not update dominance info.  */
  free_dominance_info (CDI_DOMINATORS);

  /* We know we only thread through the loop header to loop exits.
     Let the basic block duplication hook know we are not creating
     a multiple entry loop.  */
  if (noloop_only
      && bb == bb->loop_father->header)
    set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));

  /* Now create duplicates of BB.

     Note that for a block with a high outgoing degree we can waste
     a lot of time and memory creating and destroying useless edges.

     So we first duplicate BB and remove the control structure at the
     tail of the duplicate as well as all outgoing edges from the
     duplicate.  We then use that duplicate block as a template for
     the rest of the duplicates.  */
  local_info.template_block = NULL;
  local_info.bb = bb;
  local_info.jumps_threaded = false;
  m_redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
			    (&local_info);

  /* The template does not have an outgoing edge.  Create that outgoing
     edge and update PHI nodes as the edge's target as necessary.

     We do this after creating all the duplicates to avoid creating
     unnecessary edges.  */
  m_redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
			    (&local_info);

  /* The hash table traversals above created the duplicate blocks (and the
     statements within the duplicate blocks).  This loop creates PHI nodes for
     the duplicated blocks and redirects the incoming edges into BB to reach
     the duplicates of BB.  */
  m_redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
			    (&local_info);

  /* Done with this block.  Clear REDIRECTION_DATA.  */
  delete m_redirection_data;
  m_redirection_data = NULL;

  if (noloop_only
      && bb == bb->loop_father->header)
    set_loop_copy (bb->loop_father, NULL);

  BITMAP_FREE (local_info.duplicate_blocks);
  local_info.duplicate_blocks = NULL;

  m_num_threaded_edges += local_info.num_threaded_edges;

  /* Indicate to our caller whether or not any jumps were threaded.  */
  return local_info.jumps_threaded;
}
1606 :
1607 : /* Wrapper for thread_block_1 so that we can first handle jump
1608 : thread paths which do not involve copying joiner blocks, then
1609 : handle jump thread paths which have joiner blocks.
1610 :
1611 : By doing things this way we can be as aggressive as possible and
1612 : not worry that copying a joiner block will create a jump threading
1613 : opportunity. */
1614 :
1615 : bool
1616 363516 : fwd_jt_path_registry::thread_block (basic_block bb, bool noloop_only)
1617 : {
1618 363516 : bool retval;
1619 363516 : retval = thread_block_1 (bb, noloop_only, false);
1620 363516 : retval |= thread_block_1 (bb, noloop_only, true);
1621 363516 : return retval;
1622 : }
1623 :
1624 : /* Callback for dfs_enumerate_from. Returns true if BB is different
1625 : from STOP and DBDS_CE_STOP. */
1626 :
1627 : static basic_block dbds_ce_stop;
1628 : static bool
1629 1336856 : dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1630 : {
1631 1336856 : return (bb != (const_basic_block) stop
1632 1336856 : && bb != dbds_ce_stop);
1633 : }
1634 :
1635 : /* Evaluates the dominance relationship of latch of the LOOP and BB, and
1636 : returns the state. */
1637 :
1638 : enum bb_dom_status
1639 109723 : determine_bb_domination_status (class loop *loop, basic_block bb)
1640 : {
1641 109723 : basic_block *bblocks;
1642 109723 : unsigned nblocks, i;
1643 109723 : bool bb_reachable = false;
1644 109723 : edge_iterator ei;
1645 109723 : edge e;
1646 :
1647 : /* This function assumes BB is a successor of LOOP->header.
1648 : If that is not the case return DOMST_NONDOMINATING which
1649 : is always safe. */
1650 109723 : {
1651 109723 : bool ok = false;
1652 :
1653 177135 : FOR_EACH_EDGE (e, ei, bb->preds)
1654 : {
1655 136944 : if (e->src == loop->header)
1656 : {
1657 : ok = true;
1658 : break;
1659 : }
1660 : }
1661 :
1662 109723 : if (!ok)
1663 : return DOMST_NONDOMINATING;
1664 : }
1665 :
1666 69532 : if (bb == loop->latch)
1667 : return DOMST_DOMINATING;
1668 :
1669 : /* Check that BB dominates LOOP->latch, and that it is back-reachable
1670 : from it. */
1671 :
1672 67802 : bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1673 67802 : dbds_ce_stop = loop->header;
1674 135604 : nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1675 67802 : bblocks, loop->num_nodes, bb);
1676 831226 : for (i = 0; i < nblocks; i++)
1677 1962760 : FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
1678 : {
1679 1199336 : if (e->src == loop->header)
1680 : {
1681 37785 : free (bblocks);
1682 37785 : return DOMST_NONDOMINATING;
1683 : }
1684 1161551 : if (e->src == bb)
1685 64808 : bb_reachable = true;
1686 : }
1687 :
1688 30017 : free (bblocks);
1689 30017 : return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
1690 : }
1691 :
/* Thread jumps through the header of LOOP.  Returns true if cfg changes.
   If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
   to the inside of the loop.  */

bool
fwd_jt_path_registry::thread_through_loop_header (class loop *loop,
						  bool may_peel_loop_headers)
{
  basic_block header = loop->header;
  edge e, tgt_edge, latch = loop_latch_edge (loop);
  edge_iterator ei;
  basic_block tgt_bb, atgt_bb;
  enum bb_dom_status domst;

  /* We have already threaded through headers to exits, so all the threading
     requests now are to the inside of the loop.  We need to avoid creating
     irreducible regions (i.e., loops with more than one entry block), and
     also loop with several latch edges, or new subloops of the loop (although
     there are cases where it might be appropriate, it is difficult to decide,
     and doing it wrongly may confuse other optimizers).

     We could handle more general cases here.  However, the intention is to
     preserve some information about the loop, which is impossible if its
     structure changes significantly, in a way that is not well understood.
     Thus we only handle few important special cases, in which also updating
     of the loop-carried information should be feasible:

     1) Propagation of latch edge to a block that dominates the latch block
	of a loop.  This aims to handle the following idiom:

	first = 1;
	while (1)
	  {
	    if (first)
	      initialize;
	    first = 0;
	    body;
	  }

	After threading the latch edge, this becomes

	first = 1;
	if (first)
	  initialize;
	while (1)
	  {
	    first = 0;
	    body;
	  }

	The original header of the loop is moved out of it, and we may thread
	the remaining edges through it without further constraints.

     2) All entry edges are propagated to a single basic block that dominates
	the latch block of the loop.  This aims to handle the following idiom
	(normally created for "for" loops):

	i = 0;
	while (1)
	  {
	    if (i >= 100)
	      break;
	    body;
	    i++;
	  }

	This becomes

	i = 0;
	while (1)
	  {
	    body;
	    i++;
	    if (i >= 100)
	      break;
	  }
     */

  /* Threading through the header won't improve the code if the header has just
     one successor.  */
  if (single_succ_p (header))
    goto fail;

  if (!may_peel_loop_headers && !redirection_block_p (loop->header))
    goto fail;
  else
    {
      tgt_bb = NULL;
      tgt_edge = NULL;
      /* Verify that every non-latch predecessor of the header has a
	 threading request and that all requests agree on a single
	 target block TGT_BB.  */
      FOR_EACH_EDGE (e, ei, header->preds)
	{
	  if (!e->aux)
	    {
	      if (e == latch)
		continue;

	      /* If latch is not threaded, and there is a header
		 edge that is not threaded, we would create loop
		 with multiple entries.  */
	      goto fail;
	    }

	  vec<jump_thread_edge *> *path = THREAD_PATH (e);

	  if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	    goto fail;
	  tgt_edge = (*path)[1]->e;
	  atgt_bb = tgt_edge->dest;
	  if (!tgt_bb)
	    tgt_bb = atgt_bb;
	  /* Two targets of threading would make us create loop
	     with multiple entries.  */
	  else if (tgt_bb != atgt_bb)
	    goto fail;
	}

      if (!tgt_bb)
	{
	  /* There are no threading requests.  */
	  return false;
	}

      /* Redirecting to empty loop latch is useless.  */
      if (tgt_bb == loop->latch
	  && empty_block_p (loop->latch))
	goto fail;
    }

  /* The target block must dominate the loop latch, otherwise we would be
     creating a subloop.  */
  domst = determine_bb_domination_status (loop, tgt_bb);
  if (domst == DOMST_NONDOMINATING)
    goto fail;
  if (domst == DOMST_LOOP_BROKEN)
    {
      /* If the loop ceased to exist, mark it as such, and thread through its
	 original header.  */
      mark_loop_for_removal (loop);
      return thread_block (header, false);
    }

  if (tgt_bb->loop_father->header == tgt_bb)
    {
      /* If the target of the threading is a header of a subloop, we need
	 to create a preheader for it, so that the headers of the two loops
	 do not merge.  */
      if (EDGE_COUNT (tgt_bb->preds) > 2)
	{
	  tgt_bb = create_preheader (tgt_bb->loop_father, 0);
	  gcc_assert (tgt_bb != NULL);
	}
      else
	tgt_bb = split_edge (tgt_edge);
    }

  basic_block new_preheader;

  /* Now consider the case entry edges are redirected to the new entry
     block.  Remember one entry edge, so that we can find the new
     preheader (its destination after threading).  Note E keeps the
     found edge after this loop; it is used below once threading has
     redirected it.  */
  FOR_EACH_EDGE (e, ei, header->preds)
    {
      if (e->aux)
	break;
    }

  /* The duplicate of the header is the new preheader of the loop.  Ensure
     that it is placed correctly in the loop hierarchy.  */
  set_loop_copy (loop, loop_outer (loop));

  thread_block (header, false);
  set_loop_copy (loop, NULL);
  new_preheader = e->dest;

  /* Create the new latch block.  This is always necessary, as the latch
     must have only a single successor, but the original header had at
     least two successors.  */
  loop->latch = NULL;
  edge keep_edge;
  keep_edge = single_succ_edge (new_preheader);
  loop->header = keep_edge->dest;
  latch = make_forwarder_block (tgt_bb, mfb_keep_just, keep_edge);
  loop->header = latch->dest;
  loop->latch = latch->src;
  return true;

 fail:
  /* We failed to thread anything.  Cancel the requests.  */
  FOR_EACH_EDGE (e, ei, header->preds)
    {
      vec<jump_thread_edge *> *path = THREAD_PATH (e);

      if (path)
	{
	  cancel_thread (path, "Failure in thread_through_loop_header");
	  e->aux = NULL;
	}
    }
  return false;
}
1892 :
1893 : /* E1 and E2 are edges into the same basic block. Return TRUE if the
1894 : PHI arguments associated with those edges are equal or there are no
1895 : PHI arguments, otherwise return FALSE. */
1896 :
1897 : static bool
1898 28634 : phi_args_equal_on_edges (edge e1, edge e2)
1899 : {
1900 28634 : gphi_iterator gsi;
1901 28634 : int indx1 = e1->dest_idx;
1902 28634 : int indx2 = e2->dest_idx;
1903 :
1904 44742 : for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1905 : {
1906 17479 : gphi *phi = gsi.phi ();
1907 :
1908 17479 : if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
1909 17479 : gimple_phi_arg_def (phi, indx2), 0))
1910 : return false;
1911 : }
1912 : return true;
1913 : }
1914 :
1915 : /* Return the number of non-debug statements and non-virtual PHIs in a
1916 : block. */
1917 :
1918 : static unsigned int
1919 9528 : count_stmts_and_phis_in_block (basic_block bb)
1920 : {
1921 9528 : unsigned int num_stmts = 0;
1922 :
1923 9528 : gphi_iterator gpi;
1924 24778 : for (gpi = gsi_start_phis (bb); !gsi_end_p (gpi); gsi_next (&gpi))
1925 30500 : if (!virtual_operand_p (PHI_RESULT (gpi.phi ())))
1926 9207 : num_stmts++;
1927 :
1928 9528 : gimple_stmt_iterator gsi;
1929 80255 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1930 : {
1931 61199 : gimple *stmt = gsi_stmt (gsi);
1932 61199 : if (!is_gimple_debug (stmt))
1933 53610 : num_stmts++;
1934 : }
1935 :
1936 9528 : return num_stmts;
1937 : }
1938 :
1939 :
/* Walk through the registered jump threads and convert them into a
   form convenient for this pass.

   Any block which has incoming edges threaded to outgoing edges
   will have its entry in THREADED_BLOCK set.

   Any threaded edge will have its new outgoing edge stored in the
   original edge's AUX field.

   This form avoids the need to walk all the edges in the CFG to
   discover blocks which need processing and avoids unnecessary
   hash table lookups to map from threaded edge to new target.  */

void
fwd_jt_path_registry::mark_threaded_blocks (bitmap threaded_blocks)
{
  unsigned int i;
  bitmap_iterator bi;
  auto_bitmap tmp;
  basic_block bb;
  edge e;
  edge_iterator ei;

  /* It is possible to have jump threads in which one is a subpath
     of the other.  ie, (A, B), (B, C), (C, D) where B is a joiner
     block and (B, C), (C, D) where no joiner block exists.

     When this occurs ignore the jump thread request with the joiner
     block.  It's totally subsumed by the simpler jump thread request.

     This results in less block copying, simpler CFGs.  More importantly,
     when we duplicate the joiner block, B, in this case we will create
     a new threading opportunity that we wouldn't be able to optimize
     until the next jump threading iteration.

     So first convert the jump thread requests which do not require a
     joiner block.  */
  for (i = 0; i < m_paths.length (); i++)
    {
      vec<jump_thread_edge *> *path = m_paths[i];

      if (path->length () > 1
	  && (*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
	{
	  /* Record the path on its starting edge's AUX field and note
	     the destination block as threaded.  */
	  edge e = (*path)[0]->e;
	  e->aux = (void *)path;
	  bitmap_set_bit (tmp, e->dest->index);
	}
    }

  /* Now iterate again, converting cases where we want to thread
     through a joiner block, but only if no other edge on the path
     already has a jump thread attached to it.  We do this in two passes,
     to avoid situations where the order in the paths vec can hide overlapping
     threads (the path is recorded on the incoming edge, so we would miss
     cases where the second path starts at a downstream edge on the same
     path).  First record all joiner paths, deleting any in the unexpected
     case where there is already a path for that incoming edge.  */
  for (i = 0; i < m_paths.length ();)
    {
      vec<jump_thread_edge *> *path = m_paths[i];

      if (path->length () > 1
	  && (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
	{
	  /* Attach the path to the starting edge if none is yet recorded.  */
	  if ((*path)[0]->e->aux == NULL)
	    {
	      (*path)[0]->e->aux = path;
	      i++;
	    }
	  else
	    {
	      /* unordered_remove replaces slot I with the last element,
		 so do not advance I here.  */
	      m_paths.unordered_remove (i);
	      cancel_thread (path);
	    }
	}
      else
	{
	  i++;
	}
    }

  /* Second, look for paths that have any other jump thread attached to
     them, and either finish converting them or cancel them.  */
  for (i = 0; i < m_paths.length ();)
    {
      vec<jump_thread_edge *> *path = m_paths[i];
      edge e = (*path)[0]->e;

      if (path->length () > 1
	  && (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
	{
	  unsigned int j;
	  for (j = 1; j < path->length (); j++)
	    if ((*path)[j]->e->aux != NULL)
	      break;

	  /* If we iterated through the entire path without exiting the loop,
	     then we are good to go, record it.  */
	  if (j == path->length ())
	    {
	      bitmap_set_bit (tmp, e->dest->index);
	      i++;
	    }
	  else
	    {
	      e->aux = NULL;
	      m_paths.unordered_remove (i);
	      cancel_thread (path);
	    }
	}
      else
	{
	  i++;
	}
    }

  /* When optimizing for size, prune all thread paths where statement
     duplication is necessary.

     We walk the jump thread path looking for copied blocks.  There's
     two types of copied blocks.

       EDGE_COPY_SRC_JOINER_BLOCK is always copied and thus we will
       cancel the jump threading request when optimizing for size.

       EDGE_COPY_SRC_BLOCK which is copied, but some of its statements
       will be killed by threading.  If threading does not kill all of
       its statements, then we should cancel the jump threading request
       when optimizing for size.  */
  if (optimize_function_for_size_p (cfun))
    {
      EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
	{
	  FOR_EACH_EDGE (e, ei, BASIC_BLOCK_FOR_FN (cfun, i)->preds)
	    if (e->aux)
	      {
		vec<jump_thread_edge *> *path = THREAD_PATH (e);

		unsigned int j;
		for (j = 1; j < path->length (); j++)
		  {
		    bb = (*path)[j]->e->src;
		    if (redirection_block_p (bb))
		      ;
		    else if ((*path)[j]->type == EDGE_COPY_SRC_JOINER_BLOCK
			     || ((*path)[j]->type == EDGE_COPY_SRC_BLOCK
				 && (count_stmts_and_phis_in_block (bb)
				     != estimate_threading_killed_stmts (bb))))
		      break;
		  }

		/* J < length means the walk above found a block whose
		   copy would grow code size; cancel the request.  */
		if (j != path->length ())
		  {
		    cancel_thread (path);
		    e->aux = NULL;
		  }
		else
		  bitmap_set_bit (threaded_blocks, i);
	      }
	}
    }
  else
    bitmap_copy (threaded_blocks, tmp);

  /* If we have a joiner block (J) which has two successors S1 and S2 and
     we are threading though S1 and the final destination of the thread
     is S2, then we must verify that any PHI nodes in S2 have the same
     PHI arguments for the edge J->S2 and J->S1->...->S2.

     We used to detect this prior to registering the jump thread, but
     that prohibits propagation of edge equivalences into non-dominated
     PHI nodes as the equivalency test might occur before propagation.

     This must also occur after we truncate any jump threading paths
     as this scenario may only show up after truncation.

     This works for now, but will need improvement as part of the FSA
     optimization.

     Note since we've moved the thread request data to the edges,
     we have to iterate on those rather than the threaded_edges vector.  */
  EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  if (e->aux)
	    {
	      vec<jump_thread_edge *> *path = THREAD_PATH (e);
	      bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);

	      if (have_joiner)
		{
		  basic_block joiner = e->dest;
		  edge final_edge = path->last ()->e;
		  basic_block final_dest = final_edge->dest;
		  edge e2 = find_edge (joiner, final_dest);

		  if (e2 && !phi_args_equal_on_edges (e2, final_edge))
		    {
		      cancel_thread (path);
		      e->aux = NULL;
		    }
		}
	    }
	}
    }

  /* Look for jump threading paths which cross multiple loop headers.

     The code to thread through loop headers will change the CFG in ways
     that invalidate the cached loop iteration information.  So we must
     detect that case and wipe the cached information.  */
  EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
      FOR_EACH_EDGE (e, ei, bb->preds)
	{
	  if (e->aux)
	    {
	      gcc_assert (loops_state_satisfies_p
			  (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS));
	      vec<jump_thread_edge *> *path = THREAD_PATH (e);

	      /* NOTE: this inner I deliberately shadows the outer
		 bitmap index I.  */
	      for (unsigned int i = 0, crossed_headers = 0;
		   i < path->length ();
		   i++)
		{
		  basic_block dest = (*path)[i]->e->dest;
		  basic_block src = (*path)[i]->e->src;
		  /* If we enter a loop.  */
		  if (flow_loop_nested_p (src->loop_father, dest->loop_father))
		    ++crossed_headers;
		  /* If we step from a block outside an irreducible region
		     to a block inside an irreducible region, then we have
		     crossed into a loop.  */
		  else if (! (src->flags & BB_IRREDUCIBLE_LOOP)
			   && (dest->flags & BB_IRREDUCIBLE_LOOP))
		    ++crossed_headers;
		  if (crossed_headers > 1)
		    {
		      vect_free_loop_info_assumptions
			((*path)[path->length () - 1]->e->dest->loop_father);
		      break;
		    }
		}
	    }
	}
    }
}
2192 :
2193 :
2194 : /* Verify that the REGION is a valid jump thread. A jump thread is a special
2195 : case of SEME Single Entry Multiple Exits region in which all nodes in the
2196 : REGION have exactly one incoming edge. The only exception is the first block
2197 : that may not have been connected to the rest of the cfg yet. */
2198 :
2199 : DEBUG_FUNCTION void
2200 1233611 : verify_jump_thread (basic_block *region, unsigned n_region)
2201 : {
2202 2924411 : for (unsigned i = 0; i < n_region; i++)
2203 1690800 : gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
2204 1233611 : }
2205 :
2206 : /* Return true when BB is one of the first N items in BBS. */
2207 :
2208 : static inline bool
2209 2478170 : bb_in_bbs (basic_block bb, basic_block *bbs, int n)
2210 : {
2211 5840704 : for (int i = 0; i < n; i++)
2212 3375484 : if (bb == bbs[i])
2213 : return true;
2214 :
2215 : return false;
2216 : }
2217 :
2218 : void
2219 22 : jt_path_registry::debug_path (FILE *dump_file, int pathno)
2220 : {
2221 22 : vec<jump_thread_edge *> *p = m_paths[pathno];
2222 22 : fprintf (dump_file, "path: ");
2223 130 : for (unsigned i = 0; i < p->length (); ++i)
2224 108 : fprintf (dump_file, "%d -> %d, ",
2225 108 : (*p)[i]->e->src->index, (*p)[i]->e->dest->index);
2226 22 : fprintf (dump_file, "\n");
2227 22 : }
2228 :
2229 : void
2230 0 : jt_path_registry::debug ()
2231 : {
2232 0 : for (unsigned i = 0; i < m_paths.length (); ++i)
2233 0 : debug_path (stderr, i);
2234 0 : }
2235 :
2236 : /* Rewire a jump_thread_edge so that the source block is now a
2237 : threaded source block.
2238 :
2239 : PATH_NUM is an index into the global path table PATHS.
2240 : EDGE_NUM is the jump thread edge number into said path.
2241 :
2242 : Returns TRUE if we were able to successfully rewire the edge. */
2243 :
2244 : bool
2245 83138 : back_jt_path_registry::rewire_first_differing_edge (unsigned path_num,
2246 : unsigned edge_num)
2247 : {
2248 83138 : vec<jump_thread_edge *> *path = m_paths[path_num];
2249 83138 : edge &e = (*path)[edge_num]->e;
2250 83138 : if (dump_file && (dump_flags & TDF_DETAILS))
2251 14 : fprintf (dump_file, "rewiring edge candidate: %d -> %d\n",
2252 14 : e->src->index, e->dest->index);
2253 83138 : basic_block src_copy = get_bb_copy (e->src);
2254 83138 : if (src_copy == NULL)
2255 : {
2256 18802 : if (dump_file && (dump_flags & TDF_DETAILS))
2257 9 : fprintf (dump_file, "ignoring candidate: there is no src COPY\n");
2258 18802 : return false;
2259 : }
2260 64336 : edge new_edge = find_edge (src_copy, e->dest);
2261 : /* If the previously threaded paths created a flow graph where we
2262 : can no longer figure out where to go, give up. */
2263 64336 : if (new_edge == NULL)
2264 : {
2265 6560 : if (dump_file && (dump_flags & TDF_DETAILS))
2266 0 : fprintf (dump_file, "ignoring candidate: we lost our way\n");
2267 6560 : return false;
2268 : }
2269 57776 : e = new_edge;
2270 57776 : return true;
2271 : }
2272 :
/* After a path has been jump threaded, adjust the remaining paths
   that are subsets of this path, so these paths can be safely
   threaded within the context of the new threaded path.

   For example, suppose we have just threaded:

   5 -> 6 -> 7 -> 8 -> 12	=>	5 -> 6' -> 7' -> 8' -> 12'

   And we have an upcoming threading candidate:
   5 -> 6 -> 7 -> 8 -> 15 -> 20

   This function adjusts the upcoming path into:
   8' -> 15 -> 20

   CURR_PATH_NUM is an index into the global paths table.  It
   specifies the path that was just threaded.  */

void
back_jt_path_registry::adjust_paths_after_duplication (unsigned curr_path_num)
{
  vec<jump_thread_edge *> *curr_path = m_paths[curr_path_num];

  /* Iterate through all the other paths and adjust them.  Note
     CAND_PATH_NUM is only advanced when the candidate is kept;
     unordered_remove refills the current slot.  */
  for (unsigned cand_path_num = 0; cand_path_num < m_paths.length (); )
    {
      if (cand_path_num == curr_path_num)
	{
	  ++cand_path_num;
	  continue;
	}
      /* Make sure the candidate to adjust starts with the same path
	 as the recently threaded path.  */
      vec<jump_thread_edge *> *cand_path = m_paths[cand_path_num];
      if ((*cand_path)[0]->e != (*curr_path)[0]->e)
	{
	  ++cand_path_num;
	  continue;
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "adjusting candidate: ");
	  debug_path (dump_file, cand_path_num);
	}

      /* Chop off from the candidate path any prefix it shares with
	 the recently threaded path.  */
      unsigned minlength = MIN (curr_path->length (), cand_path->length ());
      unsigned j;
      for (j = 0; j < minlength; ++j)
	{
	  edge cand_edge = (*cand_path)[j]->e;
	  edge curr_edge = (*curr_path)[j]->e;

	  /* Once the prefix no longer matches, adjust the first
	     non-matching edge to point from an adjusted edge to
	     wherever it was going.  */
	  if (cand_edge != curr_edge)
	    {
	      gcc_assert (cand_edge->src == curr_edge->src);
	      if (!rewire_first_differing_edge (cand_path_num, j))
		goto remove_candidate_from_list;
	      break;
	    }
	}
      if (j == minlength)
	{
	  /* If we consumed the max subgraph we could look at, and
	     still didn't find any different edges, it's the
	     last edge after MINLENGTH.  */
	  if (cand_path->length () > minlength)
	    {
	      if (!rewire_first_differing_edge (cand_path_num, j))
		goto remove_candidate_from_list;
	    }
	  else if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "adjusting first edge after MINLENGTH.\n");
	}
      if (j > 0)
	{
	  /* If we are removing everything, delete the entire candidate.  */
	  if (j == cand_path->length ())
	    {
	      /* NOTE: the label sits inside this block so the gotos
		 above share the removal code; the jump bypasses no
		 variable initializations.  */
	    remove_candidate_from_list:
	      cancel_thread (cand_path, "Adjusted candidate is EMPTY");
	      m_paths.unordered_remove (cand_path_num);
	      continue;
	    }
	  /* Otherwise, just remove the redundant sub-path.  */
	  if (cand_path->length () - j > 1)
	    cand_path->block_remove (0, j);
	  else if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Dropping illformed candidate.\n");
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "adjusted candidate: ");
	  debug_path (dump_file, cand_path_num);
	}
      ++cand_path_num;
    }
}
2374 :
/* Duplicates a jump-thread path of N_REGION basic blocks.
   The ENTRY edge is redirected to the duplicate of the region.

   Remove the last conditional statement in the last basic block in the REGION,
   and create a single fallthru edge pointing to the same destination as the
   EXIT edge.

   CURRENT_PATH_NO is an index into the global paths[] table
   specifying the jump-thread path.

   Returns false if it is unable to copy the region, true otherwise.  */

bool
back_jt_path_registry::duplicate_thread_path (edge entry,
					      edge exit,
					      basic_block *region,
					      unsigned n_region,
					      unsigned current_path_no)
{
  unsigned i;
  class loop *loop = entry->dest->loop_father;
  edge exit_copy;
  edge redirected;
  profile_count curr_count;

  if (!can_copy_bbs_p (region, n_region))
    return false;

  /* Some sanity checking.  Note that we do not check for all possible
     missuses of the functions.  I.e. if you ask to copy something weird,
     it will work, but the state of structures probably will not be
     correct.  */
  for (i = 0; i < n_region; i++)
    {
      /* We do not handle subloops, i.e. all the blocks must belong to the
	 same loop.  */
      if (region[i]->loop_father != loop)
	return false;
    }

  initialize_original_copy_tables ();

  set_loop_copy (loop, loop);

  basic_block *region_copy = XNEWVEC (basic_block, n_region);
  copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
	    split_edge_bb_loc (entry), false);

  /* Fix up: copy_bbs redirects all edges pointing to copied blocks.  The
     following code ensures that all the edges exiting the jump-thread path are
     redirected back to the original code: these edges are exceptions
     invalidating the property that is propagated by executing all the blocks of
     the jump-thread path in order.  */

  curr_count = entry->count ();

  for (i = 0; i < n_region; i++)
    {
      edge e;
      edge_iterator ei;
      basic_block bb = region_copy[i];

      /* Watch inconsistent profile.  */
      if (curr_count > region[i]->count)
	curr_count = region[i]->count;
      /* Scale current BB.  */
      if (region[i]->count.nonzero_p () && curr_count.initialized_p ())
	{
	  /* In the middle of the path we only scale the frequencies.
	     In last BB we need to update probabilities of outgoing edges
	     because we know which one is taken at the threaded path.  */
	  if (i + 1 != n_region)
	    scale_bbs_frequencies_profile_count (region + i, 1,
					         region[i]->count - curr_count,
					         region[i]->count);
	  else
	    update_bb_profile_for_threading (region[i],
					     curr_count,
					     exit);
	  scale_bbs_frequencies_profile_count (region_copy + i, 1, curr_count,
					       region_copy[i]->count);
	}

      if (single_succ_p (bb))
	{
	  /* Make sure the successor is the next node in the path.  */
	  gcc_assert (i + 1 == n_region
		      || region_copy[i + 1] == single_succ_edge (bb)->dest);
	  if (i + 1 != n_region)
	    {
	      curr_count = single_succ_edge (bb)->count ();
	    }
	  continue;
	}

      /* Special case the last block on the path: make sure that it does not
	 jump back on the copied path, including back to itself.  */
      if (i + 1 == n_region)
	{
	  FOR_EACH_EDGE (e, ei, bb->succs)
	    if (bb_in_bbs (e->dest, region_copy, n_region))
	      {
		basic_block orig = get_bb_original (e->dest);
		if (orig)
		  redirect_edge_and_branch_force (e, orig);
	      }
	  continue;
	}

      /* Redirect all other edges jumping to non-adjacent blocks back to the
	 original code.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (region_copy[i + 1] != e->dest)
	  {
	    basic_block orig = get_bb_original (e->dest);
	    if (orig)
	      redirect_edge_and_branch_force (e, orig);
	  }
	else
	  {
	    curr_count = e->count ();
	  }
    }


  if (flag_checking)
    verify_jump_thread (region_copy, n_region);

  /* Remove the last branch in the jump thread path.  */
  remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);

  /* And fixup the flags on the single remaining edge.  */
  edge fix_e = find_edge (region_copy[n_region - 1], exit->dest);
  fix_e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
  fix_e->flags |= EDGE_FALLTHRU;

  /* NOTE(review): FIX_E above proves this edge already exists, so E is
     presumably NULL here and the block below does not run — confirm
     whether this make_edge call is still needed.  */
  edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);

  if (e)
    {
      rescan_loop_exit (e, true, false);
      e->probability = profile_probability::always ();
    }

  /* Redirect the entry and add the phi node arguments.  */
  if (entry->dest == loop->header)
    mark_loop_for_removal (loop);
  redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
  gcc_assert (redirected != NULL);
  flush_pending_stmts (entry);

  /* Add the other PHI node arguments.  */
  add_phi_args_after_copy (region_copy, n_region, NULL);

  free (region_copy);

  adjust_paths_after_duplication (current_path_no);

  free_original_copy_tables ();
  return true;
}
2536 :
2537 : /* Return true when PATH is a valid jump-thread path. */
2538 :
2539 : static bool
2540 1252832 : valid_jump_thread_path (vec<jump_thread_edge *> *path)
2541 : {
2542 1252832 : unsigned len = path->length ();
2543 :
2544 : /* Check that the path is connected. */
2545 2971155 : for (unsigned int j = 0; j < len - 1; j++)
2546 : {
2547 1737491 : edge e = (*path)[j]->e;
2548 1737491 : if (e->dest != (*path)[j+1]->e->src)
2549 : return false;
2550 : }
2551 : return true;
2552 : }
2553 :
2554 : /* Remove any queued jump threads that include edge E.
2555 :
2556 : We don't actually remove them here, just record the edges into ax
2557 : hash table. That way we can do the search once per iteration of
2558 : DOM/VRP rather than for every case where DOM optimizes away a COND_EXPR. */
2559 :
2560 : void
2561 397555 : fwd_jt_path_registry::remove_jump_threads_including (edge_def *e)
2562 : {
2563 397555 : if (!m_paths.exists () || !flag_thread_jumps)
2564 : return;
2565 :
2566 397519 : edge *slot = m_removed_edges->find_slot (e, INSERT);
2567 397519 : *slot = e;
2568 : }
2569 :
2570 : /* Thread all paths that have been queued for jump threading, and
2571 : update the CFG accordingly.
2572 :
2573 : It is the caller's responsibility to fix the dominance information
2574 : and rewrite duplicated SSA_NAMEs back into SSA form.
2575 :
2576 : If PEEL_LOOP_HEADERS is false, avoid threading edges through loop
2577 : headers if it does not simplify the loop.
2578 :
2579 : Returns true if one or more edges were threaded. */
2580 :
2581 : bool
2582 8337306 : jt_path_registry::thread_through_all_blocks (bool peel_loop_headers)
2583 : {
2584 8392648 : if (m_paths.length () == 0)
2585 : return false;
2586 :
2587 443482 : m_num_threaded_edges = 0;
2588 :
2589 443482 : bool retval = update_cfg (peel_loop_headers);
2590 :
2591 443482 : statistics_counter_event (cfun, "Jumps threaded", m_num_threaded_edges);
2592 :
2593 443482 : if (retval)
2594 : {
2595 388140 : loops_state_set (LOOPS_NEED_FIXUP);
2596 388140 : return true;
2597 : }
2598 : return false;
2599 : }
2600 :
/* This is the backward threader version of thread_through_all_blocks
   using a generic BB copier.

   Pops each queued path in turn, duplicates the blocks on the path
   with duplicate_thread_path and redirects the path's entry edge into
   the copy.  Returns true if at least one path was realized.  */

bool
back_jt_path_registry::update_cfg (bool /*peel_loop_headers*/)
{
  bool retval = false;
  hash_set<edge> visited_starting_edges;

  /* Consume the queue one path at a time; every iteration removes
     element 0, either by cancelling it or after attempting to copy it.  */
  while (m_paths.length ())
    {
      vec<jump_thread_edge *> *path = m_paths[0];
      edge entry = (*path)[0]->e;

      /* Do not jump-thread twice from the same starting edge.

	 Previously we only checked that we weren't threading twice
	 from the same BB, but that was too restrictive.  Imagine a
	 path that starts from GIMPLE_COND(x_123 == 0,...), where both
	 edges out of this conditional yield paths that can be
	 threaded (for example, both lead to an x_123==0 or x_123!=0
	 conditional further down the line.  */
      if (visited_starting_edges.contains (entry)
	  /* We may not want to realize this jump thread path for
	     various reasons.  So check it first.  */
	  || !valid_jump_thread_path (path))
	{
	  /* Remove invalid jump-thread paths.  */
	  cancel_thread (path, "Avoiding threading twice from same edge");
	  m_paths.unordered_remove (0);
	  continue;
	}

      /* The region to duplicate is every block on the path except the
	 destination of the final (exit) edge.  */
      unsigned len = path->length ();
      edge exit = (*path)[len - 1]->e;
      basic_block *region = XNEWVEC (basic_block, len - 1);

      for (unsigned int j = 0; j < len - 1; j++)
	region[j] = (*path)[j]->e->dest;

      if (duplicate_thread_path (entry, exit, region, len - 1, 0))
	{
	  /* We do not update dominance info.  */
	  free_dominance_info (CDI_DOMINATORS);
	  visited_starting_edges.add (entry);
	  retval = true;
	  m_num_threaded_edges++;
	}

      /* Whether or not the duplication succeeded, this path is done.  */
      path->release ();
      m_paths.unordered_remove (0);
      free (region);
    }
  return retval;
}
2656 :
/* This is the forward threader version of thread_through_all_blocks,
   using a custom BB copier.

   Returns true if any jump thread was realized.  */

bool
fwd_jt_path_registry::update_cfg (bool may_peel_loop_headers)
{
  bool retval = false;

  /* Remove any paths that referenced removed edges.  */
  if (m_removed_edges)
    for (unsigned i = 0; i < m_paths.length (); )
      {
	unsigned int j;
	vec<jump_thread_edge *> *path = m_paths[i];

	/* Scan the path for an edge that was recorded as removed, or
	   for a copied block that can no longer be duplicated.  */
	for (j = 0; j < path->length (); j++)
	  {
	    edge e = (*path)[j]->e;
	    if (m_removed_edges->find_slot (e, NO_INSERT)
		|| (((*path)[j]->type == EDGE_COPY_SRC_BLOCK
		     || (*path)[j]->type == EDGE_COPY_SRC_JOINER_BLOCK)
		    && !can_duplicate_block_p (e->src)))
	      break;
	  }

	/* J short of the path length means the scan above bailed out
	   on a stale edge; drop the whole path.  */
	if (j != path->length ())
	  {
	    cancel_thread (path, "Thread references removed edge");
	    m_paths.unordered_remove (i);
	    continue;
	  }
	i++;
      }

  auto_bitmap threaded_blocks;
  mark_threaded_blocks (threaded_blocks);

  initialize_original_copy_tables ();

  /* The order in which we process jump threads can be important.

     Consider if we have two jump threading paths A and B.  If the
     target edge of A is the starting edge of B and we thread path A
     first, then we create an additional incoming edge into B->dest that
     we cannot discover as a jump threading path on this iteration.

     If we instead thread B first, then the edge into B->dest will have
     already been redirected before we process path A and path A will
     naturally, with no further work, target the redirected path for B.

     A post-order is sufficient here.  Compute the ordering first, then
     process the blocks.  */
  if (!bitmap_empty_p (threaded_blocks))
    {
      int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
      unsigned int postorder_num = post_order_compute (postorder, false, false);
      for (unsigned int i = 0; i < postorder_num; i++)
	{
	  unsigned int indx = postorder[i];
	  if (bitmap_bit_p (threaded_blocks, indx))
	    {
	      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, indx);
	      retval |= thread_block (bb, true);
	    }
	}
      free (postorder);
    }

  /* Then perform the threading through loop headers.  We start with the
     innermost loop, so that the changes in cfg we perform won't affect
     further threading.  */
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    {
      if (!loop->header
	  || !bitmap_bit_p (threaded_blocks, loop->header->index))
	continue;

      retval |= thread_through_loop_header (loop, may_peel_loop_headers);
    }

  /* All jump threading paths should have been resolved at this
     point.  Verify that is the case.  */
  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    {
      edge_iterator ei;
      edge e;
      /* Thread paths are recorded in edge->aux; any leftover marker
	 here means a path was registered but never realized/cleared.  */
      FOR_EACH_EDGE (e, ei, bb->preds)
	gcc_assert (e->aux == NULL);
    }

  free_original_copy_tables ();

  return retval;
}
2752 :
/* Check whether the candidate jump-thread PATH would create a CFG we
   cannot or do not want to handle: a NULL edge on the path, threading
   through a latch before loop optimizations are done, crossing loops,
   rotating a loop, or crossing a loop header without exiting the loop.
   If PATH is invalid, cancel it (via cancel_thread) and return true;
   otherwise return false.  */

bool
jt_path_registry::cancel_invalid_paths (vec<jump_thread_edge *> &path)
{
  gcc_checking_assert (!path.is_empty ());
  edge entry = path[0]->e;
  edge exit = path[path.length () - 1]->e;
  bool seen_latch = false;
  int loops_crossed = 0;
  bool crossed_latch = false;
  bool crossed_loop_header = false;
  // Use ->dest here instead of ->src to ignore the first block.  The
  // first block is allowed to be in a different loop, since it'll be
  // redirected.  See similar comment in profitable_path_p: "we don't
  // care about that block...".
  loop_p loop = entry->dest->loop_father;
  loop_p curr_loop = loop;

  /* Walk the path once, collecting the latch/loop-crossing facts the
     checks below depend on.  */
  for (unsigned int i = 0; i < path.length (); i++)
    {
      edge e = path[i]->e;

      if (e == NULL)
	{
	  // NULL outgoing edges on a path can happen for jumping to a
	  // constant address.
	  cancel_thread (&path, "Found NULL edge in jump threading path");
	  return true;
	}

      if (loop->latch == e->src || loop->latch == e->dest)
	{
	  seen_latch = true;
	  // Like seen_latch, but excludes the first block.
	  if (e->src != entry->src)
	    crossed_latch = true;
	}

      /* Count every transition into a different loop.  */
      if (e->dest->loop_father != curr_loop)
	{
	  curr_loop = e->dest->loop_father;
	  ++loops_crossed;
	}

      // ?? Avoid threading through loop headers that remain in the
      // loop, as such threadings tend to create sub-loops which
      // _might_ be OK ??.
      if (e->dest->loop_father->header == e->dest
	  && !flow_loop_nested_p (exit->dest->loop_father,
				  e->dest->loop_father))
	crossed_loop_header = true;

      /* Forward threads must never follow back edges.  */
      if (flag_checking && !m_backedge_threads)
	gcc_assert ((path[i]->e->flags & EDGE_DFS_BACK) == 0);
    }

  // If we crossed a loop into an outer loop without crossing the
  // latch, this is just an early exit from the loop.
  if (loops_crossed == 1
      && !crossed_latch
      && flow_loop_nested_p (exit->dest->loop_father, exit->src->loop_father))
    return false;

  /* After loop optimizations are done, any path is acceptable.  */
  if (cfun->curr_properties & PROP_loop_opts_done)
    return false;

  if (seen_latch && empty_block_p (loop->latch))
    {
      cancel_thread (&path, "Threading through latch before loop opts "
		     "would create non-empty latch");
      return true;
    }
  if (loops_crossed)
    {
      cancel_thread (&path, "Path crosses loops");
      return true;
    }
  // The path should either start and end in the same loop or exit the
  // loop it starts in but never enter a loop.  This also catches
  // creating irreducible loops, not only rotation.
  if (entry->src->loop_father != exit->dest->loop_father
      && !flow_loop_nested_p (exit->src->loop_father,
			      entry->dest->loop_father))
    {
      cancel_thread (&path, "Path rotates loop");
      return true;
    }
  if (crossed_loop_header)
    {
      cancel_thread (&path, "Path crosses loop header but does not exit it");
      return true;
    }
  return false;
}
2846 :
2847 : /* Register a jump threading opportunity. We queue up all the jump
2848 : threading opportunities discovered by a pass and update the CFG
2849 : and SSA form all at once.
2850 :
2851 : E is the edge we can thread, E2 is the new target edge, i.e., we
2852 : are effectively recording that E->dest can be changed to E2->dest
2853 : after fixing the SSA graph.
2854 :
2855 : Return TRUE if PATH was successfully threaded. */
2856 :
2857 : bool
2858 2644809 : jt_path_registry::register_jump_thread (vec<jump_thread_edge *> *path)
2859 : {
2860 2644809 : gcc_checking_assert (flag_thread_jumps);
2861 :
2862 2644809 : if (!dbg_cnt (registered_jump_thread))
2863 : {
2864 0 : path->release ();
2865 0 : return false;
2866 : }
2867 :
2868 2644809 : if (cancel_invalid_paths (*path))
2869 : return false;
2870 :
2871 2080071 : if (dump_file && (dump_flags & TDF_DETAILS))
2872 124 : dump_jump_thread_path (dump_file, *path, true);
2873 :
2874 2080071 : m_paths.safe_push (path);
2875 2080071 : return true;
2876 : }
2877 :
2878 : /* Return how many uses of T there are within BB, as long as there
2879 : aren't any uses outside BB. If there are any uses outside BB,
2880 : return -1 if there's at most one use within BB, or -2 if there is
2881 : more than one use within BB. */
2882 :
2883 : static int
2884 1800743 : uses_in_bb (tree t, basic_block bb)
2885 : {
2886 1800743 : int uses = 0;
2887 1800743 : bool outside_bb = false;
2888 :
2889 1800743 : imm_use_iterator iter;
2890 1800743 : use_operand_p use_p;
2891 7320796 : FOR_EACH_IMM_USE_FAST (use_p, iter, t)
2892 : {
2893 3828772 : if (is_gimple_debug (USE_STMT (use_p)))
2894 643695 : continue;
2895 :
2896 3185077 : if (gimple_bb (USE_STMT (use_p)) != bb)
2897 : outside_bb = true;
2898 : else
2899 2105043 : uses++;
2900 :
2901 3185077 : if (outside_bb && uses > 1)
2902 109462 : return -2;
2903 109462 : }
2904 :
2905 1691281 : if (outside_bb)
2906 493623 : return -1;
2907 :
2908 : return uses;
2909 : }
2910 :
/* Starting from the final control flow stmt in BB, assuming it will
   be removed, follow uses in to-be-removed stmts back to their defs
   and count how many defs are to become dead and be removed as
   well.  Returns the estimated number of killed statements.  */

unsigned int
estimate_threading_killed_stmts (basic_block bb)
{
  int killed_stmts = 0;
  /* Maps an SSA name to the number of its not-yet-accounted-for uses
     within BB (only tracked when that count is interesting; negative
     values are the uses_in_bb sentinels for "has uses outside BB").  */
  hash_map<tree, int> ssa_remaining_uses;
  /* Statements known dead whose operands still need processing.  */
  auto_vec<gimple *, 4> dead_worklist;

  /* If the block has only two predecessors, threading will turn phi
     dsts into either src, so count them as dead stmts.  */
  bool drop_all_phis = EDGE_COUNT (bb->preds) == 2;

  if (drop_all_phis)
    for (gphi_iterator gsi = gsi_start_phis (bb);
	 !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gphi *phi = gsi.phi ();
	tree dst = gimple_phi_result (phi);

	/* We don't count virtual PHIs as stmts in
	   record_temporary_equivalences_from_phis.  */
	if (virtual_operand_p (dst))
	  continue;

	killed_stmts++;
      }

  /* An empty block kills nothing beyond the PHIs counted above.  */
  if (gsi_end_p (gsi_last_bb (bb)))
    return killed_stmts;

  /* Only control statements (conditionals, gotos, switches) are
     removed by threading; anything else ends the estimate here.  */
  gimple *stmt = gsi_stmt (gsi_last_bb (bb));
  if (gimple_code (stmt) != GIMPLE_COND
      && gimple_code (stmt) != GIMPLE_GOTO
      && gimple_code (stmt) != GIMPLE_SWITCH)
    return killed_stmts;

  /* The control statement is always dead.  */
  killed_stmts++;
  dead_worklist.quick_push (stmt);
  while (!dead_worklist.is_empty ())
    {
      stmt = dead_worklist.pop ();

      ssa_op_iter iter;
      use_operand_p use_p;
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
	{
	  tree t = USE_FROM_PTR (use_p);
	  gimple *def = SSA_NAME_DEF_STMT (t);

	  /* Only defs local to BB can die with it; PHIs were already
	     counted when DROP_ALL_PHIS, and side effects keep a stmt
	     alive regardless of uses.  */
	  if (gimple_bb (def) == bb
	      && (gimple_code (def) != GIMPLE_PHI
		  || !drop_all_phis)
	      && !gimple_has_side_effects (def))
	    {
	      int *usesp = ssa_remaining_uses.get (t);
	      int uses;

	      if (usesp)
		uses = *usesp;
	      else
		uses = uses_in_bb (t, bb);

	      gcc_assert (uses);

	      /* Don't bother recording the expected use count if we
		 won't find any further uses within BB.  */
	      if (!usesp && (uses < -1 || uses > 1))
		{
		  usesp = &ssa_remaining_uses.get_or_insert (t);
		  *usesp = uses;
		}

	      /* Negative means T has uses outside BB, so its def can
		 never become dead here.  */
	      if (uses < 0)
		continue;

	      --uses;
	      if (usesp)
		*usesp = uses;

	      /* Last use within BB consumed: the def dies too, and
		 its own operands may now lose their last use.  */
	      if (!uses)
		{
		  killed_stmts++;
		  if (usesp)
		    ssa_remaining_uses.remove (t);
		  if (gimple_code (def) != GIMPLE_PHI)
		    dead_worklist.safe_push (def);
		}
	    }
	}
    }

  if (dump_file)
    fprintf (dump_file, "threading bb %i kills %i stmts\n",
	     bb->index, killed_stmts);

  return killed_stmts;
}
|