Branch data Line data Source code
1 : : /* Thread edges through blocks and update the control flow and SSA graphs.
2 : : Copyright (C) 2004-2025 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify
7 : : it under the terms of the GNU General Public License as published by
8 : : the Free Software Foundation; either version 3, or (at your option)
9 : : any later version.
10 : :
11 : : GCC is distributed in the hope that it will be useful,
12 : : but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : : GNU General Public License for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : #include "config.h"
21 : : #include "system.h"
22 : : #include "coretypes.h"
23 : : #include "backend.h"
24 : : #include "tree.h"
25 : : #include "gimple.h"
26 : : #include "cfghooks.h"
27 : : #include "tree-pass.h"
28 : : #include "ssa.h"
29 : : #include "fold-const.h"
30 : : #include "cfganal.h"
31 : : #include "gimple-iterator.h"
32 : : #include "tree-ssa.h"
33 : : #include "tree-ssa-threadupdate.h"
34 : : #include "cfgloop.h"
35 : : #include "dbgcnt.h"
36 : : #include "tree-cfg.h"
37 : : #include "tree-vectorizer.h"
38 : : #include "tree-pass.h"
39 : :
40 : : /* Given a block B, update the CFG and SSA graph to reflect redirecting
41 : : one or more in-edges to B to instead reach the destination of an
42 : : out-edge from B while preserving any side effects in B.
43 : :
44 : : i.e., given A->B and B->C, change A->B to be A->C yet still preserve the
45 : : side effects of executing B.
46 : :
47 : : 1. Make a copy of B (including its outgoing edges and statements). Call
48 : : the copy B'. Note B' has no incoming edges or PHIs at this time.
49 : :
50 : : 2. Remove the control statement at the end of B' and all outgoing edges
51 : : except B'->C.
52 : :
53 : : 3. Add a new argument to each PHI in C with the same value as the existing
54 : : argument associated with edge B->C. Associate the new PHI arguments
55 : : with the edge B'->C.
56 : :
57 : : 4. For each PHI in B, find or create a PHI in B' with an identical
58 : : PHI_RESULT. Add an argument to the PHI in B' which has the same
59 : : value as the PHI in B associated with the edge A->B. Associate
60 : : the new argument in the PHI in B' with the edge A->B.
61 : :
62 : : 5. Change the edge A->B to A->B'.
63 : :
64 : : 5a. This automatically deletes any PHI arguments associated with the
65 : : edge A->B in B.
66 : :
67 : : 5b. This automatically associates each new argument added in step 4
68 : : with the edge A->B'.
69 : :
70 : : 6. Repeat for other incoming edges into B.
71 : :
72 : : 7. Put the duplicated resources in B and all the B' blocks into SSA form.
73 : :
74 : : Note that block duplication can be minimized by first collecting the
75 : : set of unique destination blocks that the incoming edges should
76 : : be threaded to.
77 : :
78 : : We reduce the number of edges and statements we create by not copying all
79 : : the outgoing edges and the control statement in step #1. We instead create
80 : : a template block without the outgoing edges and duplicate the template.
81 : :
82 : : Another case this code handles is threading through a "joiner" block. In
83 : : this case, we do not know the destination of the joiner block, but one
84 : : of the outgoing edges from the joiner block leads to a threadable path. This
85 : : case largely works as outlined above, except the duplicate of the joiner
86 : : block still contains a full set of outgoing edges and its control statement.
87 : : We just redirect one of its outgoing edges to our jump threading path. */
88 : :
89 : :
90 : : /* Steps #5 and #6 of the above algorithm are best implemented by walking
91 : : all the incoming edges which thread to the same destination edge at
92 : : the same time. That avoids lots of table lookups to get information
93 : : for the destination edge.
94 : :
95 : : To realize that implementation we create a list of incoming edges
96 : : which thread to the same outgoing edge. Thus to implement steps
97 : : #5 and #6 we traverse our hash table of outgoing edge information.
98 : : For each entry we walk the list of incoming edges which thread to
99 : : the current outgoing edge. */
100 : :
101 : : struct el
102 : : {
103 : : edge e;
104 : : struct el *next;
105 : : };
106 : :
107 : : /* Main data structure recording information regarding B's duplicate
108 : : blocks. */
109 : :
110 : : /* We need to efficiently record the unique thread destinations of this
111 : : block and specific information associated with those destinations. We
112 : : may have many incoming edges threaded to the same outgoing edge. This
113 : : can be naturally implemented with a hash table. */
114 : :
115 : : struct redirection_data : free_ptr_hash<redirection_data>
116 : : {
117 : : /* We support wiring up two block duplicates in a jump threading path.
118 : :
119 : : One is a normal block copy where we remove the control statement
120 : : and wire up its single remaining outgoing edge to the thread path.
121 : :
122 : : The other is a joiner block where we leave the control statement
123 : : in place, but wire one of the outgoing edges to a thread path.
124 : :
125 : : In theory we could have multiple block duplicates in a jump
126 : : threading path, but I haven't tried that.
127 : :
128 : : The duplicate blocks appear in this array in the same order in
129 : : which they appear in the jump thread path. */
130 : : basic_block dup_blocks[2];
131 : :
132 : : vec<jump_thread_edge *> *path;
133 : :
134 : : /* A list of incoming edges which we want to thread to the
135 : : same path. */
136 : : struct el *incoming_edges;
137 : :
138 : : /* hash_table support. */
139 : : static inline hashval_t hash (const redirection_data *);
140 : : static inline int equal (const redirection_data *, const redirection_data *);
141 : : };
142 : :
143 : 7981989 : jump_thread_path_allocator::jump_thread_path_allocator ()
144 : : {
145 : 7981989 : obstack_init (&m_obstack);
146 : 7981989 : }
147 : :
148 : 7981989 : jump_thread_path_allocator::~jump_thread_path_allocator ()
149 : : {
150 : 7981989 : obstack_free (&m_obstack, NULL);
151 : 7981989 : }
152 : :
153 : : jump_thread_edge *
154 : 23244135 : jump_thread_path_allocator::allocate_thread_edge (edge e,
155 : : jump_thread_edge_type type)
156 : : {
157 : 23244135 : void *r = obstack_alloc (&m_obstack, sizeof (jump_thread_edge));
158 : 23244135 : return new (r) jump_thread_edge (e, type);
159 : : }
160 : :
161 : : vec<jump_thread_edge *> *
162 : 17070444 : jump_thread_path_allocator::allocate_thread_path ()
163 : : {
164 : : // ?? Since the paths live in an obstack, we should be able to remove all
165 : : // references to path->release() throughout the code.
166 : 17070444 : void *r = obstack_alloc (&m_obstack, sizeof (vec <jump_thread_edge *>));
167 : 17070444 : return new (r) vec<jump_thread_edge *> ();
168 : : }
169 : :
170 : 7981989 : jt_path_registry::jt_path_registry (bool backedge_threads)
171 : : {
172 : 7981989 : m_paths.create (5);
173 : 7981989 : m_num_threaded_edges = 0;
174 : 7981989 : m_backedge_threads = backedge_threads;
175 : 7981989 : }
176 : :
177 : 7981989 : jt_path_registry::~jt_path_registry ()
178 : : {
179 : 7981989 : m_paths.release ();
180 : 7981989 : }
181 : :
182 : 2005950 : fwd_jt_path_registry::fwd_jt_path_registry ()
183 : 2005950 : : jt_path_registry (/*backedge_threads=*/false)
184 : : {
185 : 2005950 : m_removed_edges = new hash_table<struct removed_edges> (17);
186 : 2005950 : m_redirection_data = NULL;
187 : 2005950 : }
188 : :
189 : 4011900 : fwd_jt_path_registry::~fwd_jt_path_registry ()
190 : : {
191 : 2005950 : delete m_removed_edges;
192 : 4011900 : }
193 : :
194 : 5976039 : back_jt_path_registry::back_jt_path_registry ()
195 : 5976039 : : jt_path_registry (/*backedge_threads=*/true)
196 : : {
197 : 5976039 : }
198 : :
199 : : void
200 : 23244135 : jt_path_registry::push_edge (vec<jump_thread_edge *> *path,
201 : : edge e, jump_thread_edge_type type)
202 : : {
203 : 23244135 : jump_thread_edge *x = m_allocator.allocate_thread_edge (e, type);
204 : 23244135 : path->safe_push (x);
205 : 23244135 : }
206 : :
207 : : vec<jump_thread_edge *> *
208 : 17070444 : jt_path_registry::allocate_thread_path ()
209 : : {
210 : 17070444 : return m_allocator.allocate_thread_path ();
211 : : }
212 : :
213 : : /* Dump a jump threading path, including annotations about each
214 : : edge in the path. */
215 : :
216 : : static void
217 : 176 : dump_jump_thread_path (FILE *dump_file,
218 : : const vec<jump_thread_edge *> &path,
219 : : bool registering)
220 : : {
221 : 176 : if (registering)
222 : 232 : fprintf (dump_file,
223 : : " [%u] Registering jump thread: (%d, %d) incoming edge; ",
224 : : dbg_cnt_counter (registered_jump_thread),
225 : 116 : path[0]->e->src->index, path[0]->e->dest->index);
226 : : else
227 : 60 : fprintf (dump_file,
228 : : " Cancelling jump thread: (%d, %d) incoming edge; ",
229 : 60 : path[0]->e->src->index, path[0]->e->dest->index);
230 : :
231 : 544 : for (unsigned int i = 1; i < path.length (); i++)
232 : : {
233 : : /* We can get paths with a NULL edge when the final destination
234 : : of a jump thread turns out to be a constant address. We dump
235 : : those paths when debugging, so we have to be prepared for that
236 : : possibility here. */
237 : 368 : if (path[i]->e == NULL)
238 : 0 : continue;
239 : :
240 : 368 : fprintf (dump_file, " (%d, %d) ",
241 : 368 : path[i]->e->src->index, path[i]->e->dest->index);
242 : 368 : switch (path[i]->type)
243 : : {
244 : 39 : case EDGE_COPY_SRC_JOINER_BLOCK:
245 : 39 : fprintf (dump_file, "joiner");
246 : 39 : break;
247 : 198 : case EDGE_COPY_SRC_BLOCK:
248 : 198 : fprintf (dump_file, "normal");
249 : 198 : break;
250 : 131 : case EDGE_NO_COPY_SRC_BLOCK:
251 : 131 : fprintf (dump_file, "nocopy");
252 : 131 : break;
253 : 0 : default:
254 : 0 : gcc_unreachable ();
255 : : }
256 : :
257 : 368 : if ((path[i]->e->flags & EDGE_DFS_BACK) != 0)
258 : 22 : fprintf (dump_file, " (back)");
259 : : }
260 : 176 : fprintf (dump_file, "; \n");
261 : 176 : }
262 : :
263 : : DEBUG_FUNCTION void
264 : 0 : debug (const vec<jump_thread_edge *> &path)
265 : : {
266 : 0 : dump_jump_thread_path (stderr, path, true);
267 : 0 : }
268 : :
269 : : DEBUG_FUNCTION void
270 : 0 : debug (const vec<jump_thread_edge *> *path)
271 : : {
272 : 0 : debug (*path);
273 : 0 : }
274 : :
275 : : /* Release the memory associated with PATH, and if dumping is enabled,
276 : : dump out the reason why the thread was canceled. */
277 : :
278 : : static void
279 : 935991 : cancel_thread (vec<jump_thread_edge *> *path, const char *reason = NULL)
280 : : {
281 : 935991 : if (dump_file && (dump_flags & TDF_DETAILS))
282 : : {
283 : 60 : if (reason)
284 : 52 : fprintf (dump_file, "%s: ", reason);
285 : :
286 : 60 : dump_jump_thread_path (dump_file, *path, false);
287 : 60 : fprintf (dump_file, "\n");
288 : : }
289 : 935991 : path->release ();
290 : 935991 : }
291 : :
292 : : /* Simple hashing function. For any given incoming edge E, we're going
293 : : to be most concerned with the final destination of its jump thread
294 : : path. So hash on the block index of the final edge in the path. */
295 : :
296 : : inline hashval_t
297 : 416764 : redirection_data::hash (const redirection_data *p)
298 : : {
299 : 416764 : vec<jump_thread_edge *> *path = p->path;
300 : 416764 : return path->last ()->e->dest->index;
301 : : }
302 : :
303 : : /* Given two hash table entries, return true if they have the same
304 : : jump threading path. */
305 : : inline int
306 : 119848 : redirection_data::equal (const redirection_data *p1, const redirection_data *p2)
307 : : {
308 : 119848 : vec<jump_thread_edge *> *path1 = p1->path;
309 : 119848 : vec<jump_thread_edge *> *path2 = p2->path;
310 : :
311 : 359544 : if (path1->length () != path2->length ())
312 : : return false;
313 : :
314 : 195603 : for (unsigned int i = 1; i < path1->length (); i++)
315 : : {
316 : 146489 : if ((*path1)[i]->type != (*path2)[i]->type
317 : 146489 : || (*path1)[i]->e != (*path2)[i]->e)
318 : : return false;
319 : : }
320 : :
321 : : return true;
322 : : }
323 : :
324 : : /* Data structure of information to pass to hash table traversal routines. */
325 : : struct ssa_local_info_t
326 : : {
327 : : /* The current block we are working on. */
328 : : basic_block bb;
329 : :
330 : : /* We only create a template block for the first duplicated block in a
331 : : jump threading path as we may need many duplicates of that block.
332 : :
333 : : The second duplicate block in a path is specific to that path. Creating
334 : : and sharing a template for that block is considerably more difficult. */
335 : : basic_block template_block;
336 : :
337 : : /* If we append debug stmts to the template block after creating it,
338 : : this iterator won't be the last one in the block, and further
339 : : copies of the template block shouldn't get debug stmts after
340 : : it. */
341 : : gimple_stmt_iterator template_last_to_copy;
342 : :
343 : : /* Blocks duplicated for the thread. */
344 : : bitmap duplicate_blocks;
345 : :
346 : : /* TRUE if we thread one or more jumps, FALSE otherwise. */
347 : : bool jumps_threaded;
348 : :
349 : : /* When we have multiple paths through a joiner which reach different
350 : : final destinations, then we may need to correct for potential
351 : : profile insanities. */
352 : : bool need_profile_correction;
353 : :
354 : : // Jump threading statistics.
355 : : unsigned long num_threaded_edges;
356 : : };
357 : :
358 : : /* When we start updating the CFG for threading, data necessary for jump
359 : : threading is attached to the AUX field for the incoming edge. Use these
360 : : macros to access the underlying structure attached to the AUX field. */
361 : : #define THREAD_PATH(E) ((vec<jump_thread_edge *> *)(E)->aux)
362 : :
363 : : /* Remove the last statement in block BB if it is a control statement
364 : : Also remove all outgoing edges except the edge which reaches DEST_BB.
365 : : If DEST_BB is NULL, then remove all outgoing edges. */
366 : :
367 : : static void
368 : 1252454 : remove_ctrl_stmt_and_useless_edges (basic_block bb, basic_block dest_bb)
369 : : {
370 : 1252454 : gimple_stmt_iterator gsi;
371 : 1252454 : edge e;
372 : 1252454 : edge_iterator ei;
373 : :
374 : 1252454 : gsi = gsi_last_bb (bb);
375 : :
376 : : /* If the duplicate ends with a control statement, then remove it.
377 : :
378 : : Note that if we are duplicating the template block rather than the
379 : : original basic block, then the duplicate might not have any real
380 : : statements in it. */
381 : 1252454 : if (!gsi_end_p (gsi)
382 : 1252454 : && gsi_stmt (gsi)
383 : 1252454 : && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
384 : 2851 : || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
385 : 2833 : || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH))
386 : 1252454 : gsi_remove (&gsi, true);
387 : :
388 : 3770443 : for (ei = ei_start (bb->succs); (e = ei_safe_edge (ei)); )
389 : : {
390 : 2517989 : if (e->dest != dest_bb)
391 : : {
392 : 1486747 : free_dom_edge_info (e);
393 : 1486747 : remove_edge (e);
394 : : }
395 : : else
396 : : {
397 : 1031242 : e->probability = profile_probability::always ();
398 : 1031242 : ei_next (&ei);
399 : : }
400 : : }
401 : :
402 : : /* If the remaining edge is a loop exit, there must have
403 : : a removed edge that was not a loop exit.
404 : :
405 : : In that case BB and possibly other blocks were previously
406 : : in the loop, but are now outside the loop. Thus, we need
407 : : to update the loop structures. */
408 : 1252454 : if (single_succ_p (bb)
409 : 1031242 : && loop_outer (bb->loop_father)
410 : 1534713 : && loop_exit_edge_p (bb->loop_father, single_succ_edge (bb)))
411 : 46109 : loops_state_set (LOOPS_NEED_FIXUP);
412 : 1252454 : }
413 : :
414 : : /* Create a duplicate of BB. Record the duplicate block in an array
415 : : indexed by COUNT stored in RD. */
416 : :
417 : : static void
418 : 291819 : create_block_for_threading (basic_block bb,
419 : : struct redirection_data *rd,
420 : : unsigned int count,
421 : : bitmap *duplicate_blocks)
422 : : {
423 : 291819 : edge_iterator ei;
424 : 291819 : edge e;
425 : :
426 : : /* We can use the generic block duplication code and simply remove
427 : : the stuff we do not need. */
428 : 291819 : rd->dup_blocks[count] = duplicate_block (bb, NULL, NULL);
429 : :
430 : 890176 : FOR_EACH_EDGE (e, ei, rd->dup_blocks[count]->succs)
431 : : {
432 : 598357 : e->aux = NULL;
433 : :
434 : : /* If we duplicate a block with an outgoing edge marked as
435 : : EDGE_IGNORE, we must clear EDGE_IGNORE so that it doesn't
436 : : leak out of the current pass.
437 : :
438 : : It would be better to simplify switch statements and remove
439 : : the edges before we get here, but the sequencing is nontrivial. */
440 : 598357 : e->flags &= ~EDGE_IGNORE;
441 : : }
442 : :
443 : : /* Zero out the profile, since the block is unreachable for now. */
444 : 291819 : rd->dup_blocks[count]->count = profile_count::uninitialized ();
445 : 291819 : if (duplicate_blocks)
446 : 291819 : bitmap_set_bit (*duplicate_blocks, rd->dup_blocks[count]->index);
447 : 291819 : }
448 : :
449 : : /* Given an outgoing edge E lookup and return its entry in our hash table.
450 : :
451 : : If INSERT is true, then we insert the entry into the hash table if
452 : : it is not already present. INCOMING_EDGE is added to the list of incoming
453 : : edges associated with E in the hash table. */
454 : :
455 : : redirection_data *
456 : 299876 : fwd_jt_path_registry::lookup_redirection_data (edge e, insert_option insert)
457 : : {
458 : 299876 : struct redirection_data **slot;
459 : 299876 : struct redirection_data *elt;
460 : 299876 : vec<jump_thread_edge *> *path = THREAD_PATH (e);
461 : :
462 : : /* Build a hash table element so we can see if E is already
463 : : in the table. */
464 : 299876 : elt = XNEW (struct redirection_data);
465 : 299876 : elt->path = path;
466 : 299876 : elt->dup_blocks[0] = NULL;
467 : 299876 : elt->dup_blocks[1] = NULL;
468 : 299876 : elt->incoming_edges = NULL;
469 : :
470 : 299876 : slot = m_redirection_data->find_slot (elt, insert);
471 : :
472 : : /* This will only happen if INSERT is false and the entry is not
473 : : in the hash table. */
474 : 299876 : if (slot == NULL)
475 : : {
476 : 0 : free (elt);
477 : 0 : return NULL;
478 : : }
479 : :
480 : : /* This will only happen if E was not in the hash table and
481 : : INSERT is true. */
482 : 299876 : if (*slot == NULL)
483 : : {
484 : 250762 : *slot = elt;
485 : 250762 : elt->incoming_edges = XNEW (struct el);
486 : 250762 : elt->incoming_edges->e = e;
487 : 250762 : elt->incoming_edges->next = NULL;
488 : 250762 : return elt;
489 : : }
490 : : /* E was in the hash table. */
491 : : else
492 : : {
493 : : /* Free ELT as we do not need it anymore, we will extract the
494 : : relevant entry from the hash table itself. */
495 : 49114 : free (elt);
496 : :
497 : : /* Get the entry stored in the hash table. */
498 : 49114 : elt = *slot;
499 : :
500 : : /* If insertion was requested, then we need to add INCOMING_EDGE
501 : : to the list of incoming edges associated with E. */
502 : 49114 : if (insert)
503 : : {
504 : 49114 : struct el *el = XNEW (struct el);
505 : 49114 : el->next = elt->incoming_edges;
506 : 49114 : el->e = e;
507 : 49114 : elt->incoming_edges = el;
508 : : }
509 : :
510 : 49114 : return elt;
511 : : }
512 : : }
513 : :
514 : : /* Given ssa_name DEF, backtrack jump threading PATH from node IDX
515 : : to see if it has constant value in a flow sensitive manner. Set
516 : : LOCUS to location of the constant phi arg and return the value.
517 : : Return DEF directly if either PATH or idx is ZERO. */
518 : :
519 : : static tree
520 : 112634 : get_value_locus_in_path (tree def, vec<jump_thread_edge *> *path,
521 : : basic_block bb, int idx, location_t *locus)
522 : : {
523 : 112634 : tree arg;
524 : 112634 : gphi *def_phi;
525 : 112634 : basic_block def_bb;
526 : :
527 : 112634 : if (path == NULL || idx == 0)
528 : : return def;
529 : :
530 : 157906 : def_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (def));
531 : 63005 : if (!def_phi)
532 : : return def;
533 : :
534 : 63005 : def_bb = gimple_bb (def_phi);
535 : : /* Don't propagate loop invariants into deeper loops. */
536 : 63005 : if (!def_bb || bb_loop_depth (def_bb) < bb_loop_depth (bb))
537 : 1157 : return def;
538 : :
539 : : /* Backtrack jump threading path from IDX to see if def has constant
540 : : value. */
541 : 80888 : for (int j = idx - 1; j >= 0; j--)
542 : : {
543 : 65365 : edge e = (*path)[j]->e;
544 : 65365 : if (e->dest == def_bb)
545 : : {
546 : 46325 : arg = gimple_phi_arg_def (def_phi, e->dest_idx);
547 : 46325 : if (is_gimple_min_invariant (arg))
548 : : {
549 : 17733 : *locus = gimple_phi_arg_location (def_phi, e->dest_idx);
550 : 17733 : return arg;
551 : : }
552 : : break;
553 : : }
554 : : }
555 : :
556 : : return def;
557 : : }
558 : :
559 : : /* For each PHI in BB, copy the argument associated with SRC_E to TGT_E.
560 : : Try to backtrack jump threading PATH from node IDX to see if the arg
561 : : has constant value, copy constant value instead of argument itself
562 : : if yes. */
563 : :
564 : : static void
565 : 384095 : copy_phi_args (basic_block bb, edge src_e, edge tgt_e,
566 : : vec<jump_thread_edge *> *path, int idx)
567 : : {
568 : 384095 : gphi_iterator gsi;
569 : 384095 : int src_indx = src_e->dest_idx;
570 : :
571 : 628658 : for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
572 : : {
573 : 244563 : gphi *phi = gsi.phi ();
574 : 244563 : tree def = gimple_phi_arg_def (phi, src_indx);
575 : 244563 : location_t locus = gimple_phi_arg_location (phi, src_indx);
576 : :
577 : 244563 : if (TREE_CODE (def) == SSA_NAME
578 : 438519 : && !virtual_operand_p (gimple_phi_result (phi)))
579 : 112634 : def = get_value_locus_in_path (def, path, bb, idx, &locus);
580 : :
581 : 244563 : add_phi_arg (phi, def, tgt_e, locus);
582 : : }
583 : 384095 : }
584 : :
585 : : /* We have recently made a copy of ORIG_BB, including its outgoing
586 : : edges. The copy is NEW_BB. Every PHI node in every direct successor of
587 : : ORIG_BB has a new argument associated with edge from NEW_BB to the
588 : : successor. Initialize the PHI argument so that it is equal to the PHI
589 : : argument associated with the edge from ORIG_BB to the successor.
590 : : PATH and IDX are used to check if the new PHI argument has constant
591 : : value in a flow sensitive manner. */
592 : :
593 : : static void
594 : 70607 : update_destination_phis (basic_block orig_bb, basic_block new_bb,
595 : : vec<jump_thread_edge *> *path, int idx)
596 : : {
597 : 70607 : edge_iterator ei;
598 : 70607 : edge e;
599 : :
600 : 220917 : FOR_EACH_EDGE (e, ei, orig_bb->succs)
601 : : {
602 : 150310 : edge e2 = find_edge (new_bb, e->dest);
603 : 150310 : copy_phi_args (e->dest, e, e2, path, idx);
604 : : }
605 : 70607 : }
606 : :
607 : : /* Given a duplicate block and its single destination (both stored
608 : : in RD). Create an edge between the duplicate and its single
609 : : destination.
610 : :
611 : : Add an additional argument to any PHI nodes at the single
612 : : destination. IDX is the start node in jump threading path
613 : : we start to check to see if the new PHI argument has constant
614 : : value along the jump threading path. */
615 : :
616 : : static void
617 : 221212 : create_edge_and_update_destination_phis (struct redirection_data *rd,
618 : : basic_block bb, int idx)
619 : : {
620 : 221212 : edge e = make_single_succ_edge (bb, rd->path->last ()->e->dest, EDGE_FALLTHRU);
621 : :
622 : 221212 : rescan_loop_exit (e, true, false);
623 : :
624 : : /* We used to copy the thread path here. That was added in 2007
625 : : and dutifully updated through the representation changes in 2013.
626 : :
627 : : In 2013 we added code to thread from an interior node through
628 : : the backedge to another interior node. That runs after the code
629 : : to thread through loop headers from outside the loop.
630 : :
631 : : The latter may delete edges in the CFG, including those
632 : : which appeared in the jump threading path we copied here. Thus
633 : : we'd end up using a dangling pointer.
634 : :
635 : : After reviewing the 2007/2011 code, I can't see how anything
636 : : depended on copying the AUX field and clearly copying the jump
637 : : threading path is problematical due to embedded edge pointers.
638 : : It has been removed. */
639 : 221212 : e->aux = NULL;
640 : :
641 : : /* If there are any PHI nodes at the destination of the outgoing edge
642 : : from the duplicate block, then we will need to add a new argument
643 : : to them. The argument should have the same value as the argument
644 : : associated with the outgoing edge stored in RD. */
645 : 221212 : copy_phi_args (e->dest, rd->path->last ()->e, e, rd->path, idx);
646 : 221212 : }
647 : :
648 : : /* Look through PATH beginning at START and return TRUE if there are
649 : : any additional blocks that need to be duplicated. Otherwise,
650 : : return FALSE. */
651 : : static bool
652 : 70607 : any_remaining_duplicated_blocks (vec<jump_thread_edge *> *path,
653 : : unsigned int start)
654 : : {
655 : 86476 : for (unsigned int i = start + 1; i < path->length (); i++)
656 : : {
657 : 56926 : if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
658 : 56926 : || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
659 : : return true;
660 : : }
661 : : return false;
662 : : }
663 : :
664 : :
665 : : /* Compute the amount of profile count coming into the jump threading
666 : : path stored in RD that we are duplicating, returned in PATH_IN_COUNT_PTR and
667 : : PATH_IN_FREQ_PTR, as well as the amount of counts flowing out of the
668 : : duplicated path, returned in PATH_OUT_COUNT_PTR. LOCAL_INFO is used to
669 : : identify blocks duplicated for jump threading, which have duplicated
670 : : edges that need to be ignored in the analysis. Return true if path contains
671 : : a joiner, false otherwise.
672 : :
673 : : In the non-joiner case, this is straightforward - all the counts
674 : : flowing into the jump threading path should flow through the duplicated
675 : : block and out of the duplicated path.
676 : :
677 : : In the joiner case, it is very tricky. Some of the counts flowing into
678 : : the original path go offpath at the joiner. The problem is that while
679 : : we know how much total count goes off-path in the original control flow,
680 : : we don't know how many of the counts corresponding to just the jump
681 : : threading path go offpath at the joiner.
682 : :
683 : : For example, assume we have the following control flow and identified
684 : : jump threading paths:
685 : :
686 : : A B C
687 : : \ | /
688 : : Ea \ |Eb / Ec
689 : : \ | /
690 : : v v v
691 : : J <-- Joiner
692 : : / \
693 : : Eoff/ \Eon
694 : : / \
695 : : v v
696 : : Soff Son <--- Normal
697 : : /\
698 : : Ed/ \ Ee
699 : : / \
700 : : v v
701 : : D E
702 : :
703 : : Jump threading paths: A -> J -> Son -> D (path 1)
704 : : C -> J -> Son -> E (path 2)
705 : :
706 : : Note that the control flow could be more complicated:
707 : : - Each jump threading path may have more than one incoming edge. I.e. A and
708 : : Ea could represent multiple incoming blocks/edges that are included in
709 : : path 1.
710 : : - There could be EDGE_NO_COPY_SRC_BLOCK edges after the joiner (either
711 : : before or after the "normal" copy block). These are not duplicated onto
712 : : the jump threading path, as they are single-successor.
713 : : - Any of the blocks along the path may have other incoming edges that
714 : : are not part of any jump threading path, but add profile counts along
715 : : the path.
716 : :
717 : : In the above example, after all jump threading is complete, we will
718 : : end up with the following control flow:
719 : :
720 : : A B C
721 : : | | |
722 : : Ea| |Eb |Ec
723 : : | | |
724 : : v v v
725 : : Ja J Jc
726 : : / \ / \Eon' / \
727 : : Eona/ \ ---/---\-------- \Eonc
728 : : / \ / / \ \
729 : : v v v v v
730 : : Sona Soff Son Sonc
731 : : \ /\ /
732 : : \___________ / \ _____/
733 : : \ / \/
734 : : vv v
735 : : D E
736 : :
737 : : The main issue to notice here is that when we are processing path 1
738 : : (A->J->Son->D) we need to figure out the outgoing edge weights to
739 : : the duplicated edges Ja->Sona and Ja->Soff, while ensuring that the
740 : : sum of the incoming weights to D remain Ed. The problem with simply
741 : : assuming that Ja (and Jc when processing path 2) has the same outgoing
742 : : probabilities to its successors as the original block J, is that after
743 : : all paths are processed and other edges/counts removed (e.g. none
744 : : of Ec will reach D after processing path 2), we may end up with not
745 : : enough count flowing along duplicated edge Sona->D.
746 : :
747 : : Therefore, in the case of a joiner, we keep track of all counts
748 : : coming in along the current path, as well as from predecessors not
749 : : on any jump threading path (Eb in the above example). While we
750 : : first assume that the duplicated Eona for Ja->Sona has the same
751 : : probability as the original, we later compensate for other jump
752 : : threading paths that may eliminate edges. We do that by keep track
753 : : of all counts coming into the original path that are not in a jump
754 : : thread (Eb in the above example, but as noted earlier, there could
755 : : be other predecessors incoming to the path at various points, such
756 : : as at Son). Call this cumulative non-path count coming into the path
757 : : before D as Enonpath. We then ensure that the count from Sona->D is as at
758 : : least as big as (Ed - Enonpath), but no bigger than the minimum
759 : : weight along the jump threading path. The probabilities of both the
760 : : original and duplicated joiner block J and Ja will be adjusted
761 : : accordingly after the updates. */
762 : :
763 : : static bool
764 : 250762 : compute_path_counts (struct redirection_data *rd,
765 : : ssa_local_info_t *local_info,
766 : : profile_count *path_in_count_ptr,
767 : : profile_count *path_out_count_ptr)
768 : : {
769 : 250762 : edge e = rd->incoming_edges->e;
770 : 250762 : vec<jump_thread_edge *> *path = THREAD_PATH (e);
771 : 250762 : edge elast = path->last ()->e;
772 : 250762 : profile_count nonpath_count = profile_count::zero ();
773 : 250762 : bool has_joiner = false;
774 : 250762 : profile_count path_in_count = profile_count::zero ();
775 : :
776 : : /* Start by accumulating incoming edge counts to the path's first bb
777 : : into a couple buckets:
778 : : path_in_count: total count of incoming edges that flow into the
779 : : current path.
780 : : nonpath_count: total count of incoming edges that are not
781 : : flowing along *any* path. These are the counts
782 : : that will still flow along the original path after
783 : : all path duplication is done by potentially multiple
784 : : calls to this routine.
785 : : (any other incoming edge counts are for a different jump threading
786 : : path that will be handled by a later call to this routine.)
787 : : To make this easier, start by recording all incoming edges that flow into
788 : : the current path in a bitmap. We could add up the path's incoming edge
789 : : counts here, but we still need to walk all the first bb's incoming edges
790 : : below to add up the counts of the other edges not included in this jump
791 : : threading path. */
792 : 250762 : struct el *next, *el;
793 : 250762 : auto_bitmap in_edge_srcs;
794 : 550638 : for (el = rd->incoming_edges; el; el = next)
795 : : {
796 : 299876 : next = el->next;
797 : 299876 : bitmap_set_bit (in_edge_srcs, el->e->src->index);
798 : : }
799 : 250762 : edge ein;
800 : 250762 : edge_iterator ei;
801 : 892495 : FOR_EACH_EDGE (ein, ei, e->dest->preds)
802 : : {
803 : 641733 : vec<jump_thread_edge *> *ein_path = THREAD_PATH (ein);
804 : : /* Simply check the incoming edge src against the set captured above. */
805 : 641733 : if (ein_path
806 : 1075244 : && bitmap_bit_p (in_edge_srcs, (*ein_path)[0]->e->src->index))
807 : : {
808 : : /* It is necessary but not sufficient that the last path edges
809 : : are identical. There may be different paths that share the
810 : : same last path edge in the case where the last edge has a nocopy
811 : : source block. */
812 : 299876 : gcc_assert (ein_path->last ()->e == elast);
813 : 299876 : path_in_count += ein->count ();
814 : : }
815 : 341857 : else if (!ein_path)
816 : : {
817 : : /* Keep track of the incoming edges that are not on any jump-threading
818 : : path. These counts will still flow out of original path after all
819 : : jump threading is complete. */
820 : 208222 : nonpath_count += ein->count ();
821 : : }
822 : : }
823 : :
824 : : /* Now compute the fraction of the total count coming into the first
825 : : path bb that is from the current threading path. */
826 : 250762 : profile_count total_count = e->dest->count;
827 : : /* Handle incoming profile insanities. */
828 : 250762 : if (total_count < path_in_count)
829 : 19896 : path_in_count = total_count;
830 : 250762 : profile_probability onpath_scale = path_in_count.probability_in (total_count);
831 : :
832 : : /* Walk the entire path to do some more computation in order to estimate
833 : : how much of the path_in_count will flow out of the duplicated threading
834 : : path. In the non-joiner case this is straightforward (it should be
835 : : the same as path_in_count, although we will handle incoming profile
836 : : insanities by setting it equal to the minimum count along the path).
837 : :
838 : : In the joiner case, we need to estimate how much of the path_in_count
839 : : will stay on the threading path after the joiner's conditional branch.
840 : : We don't really know for sure how much of the counts
841 : : associated with this path go to each successor of the joiner, but we'll
842 : : estimate based on the fraction of the total count coming into the path
843 : : bb was from the threading paths (computed above in onpath_scale).
844 : : Afterwards, we will need to do some fixup to account for other threading
845 : : paths and possible profile insanities.
846 : :
847 : : In order to estimate the joiner case's counts we also need to update
848 : : nonpath_count with any additional counts coming into the path. Other
849 : : blocks along the path may have additional predecessors from outside
850 : : the path. */
851 : 250762 : profile_count path_out_count = path_in_count;
852 : 250762 : profile_count min_path_count = path_in_count;
853 : 573950 : for (unsigned int i = 1; i < path->length (); i++)
854 : : {
855 : 323188 : edge epath = (*path)[i]->e;
856 : 323188 : profile_count cur_count = epath->count ();
857 : 323188 : if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
858 : : {
859 : 70607 : has_joiner = true;
860 : 70607 : cur_count = cur_count.apply_probability (onpath_scale);
861 : : }
862 : : /* In the joiner case we need to update nonpath_count for any edges
863 : : coming into the path that will contribute to the count flowing
864 : : into the path successor. */
865 : 323188 : if (has_joiner && epath != elast)
866 : : {
867 : : /* Look for other incoming edges after joiner. */
868 : 223288 : FOR_EACH_EDGE (ein, ei, epath->dest->preds)
869 : : {
870 : 164531 : if (ein != epath
871 : : /* Ignore in edges from blocks we have duplicated for a
872 : : threading path, which have duplicated edge counts until
873 : : they are redirected by an invocation of this routine. */
874 : 270305 : && !bitmap_bit_p (local_info->duplicate_blocks,
875 : 105774 : ein->src->index))
876 : 35448 : nonpath_count += ein->count ();
877 : : }
878 : : }
879 : 323188 : if (cur_count < path_out_count)
880 : 138291 : path_out_count = cur_count;
881 : 323188 : if (epath->count () < min_path_count)
882 : 123147 : min_path_count = epath->count ();
883 : : }
884 : :
885 : : /* We computed path_out_count above assuming that this path targeted
886 : : the joiner's on-path successor with the same likelihood as it
887 : : reached the joiner. However, other thread paths through the joiner
888 : : may take a different path through the normal copy source block
889 : : (i.e. they have a different elast), meaning that they do not
890 : : contribute any counts to this path's elast. As a result, it may
891 : : turn out that this path must have more count flowing to the on-path
892 : : successor of the joiner. Essentially, all of this path's elast
893 : : count must be contributed by this path and any nonpath counts
894 : : (since any path through the joiner with a different elast will not
895 : : include a copy of this elast in its duplicated path).
896 : : So ensure that this path's path_out_count is at least the
897 : : difference between elast->count () and nonpath_count. Otherwise the edge
898 : : counts after threading will not be sane. */
899 : 250762 : if (local_info->need_profile_correction
900 : 279822 : && has_joiner && path_out_count < elast->count () - nonpath_count)
901 : : {
902 : 10400 : path_out_count = elast->count () - nonpath_count;
903 : : /* But neither can we go above the minimum count along the path
904 : : we are duplicating. This can be an issue due to profile
905 : : insanities coming in to this pass. */
906 : 10400 : if (path_out_count > min_path_count)
907 : 7385 : path_out_count = min_path_count;
908 : : }
909 : :
910 : 250762 : *path_in_count_ptr = path_in_count;
911 : 250762 : *path_out_count_ptr = path_out_count;
912 : 250762 : return has_joiner;
913 : 250762 : }
914 : :
915 : :
916 : : /* Update the counts and frequencies for both an original path
917 : : edge EPATH and its duplicate EDUP. The duplicate source block
918 : : will get a count of PATH_IN_COUNT and PATH_IN_FREQ,
919 : : and the duplicate edge EDUP will have a count of PATH_OUT_COUNT. */
920 : : static void
921 : 323188 : update_profile (edge epath, edge edup, profile_count path_in_count,
922 : : profile_count path_out_count)
923 : : {
924 : :
925 : : /* First update the duplicated block's count. */
926 : 323188 : if (edup)
927 : : {
928 : 291819 : basic_block dup_block = edup->src;
929 : :
930 : : /* Edup's count is reduced by path_out_count. We need to redistribute
931 : : probabilities to the remaining edges. */
932 : :
933 : 291819 : edge esucc;
934 : 291819 : edge_iterator ei;
935 : 291819 : profile_probability edup_prob
936 : 291819 : = path_out_count.probability_in (path_in_count);
937 : :
938 : : /* Either scale up or down the remaining edges.
939 : : probabilities are always in range <0,1> and thus we can't do
940 : : both by same loop. */
941 : 291819 : if (edup->probability > edup_prob)
942 : : {
943 : 21819 : profile_probability rev_scale
944 : 43638 : = (profile_probability::always () - edup->probability)
945 : 21819 : / (profile_probability::always () - edup_prob);
946 : 70511 : FOR_EACH_EDGE (esucc, ei, dup_block->succs)
947 : 48692 : if (esucc != edup)
948 : 26873 : esucc->probability /= rev_scale;
949 : : }
950 : 270000 : else if (edup->probability < edup_prob)
951 : : {
952 : 19996 : profile_probability scale
953 : 19996 : = (profile_probability::always () - edup_prob)
954 : 19996 : / (profile_probability::always () - edup->probability);
955 : 64668 : FOR_EACH_EDGE (esucc, ei, dup_block->succs)
956 : 44672 : if (esucc != edup)
957 : 24676 : esucc->probability *= scale;
958 : : }
959 : 291819 : if (edup_prob.initialized_p ())
960 : 272470 : edup->probability = edup_prob;
961 : :
962 : 291819 : gcc_assert (!dup_block->count.initialized_p ());
963 : 291819 : dup_block->count = path_in_count;
964 : : }
965 : :
966 : 323188 : if (path_in_count == profile_count::zero ())
967 : 9612 : return;
968 : :
969 : 313576 : profile_count final_count = epath->count () - path_out_count;
970 : :
971 : : /* Now update the original block's count in the
972 : : opposite manner - remove the counts/freq that will flow
973 : : into the duplicated block. Handle underflow due to precision/
974 : : rounding issues. */
975 : 313576 : epath->src->count -= path_in_count;
976 : :
977 : : /* Next update this path edge's original and duplicated counts. We know
978 : : that the duplicated path will have path_out_count flowing
979 : : out of it (in the joiner case this is the count along the duplicated path
980 : : out of the duplicated joiner). This count can then be removed from the
981 : : original path edge. */
982 : :
983 : 313576 : edge esucc;
984 : 313576 : edge_iterator ei;
985 : 313576 : profile_probability epath_prob = final_count.probability_in (epath->src->count);
986 : :
987 : 313576 : if (epath->probability > epath_prob)
988 : : {
989 : 195203 : profile_probability rev_scale
990 : 390406 : = (profile_probability::always () - epath->probability)
991 : 195203 : / (profile_probability::always () - epath_prob);
992 : 588908 : FOR_EACH_EDGE (esucc, ei, epath->src->succs)
993 : 393705 : if (esucc != epath)
994 : 198502 : esucc->probability /= rev_scale;
995 : : }
996 : 118373 : else if (epath->probability < epath_prob)
997 : : {
998 : 15944 : profile_probability scale
999 : 15944 : = (profile_probability::always () - epath_prob)
1000 : 15944 : / (profile_probability::always () - epath->probability);
1001 : 51854 : FOR_EACH_EDGE (esucc, ei, epath->src->succs)
1002 : 35910 : if (esucc != epath)
1003 : 19966 : esucc->probability *= scale;
1004 : : }
1005 : 313576 : if (epath_prob.initialized_p ())
1006 : 283691 : epath->probability = epath_prob;
1007 : : }
1008 : :
1009 : : /* Wire up the outgoing edges from the duplicate blocks and
1010 : : update any PHIs as needed. Also update the profile counts
1011 : : on the original and duplicate blocks and edges. */
1012 : : void
1013 : 250762 : ssa_fix_duplicate_block_edges (struct redirection_data *rd,
1014 : : ssa_local_info_t *local_info)
1015 : : {
1016 : 250762 : bool multi_incomings = (rd->incoming_edges->next != NULL);
1017 : 250762 : edge e = rd->incoming_edges->e;
1018 : 250762 : vec<jump_thread_edge *> *path = THREAD_PATH (e);
1019 : 250762 : edge elast = path->last ()->e;
1020 : 250762 : profile_count path_in_count = profile_count::zero ();
1021 : 250762 : profile_count path_out_count = profile_count::zero ();
1022 : :
1023 : : /* First determine how much profile count to move from original
1024 : : path to the duplicate path. This is tricky in the presence of
1025 : : a joiner (see comments for compute_path_counts), where some portion
1026 : : of the path's counts will flow off-path from the joiner. In the
1027 : : non-joiner case the path_in_count and path_out_count should be the
1028 : : same. */
1029 : 250762 : bool has_joiner = compute_path_counts (rd, local_info,
1030 : : &path_in_count, &path_out_count);
1031 : :
1032 : 573950 : for (unsigned int count = 0, i = 1; i < path->length (); i++)
1033 : : {
1034 : 323188 : edge epath = (*path)[i]->e;
1035 : :
1036 : : /* If we were threading through an joiner block, then we want
1037 : : to keep its control statement and redirect an outgoing edge.
1038 : : Else we want to remove the control statement & edges, then create
1039 : : a new outgoing edge. In both cases we may need to update PHIs. */
1040 : 323188 : if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1041 : : {
1042 : 70607 : edge victim;
1043 : 70607 : edge e2;
1044 : :
1045 : 70607 : gcc_assert (has_joiner);
1046 : :
1047 : : /* This updates the PHIs at the destination of the duplicate
1048 : : block. Pass 0 instead of i if we are threading a path which
1049 : : has multiple incoming edges. */
1050 : 127850 : update_destination_phis (local_info->bb, rd->dup_blocks[count],
1051 : : path, multi_incomings ? 0 : i);
1052 : :
1053 : : /* Find the edge from the duplicate block to the block we're
1054 : : threading through. That's the edge we want to redirect. */
1055 : 70607 : victim = find_edge (rd->dup_blocks[count], (*path)[i]->e->dest);
1056 : :
1057 : : /* If there are no remaining blocks on the path to duplicate,
1058 : : then redirect VICTIM to the final destination of the jump
1059 : : threading path. */
1060 : 70607 : if (!any_remaining_duplicated_blocks (path, i))
1061 : : {
1062 : 29550 : if (victim->dest != elast->dest)
1063 : : {
1064 : 12793 : e2 = redirect_edge_and_branch (victim, elast->dest);
1065 : : /* If we redirected the edge, then we need to copy PHI arguments
1066 : : at the target. If the edge already existed (e2 != victim
1067 : : case), then the PHIs in the target already have the correct
1068 : : arguments. */
1069 : 12793 : if (e2 == victim)
1070 : 12573 : copy_phi_args (e2->dest, elast, e2,
1071 : : path, multi_incomings ? 0 : i);
1072 : : }
1073 : : else
1074 : : e2 = victim;
1075 : : }
1076 : : else
1077 : : {
1078 : : /* Redirect VICTIM to the next duplicated block in the path. */
1079 : 41057 : e2 = redirect_edge_and_branch (victim, rd->dup_blocks[count + 1]);
1080 : :
1081 : : /* We need to update the PHIs in the next duplicated block. We
1082 : : want the new PHI args to have the same value as they had
1083 : : in the source of the next duplicate block.
1084 : :
1085 : : Thus, we need to know which edge we traversed into the
1086 : : source of the duplicate. Furthermore, we may have
1087 : : traversed many edges to reach the source of the duplicate.
1088 : :
1089 : : Walk through the path starting at element I until we
1090 : : hit an edge marked with EDGE_COPY_SRC_BLOCK. We want
1091 : : the edge from the prior element. */
1092 : 42352 : for (unsigned int j = i + 1; j < path->length (); j++)
1093 : : {
1094 : 42352 : if ((*path)[j]->type == EDGE_COPY_SRC_BLOCK)
1095 : : {
1096 : 41057 : copy_phi_arg_into_existing_phi ((*path)[j - 1]->e, e2);
1097 : 41057 : break;
1098 : : }
1099 : : }
1100 : : }
1101 : :
1102 : : /* Update the counts of both the original block
1103 : : and path edge, and the duplicates. The path duplicate's
1104 : : incoming count are the totals for all edges
1105 : : incoming to this jump threading path computed earlier.
1106 : : And we know that the duplicated path will have path_out_count
1107 : : flowing out of it (i.e. along the duplicated path out of the
1108 : : duplicated joiner). */
1109 : 70607 : update_profile (epath, e2, path_in_count, path_out_count);
1110 : : }
1111 : 252581 : else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1112 : : {
1113 : 221212 : remove_ctrl_stmt_and_useless_edges (rd->dup_blocks[count], NULL);
1114 : 416216 : create_edge_and_update_destination_phis (rd, rd->dup_blocks[count],
1115 : : multi_incomings ? 0 : i);
1116 : 221212 : if (count == 1)
1117 : 41057 : single_succ_edge (rd->dup_blocks[1])->aux = NULL;
1118 : :
1119 : : /* Update the counts of both the original block
1120 : : and path edge, and the duplicates. Since we are now after
1121 : : any joiner that may have existed on the path, the count
1122 : : flowing along the duplicated threaded path is path_out_count.
1123 : : If we didn't have a joiner, then cur_path_freq was the sum
1124 : : of the total frequencies along all incoming edges to the
1125 : : thread path (path_in_freq). If we had a joiner, it would have
1126 : : been updated at the end of that handling to the edge frequency
1127 : : along the duplicated joiner path edge. */
1128 : 221212 : update_profile (epath, EDGE_SUCC (rd->dup_blocks[count], 0),
1129 : : path_out_count, path_out_count);
1130 : : }
1131 : : else
1132 : : {
1133 : : /* No copy case. In this case we don't have an equivalent block
1134 : : on the duplicated thread path to update, but we do need
1135 : : to remove the portion of the counts/freqs that were moved
1136 : : to the duplicated path from the counts/freqs flowing through
1137 : : this block on the original path. Since all the no-copy edges
1138 : : are after any joiner, the removed count is the same as
1139 : : path_out_count.
1140 : :
1141 : : If we didn't have a joiner, then cur_path_freq was the sum
1142 : : of the total frequencies along all incoming edges to the
1143 : : thread path (path_in_freq). If we had a joiner, it would have
1144 : : been updated at the end of that handling to the edge frequency
1145 : : along the duplicated joiner path edge. */
1146 : 31369 : update_profile (epath, NULL, path_out_count, path_out_count);
1147 : : }
1148 : :
1149 : : /* Increment the index into the duplicated path when we processed
1150 : : a duplicated block. */
1151 : 323188 : if ((*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK
1152 : 323188 : || (*path)[i]->type == EDGE_COPY_SRC_BLOCK)
1153 : : {
1154 : 291819 : count++;
1155 : : }
1156 : : }
1157 : 250762 : }
1158 : :
1159 : : /* Hash table traversal callback routine to create duplicate blocks. */
1160 : :
1161 : : int
1162 : 250762 : ssa_create_duplicates (struct redirection_data **slot,
1163 : : ssa_local_info_t *local_info)
1164 : : {
1165 : 250762 : struct redirection_data *rd = *slot;
1166 : :
1167 : : /* The second duplicated block in a jump threading path is specific
1168 : : to the path. So it gets stored in RD rather than in LOCAL_DATA.
1169 : :
1170 : : Each time we're called, we have to look through the path and see
1171 : : if a second block needs to be duplicated.
1172 : :
1173 : : Note the search starts with the third edge on the path. The first
1174 : : edge is the incoming edge, the second edge always has its source
1175 : : duplicated. Thus we start our search with the third edge. */
1176 : 250762 : vec<jump_thread_edge *> *path = rd->path;
1177 : 280300 : for (unsigned int i = 2; i < path->length (); i++)
1178 : : {
1179 : 70595 : if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
1180 : 70595 : || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1181 : : {
1182 : 41057 : create_block_for_threading ((*path)[i]->e->src, rd, 1,
1183 : : &local_info->duplicate_blocks);
1184 : 41057 : break;
1185 : : }
1186 : : }
1187 : :
1188 : : /* Create a template block if we have not done so already. Otherwise
1189 : : use the template to create a new block. */
1190 : 250762 : if (local_info->template_block == NULL)
1191 : : {
1192 : 201315 : create_block_for_threading ((*path)[1]->e->src, rd, 0,
1193 : : &local_info->duplicate_blocks);
1194 : 201315 : local_info->template_block = rd->dup_blocks[0];
1195 : 201315 : local_info->template_last_to_copy
1196 : 402630 : = gsi_last_bb (local_info->template_block);
1197 : :
1198 : : /* We do not create any outgoing edges for the template. We will
1199 : : take care of that in a later traversal. That way we do not
1200 : : create edges that are going to just be deleted. */
1201 : : }
1202 : : else
1203 : : {
1204 : 49447 : gimple_seq seq = NULL;
1205 : 49447 : if (gsi_stmt (local_info->template_last_to_copy)
1206 : 98894 : != gsi_stmt (gsi_last_bb (local_info->template_block)))
1207 : : {
1208 : 0 : if (gsi_end_p (local_info->template_last_to_copy))
1209 : : {
1210 : 0 : seq = bb_seq (local_info->template_block);
1211 : 0 : set_bb_seq (local_info->template_block, NULL);
1212 : : }
1213 : : else
1214 : 0 : seq = gsi_split_seq_after (local_info->template_last_to_copy);
1215 : : }
1216 : 49447 : create_block_for_threading (local_info->template_block, rd, 0,
1217 : : &local_info->duplicate_blocks);
1218 : 49447 : if (seq)
1219 : : {
1220 : 0 : if (gsi_end_p (local_info->template_last_to_copy))
1221 : 0 : set_bb_seq (local_info->template_block, seq);
1222 : : else
1223 : 0 : gsi_insert_seq_after (&local_info->template_last_to_copy,
1224 : : seq, GSI_SAME_STMT);
1225 : : }
1226 : :
1227 : : /* Go ahead and wire up outgoing edges and update PHIs for the duplicate
1228 : : block. */
1229 : 49447 : ssa_fix_duplicate_block_edges (rd, local_info);
1230 : : }
1231 : :
1232 : 250762 : if (MAY_HAVE_DEBUG_STMTS)
1233 : : {
1234 : : /* Copy debug stmts from each NO_COPY src block to the block
1235 : : that would have been its predecessor, if we can append to it
1236 : : (we can't add stmts after a block-ending stmt), or prepending
1237 : : to the duplicate of the successor, if there is one. If
1238 : : there's no duplicate successor, we'll mostly drop the blocks
1239 : : on the floor; propagate_threaded_block_debug_into, called
1240 : : elsewhere, will consolidate and preserve the effects of the
1241 : : binds, but none of the markers. */
1242 : 186224 : gimple_stmt_iterator copy_to = gsi_last_bb (rd->dup_blocks[0]);
1243 : 186224 : if (!gsi_end_p (copy_to))
1244 : : {
1245 : 183546 : if (stmt_ends_bb_p (gsi_stmt (copy_to)))
1246 : : {
1247 : 164081 : if (rd->dup_blocks[1])
1248 : 30285 : copy_to = gsi_after_labels (rd->dup_blocks[1]);
1249 : : else
1250 : 133796 : copy_to = gsi_none ();
1251 : : }
1252 : : else
1253 : 19465 : gsi_next (©_to);
1254 : : }
1255 : 244213 : for (unsigned int i = 2, j = 0; i < path->length (); i++)
1256 : 57989 : if ((*path)[i]->type == EDGE_NO_COPY_SRC_BLOCK
1257 : 57989 : && gsi_bb (copy_to))
1258 : : {
1259 : 5562 : for (gimple_stmt_iterator gsi = gsi_start_bb ((*path)[i]->e->src);
1260 : 6651 : !gsi_end_p (gsi); gsi_next (&gsi))
1261 : : {
1262 : 3870 : if (!is_gimple_debug (gsi_stmt (gsi)))
1263 : 916 : continue;
1264 : 2954 : gimple *stmt = gsi_stmt (gsi);
1265 : 2954 : gimple *copy = gimple_copy (stmt);
1266 : 2954 : gsi_insert_before (©_to, copy, GSI_SAME_STMT);
1267 : : }
1268 : : }
1269 : 55208 : else if ((*path)[i]->type == EDGE_COPY_SRC_BLOCK
1270 : 55208 : || (*path)[i]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1271 : : {
1272 : 34527 : j++;
1273 : 34527 : gcc_assert (j < 2);
1274 : 34527 : copy_to = gsi_last_bb (rd->dup_blocks[j]);
1275 : 34527 : if (!gsi_end_p (copy_to))
1276 : : {
1277 : 34434 : if (stmt_ends_bb_p (gsi_stmt (copy_to)))
1278 : 27206 : copy_to = gsi_none ();
1279 : : else
1280 : 7228 : gsi_next (©_to);
1281 : : }
1282 : : }
1283 : : }
1284 : :
1285 : : /* Keep walking the hash table. */
1286 : 250762 : return 1;
1287 : : }
1288 : :
1289 : : /* We did not create any outgoing edges for the template block during
1290 : : block creation. This hash table traversal callback creates the
1291 : : outgoing edge for the template block. */
1292 : :
1293 : : inline int
1294 : 201315 : ssa_fixup_template_block (struct redirection_data **slot,
1295 : : ssa_local_info_t *local_info)
1296 : : {
1297 : 201315 : struct redirection_data *rd = *slot;
1298 : :
1299 : : /* If this is the template block halt the traversal after updating
1300 : : it appropriately.
1301 : :
1302 : : If we were threading through an joiner block, then we want
1303 : : to keep its control statement and redirect an outgoing edge.
1304 : : Else we want to remove the control statement & edges, then create
1305 : : a new outgoing edge. In both cases we may need to update PHIs. */
1306 : 201315 : if (rd->dup_blocks[0] && rd->dup_blocks[0] == local_info->template_block)
1307 : : {
1308 : 201315 : ssa_fix_duplicate_block_edges (rd, local_info);
1309 : 201315 : return 0;
1310 : : }
1311 : :
1312 : : return 1;
1313 : : }
1314 : :
1315 : : /* Hash table traversal callback to redirect each incoming edge
1316 : : associated with this hash table element to its new destination. */
1317 : :
1318 : : static int
1319 : 250762 : ssa_redirect_edges (struct redirection_data **slot,
1320 : : ssa_local_info_t *local_info)
1321 : : {
1322 : 250762 : struct redirection_data *rd = *slot;
1323 : 250762 : struct el *next, *el;
1324 : :
1325 : : /* Walk over all the incoming edges associated with this hash table
1326 : : entry. */
1327 : 550638 : for (el = rd->incoming_edges; el; el = next)
1328 : : {
1329 : 299876 : edge e = el->e;
1330 : 299876 : vec<jump_thread_edge *> *path = THREAD_PATH (e);
1331 : :
1332 : : /* Go ahead and free this element from the list. Doing this now
1333 : : avoids the need for another list walk when we destroy the hash
1334 : : table. */
1335 : 299876 : next = el->next;
1336 : 299876 : free (el);
1337 : :
1338 : 299876 : local_info->num_threaded_edges++;
1339 : :
1340 : 299876 : if (rd->dup_blocks[0])
1341 : : {
1342 : 299876 : edge e2;
1343 : :
1344 : 299876 : if (dump_file && (dump_flags & TDF_DETAILS))
1345 : 25 : fprintf (dump_file, " Threaded jump %d --> %d to %d\n",
1346 : 25 : e->src->index, e->dest->index, rd->dup_blocks[0]->index);
1347 : :
1348 : : /* Redirect the incoming edge (possibly to the joiner block) to the
1349 : : appropriate duplicate block. */
1350 : 299876 : e2 = redirect_edge_and_branch (e, rd->dup_blocks[0]);
1351 : 299876 : gcc_assert (e == e2);
1352 : 299876 : flush_pending_stmts (e2);
1353 : : }
1354 : :
1355 : : /* Go ahead and clear E->aux. It's not needed anymore and failure
1356 : : to clear it will cause all kinds of unpleasant problems later. */
1357 : 299876 : path->release ();
1358 : 299876 : e->aux = NULL;
1359 : :
1360 : : }
1361 : :
1362 : : /* Indicate that we actually threaded one or more jumps. */
1363 : 250762 : if (rd->incoming_edges)
1364 : 250762 : local_info->jumps_threaded = true;
1365 : :
1366 : 250762 : return 1;
1367 : : }
1368 : :
1369 : : /* Return true if this block has no executable statements other than
1370 : : a simple ctrl flow instruction. When the number of outgoing edges
1371 : : is one, this is equivalent to a "forwarder" block. */
1372 : :
1373 : : static bool
1374 : 147070 : redirection_block_p (basic_block bb)
1375 : : {
1376 : 147070 : gimple_stmt_iterator gsi;
1377 : :
1378 : : /* Advance to the first executable statement. */
1379 : 147070 : gsi = gsi_start_bb (bb);
1380 : 147070 : while (!gsi_end_p (gsi)
1381 : 277719 : && (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL
1382 : 274784 : || is_gimple_debug (gsi_stmt (gsi))
1383 : 146182 : || gimple_nop_p (gsi_stmt (gsi))
1384 : 146182 : || gimple_clobber_p (gsi_stmt (gsi))))
1385 : 130649 : gsi_next (&gsi);
1386 : :
1387 : : /* Check if this is an empty block. */
1388 : 147070 : if (gsi_end_p (gsi))
1389 : : return true;
1390 : :
1391 : : /* Test that we've reached the terminating control statement. */
1392 : 146009 : return gsi_stmt (gsi)
1393 : 146009 : && (gimple_code (gsi_stmt (gsi)) == GIMPLE_COND
1394 : 130328 : || gimple_code (gsi_stmt (gsi)) == GIMPLE_GOTO
1395 : 130316 : || gimple_code (gsi_stmt (gsi)) == GIMPLE_SWITCH);
1396 : : }
1397 : :
1398 : : /* BB is a block which ends with a COND_EXPR or SWITCH_EXPR and when BB
1399 : : is reached via one or more specific incoming edges, we know which
1400 : : outgoing edge from BB will be traversed.
1401 : :
1402 : : We want to redirect those incoming edges to the target of the
1403 : : appropriate outgoing edge. Doing so avoids a conditional branch
1404 : : and may expose new optimization opportunities. Note that we have
1405 : : to update dominator tree and SSA graph after such changes.
1406 : :
1407 : : The key to keeping the SSA graph update manageable is to duplicate
1408 : : the side effects occurring in BB so that those side effects still
1409 : : occur on the paths which bypass BB after redirecting edges.
1410 : :
1411 : : We accomplish this by creating duplicates of BB and arranging for
1412 : : the duplicates to unconditionally pass control to one specific
1413 : : successor of BB. We then revector the incoming edges into BB to
1414 : : the appropriate duplicate of BB.
1415 : :
1416 : : If NOLOOP_ONLY is true, we only perform the threading as long as it
1417 : : does not affect the structure of the loops in a nontrivial way.
1418 : :
1419 : : If JOINERS is true, then thread through joiner blocks as well. */
1420 : :
1421 : : bool
1422 : 655742 : fwd_jt_path_registry::thread_block_1 (basic_block bb,
1423 : : bool noloop_only,
1424 : : bool joiners)
1425 : : {
1426 : : /* E is an incoming edge into BB that we may or may not want to
1427 : : redirect to a duplicate of BB. */
1428 : 655742 : edge e, e2;
1429 : 655742 : edge_iterator ei;
1430 : 655742 : ssa_local_info_t local_info;
1431 : :
1432 : 655742 : local_info.duplicate_blocks = BITMAP_ALLOC (NULL);
1433 : 655742 : local_info.need_profile_correction = false;
1434 : 655742 : local_info.num_threaded_edges = 0;
1435 : :
1436 : : /* To avoid scanning a linear array for the element we need we instead
1437 : : use a hash table. For normal code there should be no noticeable
1438 : : difference. However, if we have a block with a large number of
1439 : : incoming and outgoing edges such linear searches can get expensive. */
1440 : 655742 : m_redirection_data
1441 : 1311484 : = new hash_table<struct redirection_data> (EDGE_COUNT (bb->succs));
1442 : :
1443 : : /* Record each unique threaded destination into a hash table for
1444 : : efficient lookups. */
1445 : 655742 : edge last = NULL;
1446 : 1984849 : FOR_EACH_EDGE (e, ei, bb->preds)
1447 : : {
1448 : 1329107 : if (e->aux == NULL)
1449 : 656727 : continue;
1450 : :
1451 : 672380 : vec<jump_thread_edge *> *path = THREAD_PATH (e);
1452 : :
1453 : 965790 : if (((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && !joiners)
1454 : 819085 : || ((*path)[1]->type == EDGE_COPY_SRC_BLOCK && joiners))
1455 : 231327 : continue;
1456 : :
1457 : : /* When a NO_COPY_SRC block became non-empty cancel the path. */
1458 : 441053 : if (path->last ()->type == EDGE_NO_COPY_SRC_BLOCK)
1459 : : {
1460 : 41717 : auto gsi = gsi_start_nondebug_bb (path->last ()->e->src);
1461 : 41717 : if (!gsi_end_p (gsi)
1462 : 41717 : && !is_ctrl_stmt (gsi_stmt (gsi)))
1463 : : {
1464 : 0 : cancel_thread (path, "Non-empty EDGE_NO_COPY_SRC_BLOCK");
1465 : 0 : e->aux = NULL;
1466 : 0 : continue;
1467 : : }
1468 : : }
1469 : :
1470 : 441053 : e2 = path->last ()->e;
1471 : 441053 : if (!e2 || noloop_only)
1472 : : {
1473 : : /* If NOLOOP_ONLY is true, we only allow threading through the
1474 : : header of a loop to exit edges. */
1475 : :
1476 : : /* One case occurs when there was loop header buried in a jump
1477 : : threading path that crosses loop boundaries. We do not try
1478 : : and thread this elsewhere, so just cancel the jump threading
1479 : : request by clearing the AUX field now. */
1480 : 460368 : if (bb->loop_father != e2->src->loop_father
1481 : 439125 : && (!loop_exit_edge_p (e2->src->loop_father, e2)
1482 : 607 : || flow_loop_nested_p (bb->loop_father,
1483 : 607 : e2->dest->loop_father)))
1484 : : {
1485 : : /* Since this case is not handled by our special code
1486 : : to thread through a loop header, we must explicitly
1487 : : cancel the threading request here. */
1488 : 21243 : cancel_thread (path, "Threading through unhandled loop header");
1489 : 21243 : e->aux = NULL;
1490 : 21243 : continue;
1491 : : }
1492 : :
1493 : : /* Another case occurs when trying to thread through our
1494 : : own loop header, possibly from inside the loop. We will
1495 : : thread these later. */
1496 : : unsigned int i;
1497 : 804449 : for (i = 1; i < path->length (); i++)
1498 : : {
1499 : 506497 : if ((*path)[i]->e->src == bb->loop_father->header
1500 : 506497 : && (!loop_exit_edge_p (bb->loop_father, e2)
1501 : 1892 : || (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK))
1502 : : break;
1503 : : }
1504 : :
1505 : 835764 : if (i != path->length ())
1506 : 119930 : continue;
1507 : :
1508 : : /* Loop parallelization can be confused by the result of
1509 : : threading through the loop exit test back into the loop.
1510 : : However, theading those jumps seems to help other codes.
1511 : :
1512 : : I have been unable to find anything related to the shape of
1513 : : the CFG, the contents of the affected blocks, etc which would
1514 : : allow a more sensible test than what we're using below which
1515 : : merely avoids the optimization when parallelizing loops. */
1516 : 297952 : if (flag_tree_parallelize_loops > 1)
1517 : : {
1518 : 127 : for (i = 1; i < path->length (); i++)
1519 : 71 : if (bb->loop_father == e2->src->loop_father
1520 : 71 : && loop_exits_from_bb_p (bb->loop_father,
1521 : 71 : (*path)[i]->e->src)
1522 : 78 : && !loop_exit_edge_p (bb->loop_father, e2))
1523 : : break;
1524 : :
1525 : 120 : if (i != path->length ())
1526 : : {
1527 : 4 : cancel_thread (path, "Threading through loop exit");
1528 : 4 : e->aux = NULL;
1529 : 4 : continue;
1530 : : }
1531 : : }
1532 : : }
1533 : :
1534 : : /* Insert the outgoing edge into the hash table if it is not
1535 : : already in the hash table. */
1536 : 299876 : lookup_redirection_data (e, INSERT);
1537 : :
1538 : : /* When we have thread paths through a common joiner with different
1539 : : final destinations, then we may need corrections to deal with
1540 : : profile insanities. See the big comment before compute_path_counts. */
1541 : 299876 : if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1542 : : {
1543 : 90285 : if (!last)
1544 : : last = e2;
1545 : 34362 : else if (e2 != last)
1546 : 19315 : local_info.need_profile_correction = true;
1547 : : }
1548 : : }
1549 : :
1550 : : /* We do not update dominance info. */
1551 : 655742 : free_dominance_info (CDI_DOMINATORS);
1552 : :
1553 : : /* We know we only thread through the loop header to loop exits.
1554 : : Let the basic block duplication hook know we are not creating
1555 : : a multiple entry loop. */
1556 : 655742 : if (noloop_only
1557 : 651886 : && bb == bb->loop_father->header)
1558 : 255008 : set_loop_copy (bb->loop_father, loop_outer (bb->loop_father));
1559 : :
1560 : : /* Now create duplicates of BB.
1561 : :
1562 : : Note that for a block with a high outgoing degree we can waste
1563 : : a lot of time and memory creating and destroying useless edges.
1564 : :
1565 : : So we first duplicate BB and remove the control structure at the
1566 : : tail of the duplicate as well as all outgoing edges from the
1567 : : duplicate. We then use that duplicate block as a template for
1568 : : the rest of the duplicates. */
1569 : 655742 : local_info.template_block = NULL;
1570 : 655742 : local_info.bb = bb;
1571 : 655742 : local_info.jumps_threaded = false;
1572 : 655742 : m_redirection_data->traverse <ssa_local_info_t *, ssa_create_duplicates>
1573 : 906504 : (&local_info);
1574 : :
1575 : : /* The template does not have an outgoing edge. Create that outgoing
1576 : : edge and update PHI nodes as the edge's target as necessary.
1577 : :
1578 : : We do this after creating all the duplicates to avoid creating
1579 : : unnecessary edges. */
1580 : 655742 : m_redirection_data->traverse <ssa_local_info_t *, ssa_fixup_template_block>
1581 : 857057 : (&local_info);
1582 : :
1583 : : /* The hash table traversals above created the duplicate blocks (and the
1584 : : statements within the duplicate blocks). This loop creates PHI nodes for
1585 : : the duplicated blocks and redirects the incoming edges into BB to reach
1586 : : the duplicates of BB. */
1587 : 655742 : m_redirection_data->traverse <ssa_local_info_t *, ssa_redirect_edges>
1588 : 906504 : (&local_info);
1589 : :
1590 : : /* Done with this block. Clear REDIRECTION_DATA. */
1591 : 655742 : delete m_redirection_data;
1592 : 655742 : m_redirection_data = NULL;
1593 : :
1594 : 655742 : if (noloop_only
1595 : 651886 : && bb == bb->loop_father->header)
1596 : 255008 : set_loop_copy (bb->loop_father, NULL);
1597 : :
1598 : 655742 : BITMAP_FREE (local_info.duplicate_blocks);
1599 : 655742 : local_info.duplicate_blocks = NULL;
1600 : :
1601 : 655742 : m_num_threaded_edges += local_info.num_threaded_edges;
1602 : :
1603 : : /* Indicate to our caller whether or not any jumps were threaded. */
1604 : 655742 : return local_info.jumps_threaded;
1605 : : }
1606 : :
1607 : : /* Wrapper for thread_block_1 so that we can first handle jump
1608 : : thread paths which do not involve copying joiner blocks, then
1609 : : handle jump thread paths which have joiner blocks.
1610 : :
1611 : : By doing things this way we can be as aggressive as possible and
1612 : : not worry that copying a joiner block will create a jump threading
1613 : : opportunity. */
1614 : :
1615 : : bool
1616 : 327871 : fwd_jt_path_registry::thread_block (basic_block bb, bool noloop_only)
1617 : : {
1618 : 327871 : bool retval;
1619 : 327871 : retval = thread_block_1 (bb, noloop_only, false);
1620 : 327871 : retval |= thread_block_1 (bb, noloop_only, true);
1621 : 327871 : return retval;
1622 : : }
1623 : :
1624 : : /* Callback for dfs_enumerate_from. Returns true if BB is different
1625 : : from STOP and DBDS_CE_STOP. */
1626 : :
1627 : : static basic_block dbds_ce_stop;
1628 : : static bool
1629 : 1280499 : dbds_continue_enumeration_p (const_basic_block bb, const void *stop)
1630 : : {
1631 : 1280499 : return (bb != (const_basic_block) stop
1632 : 1280499 : && bb != dbds_ce_stop);
1633 : : }
1634 : :
1635 : : /* Evaluates the dominance relationship of latch of the LOOP and BB, and
1636 : : returns the state. */
1637 : :
1638 : : enum bb_dom_status
1639 : 102705 : determine_bb_domination_status (class loop *loop, basic_block bb)
1640 : : {
1641 : 102705 : basic_block *bblocks;
1642 : 102705 : unsigned nblocks, i;
1643 : 102705 : bool bb_reachable = false;
1644 : 102705 : edge_iterator ei;
1645 : 102705 : edge e;
1646 : :
1647 : : /* This function assumes BB is a successor of LOOP->header.
1648 : : If that is not the case return DOMST_NONDOMINATING which
1649 : : is always safe. */
1650 : 102705 : {
1651 : 102705 : bool ok = false;
1652 : :
1653 : 157161 : FOR_EACH_EDGE (e, ei, bb->preds)
1654 : : {
1655 : 117889 : if (e->src == loop->header)
1656 : : {
1657 : : ok = true;
1658 : : break;
1659 : : }
1660 : : }
1661 : :
1662 : 102705 : if (!ok)
1663 : : return DOMST_NONDOMINATING;
1664 : : }
1665 : :
1666 : 63433 : if (bb == loop->latch)
1667 : : return DOMST_DOMINATING;
1668 : :
1669 : : /* Check that BB dominates LOOP->latch, and that it is back-reachable
1670 : : from it. */
1671 : :
1672 : 61730 : bblocks = XCNEWVEC (basic_block, loop->num_nodes);
1673 : 61730 : dbds_ce_stop = loop->header;
1674 : 123460 : nblocks = dfs_enumerate_from (loop->latch, 1, dbds_continue_enumeration_p,
1675 : 61730 : bblocks, loop->num_nodes, bb);
1676 : 758837 : for (i = 0; i < nblocks; i++)
1677 : 1780094 : FOR_EACH_EDGE (e, ei, bblocks[i]->preds)
1678 : : {
1679 : 1082987 : if (e->src == loop->header)
1680 : : {
1681 : 34264 : free (bblocks);
1682 : 34264 : return DOMST_NONDOMINATING;
1683 : : }
1684 : 1048723 : if (e->src == bb)
1685 : 61957 : bb_reachable = true;
1686 : : }
1687 : :
1688 : 27466 : free (bblocks);
1689 : 27466 : return (bb_reachable ? DOMST_DOMINATING : DOMST_LOOP_BROKEN);
1690 : : }
1691 : :
1692 : : /* Thread jumps through the header of LOOP. Returns true if cfg changes.
1693 : : If MAY_PEEL_LOOP_HEADERS is false, we avoid threading from entry edges
1694 : : to the inside of the loop. */
1695 : :
1696 : : bool
1697 : 127504 : fwd_jt_path_registry::thread_through_loop_header (class loop *loop,
1698 : : bool may_peel_loop_headers)
1699 : : {
1700 : 127504 : basic_block header = loop->header;
1701 : 127504 : edge e, tgt_edge, latch = loop_latch_edge (loop);
1702 : 127504 : edge_iterator ei;
1703 : 127504 : basic_block tgt_bb, atgt_bb;
1704 : 127504 : enum bb_dom_status domst;
1705 : :
1706 : : /* We have already threaded through headers to exits, so all the threading
1707 : : requests now are to the inside of the loop. We need to avoid creating
1708 : : irreducible regions (i.e., loops with more than one entry block), and
1709 : : also loop with several latch edges, or new subloops of the loop (although
1710 : : there are cases where it might be appropriate, it is difficult to decide,
1711 : : and doing it wrongly may confuse other optimizers).
1712 : :
1713 : : We could handle more general cases here. However, the intention is to
1714 : : preserve some information about the loop, which is impossible if its
1715 : : structure changes significantly, in a way that is not well understood.
1716 : : Thus we only handle few important special cases, in which also updating
1717 : : of the loop-carried information should be feasible:
1718 : :
1719 : : 1) Propagation of latch edge to a block that dominates the latch block
1720 : : of a loop. This aims to handle the following idiom:
1721 : :
1722 : : first = 1;
1723 : : while (1)
1724 : : {
1725 : : if (first)
1726 : : initialize;
1727 : : first = 0;
1728 : : body;
1729 : : }
1730 : :
1731 : : After threading the latch edge, this becomes
1732 : :
1733 : : first = 1;
1734 : : if (first)
1735 : : initialize;
1736 : : while (1)
1737 : : {
1738 : : first = 0;
1739 : : body;
1740 : : }
1741 : :
1742 : : The original header of the loop is moved out of it, and we may thread
1743 : : the remaining edges through it without further constraints.
1744 : :
1745 : : 2) All entry edges are propagated to a single basic block that dominates
1746 : : the latch block of the loop. This aims to handle the following idiom
1747 : : (normally created for "for" loops):
1748 : :
1749 : : i = 0;
1750 : : while (1)
1751 : : {
1752 : : if (i >= 100)
1753 : : break;
1754 : : body;
1755 : : i++;
1756 : : }
1757 : :
1758 : : This becomes
1759 : :
1760 : : i = 0;
1761 : : while (1)
1762 : : {
1763 : : body;
1764 : : i++;
1765 : : if (i >= 100)
1766 : : break;
1767 : : }
1768 : : */
1769 : :
1770 : : /* Threading through the header won't improve the code if the header has just
1771 : : one successor. */
1772 : 127504 : if (single_succ_p (header))
1773 : 1652 : goto fail;
1774 : :
1775 : 125852 : if (!may_peel_loop_headers && !redirection_block_p (loop->header))
1776 : 117187 : goto fail;
1777 : : else
1778 : : {
1779 : 8665 : tgt_bb = NULL;
1780 : 8665 : tgt_edge = NULL;
1781 : 19417 : FOR_EACH_EDGE (e, ei, header->preds)
1782 : : {
1783 : 15552 : if (!e->aux)
1784 : : {
1785 : 8451 : if (e == latch)
1786 : 7102 : continue;
1787 : :
1788 : : /* If latch is not threaded, and there is a header
1789 : : edge that is not threaded, we would create loop
1790 : : with multiple entries. */
1791 : 1349 : goto fail;
1792 : : }
1793 : :
1794 : 7101 : vec<jump_thread_edge *> *path = THREAD_PATH (e);
1795 : :
1796 : 7101 : if ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
1797 : 3451 : goto fail;
1798 : 3650 : tgt_edge = (*path)[1]->e;
1799 : 3650 : atgt_bb = tgt_edge->dest;
1800 : 3650 : if (!tgt_bb)
1801 : : tgt_bb = atgt_bb;
1802 : : /* Two targets of threading would make us create loop
1803 : : with multiple entries. */
1804 : 0 : else if (tgt_bb != atgt_bb)
1805 : 0 : goto fail;
1806 : : }
1807 : :
1808 : 3865 : if (!tgt_bb)
1809 : : {
1810 : : /* There are no threading requests. */
1811 : : return false;
1812 : : }
1813 : :
1814 : : /* Redirecting to empty loop latch is useless. */
1815 : 3285 : if (tgt_bb == loop->latch
1816 : 3285 : && empty_block_p (loop->latch))
1817 : 2 : goto fail;
1818 : : }
1819 : :
1820 : : /* The target block must dominate the loop latch, otherwise we would be
1821 : : creating a subloop. */
1822 : 3283 : domst = determine_bb_domination_status (loop, tgt_bb);
1823 : 3283 : if (domst == DOMST_NONDOMINATING)
1824 : 1355 : goto fail;
1825 : 1928 : if (domst == DOMST_LOOP_BROKEN)
1826 : : {
1827 : : /* If the loop ceased to exist, mark it as such, and thread through its
1828 : : original header. */
1829 : 0 : mark_loop_for_removal (loop);
1830 : 0 : return thread_block (header, false);
1831 : : }
1832 : :
1833 : 1928 : if (tgt_bb->loop_father->header == tgt_bb)
1834 : : {
1835 : : /* If the target of the threading is a header of a subloop, we need
1836 : : to create a preheader for it, so that the headers of the two loops
1837 : : do not merge. */
1838 : 0 : if (EDGE_COUNT (tgt_bb->preds) > 2)
1839 : : {
1840 : 0 : tgt_bb = create_preheader (tgt_bb->loop_father, 0);
1841 : 0 : gcc_assert (tgt_bb != NULL);
1842 : : }
1843 : : else
1844 : 0 : tgt_bb = split_edge (tgt_edge);
1845 : : }
1846 : :
1847 : 1928 : basic_block new_preheader;
1848 : :
1849 : : /* Now consider the case entry edges are redirected to the new entry
1850 : : block. Remember one entry edge, so that we can find the new
1851 : : preheader (its destination after threading). */
1852 : 2854 : FOR_EACH_EDGE (e, ei, header->preds)
1853 : : {
1854 : 2854 : if (e->aux)
1855 : : break;
1856 : : }
1857 : :
1858 : : /* The duplicate of the header is the new preheader of the loop. Ensure
1859 : : that it is placed correctly in the loop hierarchy. */
1860 : 1928 : set_loop_copy (loop, loop_outer (loop));
1861 : :
1862 : 1928 : thread_block (header, false);
1863 : 1928 : set_loop_copy (loop, NULL);
1864 : 1928 : new_preheader = e->dest;
1865 : :
1866 : : /* Create the new latch block. This is always necessary, as the latch
1867 : : must have only a single successor, but the original header had at
1868 : : least two successors. */
1869 : 1928 : loop->latch = NULL;
1870 : 1928 : mfb_kj_edge = single_succ_edge (new_preheader);
1871 : 1928 : loop->header = mfb_kj_edge->dest;
1872 : 1928 : latch = make_forwarder_block (tgt_bb, mfb_keep_just, NULL);
1873 : 1928 : loop->header = latch->dest;
1874 : 1928 : loop->latch = latch->src;
1875 : 1928 : return true;
1876 : :
1877 : 124996 : fail:
1878 : : /* We failed to thread anything. Cancel the requests. */
1879 : 375535 : FOR_EACH_EDGE (e, ei, header->preds)
1880 : : {
1881 : 250539 : vec<jump_thread_edge *> *path = THREAD_PATH (e);
1882 : :
1883 : 250539 : if (path)
1884 : : {
1885 : 118002 : cancel_thread (path, "Failure in thread_through_loop_header");
1886 : 118002 : e->aux = NULL;
1887 : : }
1888 : : }
1889 : : return false;
1890 : : }
1891 : :
1892 : : /* E1 and E2 are edges into the same basic block. Return TRUE if the
1893 : : PHI arguments associated with those edges are equal or there are no
1894 : : PHI arguments, otherwise return FALSE. */
1895 : :
1896 : : static bool
1897 : 24502 : phi_args_equal_on_edges (edge e1, edge e2)
1898 : : {
1899 : 24502 : gphi_iterator gsi;
1900 : 24502 : int indx1 = e1->dest_idx;
1901 : 24502 : int indx2 = e2->dest_idx;
1902 : :
1903 : 37533 : for (gsi = gsi_start_phis (e1->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1904 : : {
1905 : 13886 : gphi *phi = gsi.phi ();
1906 : :
1907 : 13886 : if (!operand_equal_p (gimple_phi_arg_def (phi, indx1),
1908 : 13886 : gimple_phi_arg_def (phi, indx2), 0))
1909 : : return false;
1910 : : }
1911 : : return true;
1912 : : }
1913 : :
1914 : : /* Return the number of non-debug statements and non-virtual PHIs in a
1915 : : block. */
1916 : :
1917 : : static unsigned int
1918 : 9120 : count_stmts_and_phis_in_block (basic_block bb)
1919 : : {
1920 : 9120 : unsigned int num_stmts = 0;
1921 : :
1922 : 9120 : gphi_iterator gpi;
1923 : 24067 : for (gpi = gsi_start_phis (bb); !gsi_end_p (gpi); gsi_next (&gpi))
1924 : 29894 : if (!virtual_operand_p (PHI_RESULT (gpi.phi ())))
1925 : 8943 : num_stmts++;
1926 : :
1927 : 9120 : gimple_stmt_iterator gsi;
1928 : 74033 : for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1929 : : {
1930 : 55793 : gimple *stmt = gsi_stmt (gsi);
1931 : 55793 : if (!is_gimple_debug (stmt))
1932 : 52151 : num_stmts++;
1933 : : }
1934 : :
1935 : 9120 : return num_stmts;
1936 : : }
1937 : :
1938 : :
1939 : : /* Walk through the registered jump threads and convert them into a
1940 : : form convenient for this pass.
1941 : :
1942 : : Any block which has incoming edges threaded to outgoing edges
1943 : : will have its entry in THREADED_BLOCK set.
1944 : :
1945 : : Any threaded edge will have its new outgoing edge stored in the
1946 : : original edge's AUX field.
1947 : :
1948 : : This form avoids the need to walk all the edges in the CFG to
1949 : : discover blocks which need processing and avoids unnecessary
1950 : : hash table lookups to map from threaded edge to new target. */
1951 : :
1952 : : void
1953 : 145953 : fwd_jt_path_registry::mark_threaded_blocks (bitmap threaded_blocks)
1954 : : {
1955 : 145953 : unsigned int i;
1956 : 145953 : bitmap_iterator bi;
1957 : 145953 : auto_bitmap tmp;
1958 : 145953 : basic_block bb;
1959 : 145953 : edge e;
1960 : 145953 : edge_iterator ei;
1961 : :
1962 : : /* It is possible to have jump threads in which one is a subpath
1963 : : of the other. ie, (A, B), (B, C), (C, D) where B is a joiner
1964 : : block and (B, C), (C, D) where no joiner block exists.
1965 : :
1966 : : When this occurs ignore the jump thread request with the joiner
1967 : : block. It's totally subsumed by the simpler jump thread request.
1968 : :
1969 : : This results in less block copying, simpler CFGs. More importantly,
1970 : : when we duplicate the joiner block, B, in this case we will create
1971 : : a new threading opportunity that we wouldn't be able to optimize
1972 : : until the next jump threading iteration.
1973 : :
1974 : : So first convert the jump thread requests which do not require a
1975 : : joiner block. */
1976 : 711771 : for (i = 0; i < m_paths.length (); i++)
1977 : : {
1978 : 565818 : vec<jump_thread_edge *> *path = m_paths[i];
1979 : :
1980 : 1131636 : if (path->length () > 1
1981 : 1131636 : && (*path)[1]->type != EDGE_COPY_SRC_JOINER_BLOCK)
1982 : : {
1983 : 300483 : edge e = (*path)[0]->e;
1984 : 300483 : e->aux = (void *)path;
1985 : 300483 : bitmap_set_bit (tmp, e->dest->index);
1986 : : }
1987 : : }
1988 : :
1989 : : /* Now iterate again, converting cases where we want to thread
1990 : : through a joiner block, but only if no other edge on the path
1991 : : already has a jump thread attached to it. We do this in two passes,
1992 : : to avoid situations where the order in the paths vec can hide overlapping
1993 : : threads (the path is recorded on the incoming edge, so we would miss
1994 : : cases where the second path starts at a downstream edge on the same
1995 : : path). First record all joiner paths, deleting any in the unexpected
1996 : : case where there is already a path for that incoming edge. */
1997 : 711771 : for (i = 0; i < m_paths.length ();)
1998 : : {
1999 : 565818 : vec<jump_thread_edge *> *path = m_paths[i];
2000 : :
2001 : 565818 : if (path->length () > 1
2002 : 1131636 : && (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK)
2003 : : {
2004 : : /* Attach the path to the starting edge if none is yet recorded. */
2005 : 265335 : if ((*path)[0]->e->aux == NULL)
2006 : : {
2007 : 223622 : (*path)[0]->e->aux = path;
2008 : 223622 : i++;
2009 : : }
2010 : : else
2011 : : {
2012 : 41713 : m_paths.unordered_remove (i);
2013 : 41713 : cancel_thread (path);
2014 : : }
2015 : : }
2016 : : else
2017 : : {
2018 : 300483 : i++;
2019 : : }
2020 : : }
2021 : :
2022 : : /* Second, look for paths that have any other jump thread attached to
2023 : : them, and either finish converting them or cancel them. */
2024 : 670058 : for (i = 0; i < m_paths.length ();)
2025 : : {
2026 : 524105 : vec<jump_thread_edge *> *path = m_paths[i];
2027 : 524105 : edge e = (*path)[0]->e;
2028 : :
2029 : 524105 : if (path->length () > 1
2030 : 1048210 : && (*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK && e->aux == path)
2031 : : {
2032 : : unsigned int j;
2033 : 528223 : for (j = 1; j < path->length (); j++)
2034 : 376360 : if ((*path)[j]->e->aux != NULL)
2035 : : break;
2036 : :
2037 : : /* If we iterated through the entire path without exiting the loop,
2038 : : then we are good to go, record it. */
2039 : 223622 : if (j == path->length ())
2040 : : {
2041 : 151863 : bitmap_set_bit (tmp, e->dest->index);
2042 : 151863 : i++;
2043 : : }
2044 : : else
2045 : : {
2046 : 71759 : e->aux = NULL;
2047 : 71759 : m_paths.unordered_remove (i);
2048 : 71759 : cancel_thread (path);
2049 : : }
2050 : : }
2051 : : else
2052 : : {
2053 : 300483 : i++;
2054 : : }
2055 : : }
2056 : :
2057 : : /* When optimizing for size, prune all thread paths where statement
2058 : : duplication is necessary.
2059 : :
2060 : : We walk the jump thread path looking for copied blocks. There's
2061 : : two types of copied blocks.
2062 : :
2063 : : EDGE_COPY_SRC_JOINER_BLOCK is always copied and thus we will
2064 : : cancel the jump threading request when optimizing for size.
2065 : :
2066 : : EDGE_COPY_SRC_BLOCK which is copied, but some of its statements
2067 : : will be killed by threading. If threading does not kill all of
2068 : : its statements, then we should cancel the jump threading request
2069 : : when optimizing for size. */
2070 : 145953 : if (optimize_function_for_size_p (cfun))
2071 : : {
2072 : 20895 : EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2073 : : {
2074 : 47468 : FOR_EACH_EDGE (e, ei, BASIC_BLOCK_FOR_FN (cfun, i)->preds)
2075 : 32422 : if (e->aux)
2076 : : {
2077 : : vec<jump_thread_edge *> *path = THREAD_PATH (e);
2078 : :
2079 : : unsigned int j;
2080 : 31515 : for (j = 1; j < path->length (); j++)
2081 : : {
2082 : 22943 : bb = (*path)[j]->e->src;
2083 : 22943 : if (redirection_block_p (bb))
2084 : : ;
2085 : 13095 : else if ((*path)[j]->type == EDGE_COPY_SRC_JOINER_BLOCK
2086 : 13095 : || ((*path)[j]->type == EDGE_COPY_SRC_BLOCK
2087 : 18240 : && (count_stmts_and_phis_in_block (bb)
2088 : 9120 : != estimate_threading_killed_stmts (bb))))
2089 : : break;
2090 : : }
2091 : :
2092 : 41876 : if (j != path->length ())
2093 : : {
2094 : 12366 : cancel_thread (path);
2095 : 12366 : e->aux = NULL;
2096 : : }
2097 : : else
2098 : 8572 : bitmap_set_bit (threaded_blocks, i);
2099 : : }
2100 : : }
2101 : : }
2102 : : else
2103 : 140104 : bitmap_copy (threaded_blocks, tmp);
2104 : :
2105 : : /* If we have a joiner block (J) which has two successors S1 and S2 and
2106 : : we are threading though S1 and the final destination of the thread
2107 : : is S2, then we must verify that any PHI nodes in S2 have the same
2108 : : PHI arguments for the edge J->S2 and J->S1->...->S2.
2109 : :
2110 : : We used to detect this prior to registering the jump thread, but
2111 : : that prohibits propagation of edge equivalences into non-dominated
2112 : : PHI nodes as the equivalency test might occur before propagation.
2113 : :
2114 : : This must also occur after we truncate any jump threading paths
2115 : : as this scenario may only show up after truncation.
2116 : :
2117 : : This works for now, but will need improvement as part of the FSA
2118 : : optimization.
2119 : :
2120 : : Note since we've moved the thread request data to the edges,
2121 : : we have to iterate on those rather than the threaded_edges vector. */
2122 : 482015 : EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2123 : : {
2124 : 336062 : bb = BASIC_BLOCK_FOR_FN (cfun, i);
2125 : 1123390 : FOR_EACH_EDGE (e, ei, bb->preds)
2126 : : {
2127 : 787328 : if (e->aux)
2128 : : {
2129 : 439980 : vec<jump_thread_edge *> *path = THREAD_PATH (e);
2130 : 439980 : bool have_joiner = ((*path)[1]->type == EDGE_COPY_SRC_JOINER_BLOCK);
2131 : :
2132 : 439980 : if (have_joiner)
2133 : : {
2134 : 147560 : basic_block joiner = e->dest;
2135 : 147560 : edge final_edge = path->last ()->e;
2136 : 147560 : basic_block final_dest = final_edge->dest;
2137 : 147560 : edge e2 = find_edge (joiner, final_dest);
2138 : :
2139 : 147560 : if (e2 && !phi_args_equal_on_edges (e2, final_edge))
2140 : : {
2141 : 855 : cancel_thread (path);
2142 : 855 : e->aux = NULL;
2143 : : }
2144 : : }
2145 : : }
2146 : : }
2147 : : }
2148 : :
2149 : : /* Look for jump threading paths which cross multiple loop headers.
2150 : :
2151 : : The code to thread through loop headers will change the CFG in ways
2152 : : that invalidate the cached loop iteration information. So we must
2153 : : detect that case and wipe the cached information. */
2154 : 482015 : EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
2155 : : {
2156 : 336062 : basic_block bb = BASIC_BLOCK_FOR_FN (cfun, i);
2157 : 1123390 : FOR_EACH_EDGE (e, ei, bb->preds)
2158 : : {
2159 : 787328 : if (e->aux)
2160 : : {
2161 : 439125 : gcc_assert (loops_state_satisfies_p
2162 : : (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS));
2163 : : vec<jump_thread_edge *> *path = THREAD_PATH (e);
2164 : :
2165 : 1027051 : for (unsigned int i = 0, crossed_headers = 0;
2166 : 1817457 : i < path->length ();
2167 : : i++)
2168 : : {
2169 : 1030129 : basic_block dest = (*path)[i]->e->dest;
2170 : 1030129 : basic_block src = (*path)[i]->e->src;
2171 : : /* If we enter a loop. */
2172 : 1030129 : if (flow_loop_nested_p (src->loop_father, dest->loop_father))
2173 : 140650 : ++crossed_headers;
2174 : : /* If we step from a block outside an irreducible region
2175 : : to a block inside an irreducible region, then we have
2176 : : crossed into a loop. */
2177 : 889479 : else if (! (src->flags & BB_IRREDUCIBLE_LOOP)
2178 : 886720 : && (dest->flags & BB_IRREDUCIBLE_LOOP))
2179 : 1153 : ++crossed_headers;
2180 : 1030129 : if (crossed_headers > 1)
2181 : : {
2182 : 3078 : vect_free_loop_info_assumptions
2183 : 6156 : ((*path)[path->length () - 1]->e->dest->loop_father);
2184 : 3078 : break;
2185 : : }
2186 : : }
2187 : : }
2188 : : }
2189 : : }
2190 : 145953 : }
2191 : :
2192 : :
2193 : : /* Verify that the REGION is a valid jump thread. A jump thread is a special
2194 : : case of SEME Single Entry Multiple Exits region in which all nodes in the
2195 : : REGION have exactly one incoming edge. The only exception is the first block
2196 : : that may not have been connected to the rest of the cfg yet. */
2197 : :
2198 : : DEBUG_FUNCTION void
2199 : 1031217 : verify_jump_thread (basic_block *region, unsigned n_region)
2200 : : {
2201 : 2480029 : for (unsigned i = 0; i < n_region; i++)
2202 : 1448812 : gcc_assert (EDGE_COUNT (region[i]->preds) <= 1);
2203 : 1031217 : }
2204 : :
2205 : : /* Return true when BB is one of the first N items in BBS. */
2206 : :
2207 : : static inline bool
2208 : 2069942 : bb_in_bbs (basic_block bb, basic_block *bbs, int n)
2209 : : {
2210 : 4952596 : for (int i = 0; i < n; i++)
2211 : 2892131 : if (bb == bbs[i])
2212 : : return true;
2213 : :
2214 : : return false;
2215 : : }
2216 : :
2217 : : void
2218 : 18 : jt_path_registry::debug_path (FILE *dump_file, int pathno)
2219 : : {
2220 : 18 : vec<jump_thread_edge *> *p = m_paths[pathno];
2221 : 18 : fprintf (dump_file, "path: ");
2222 : 102 : for (unsigned i = 0; i < p->length (); ++i)
2223 : 84 : fprintf (dump_file, "%d -> %d, ",
2224 : 84 : (*p)[i]->e->src->index, (*p)[i]->e->dest->index);
2225 : 18 : fprintf (dump_file, "\n");
2226 : 18 : }
2227 : :
2228 : : void
2229 : 0 : jt_path_registry::debug ()
2230 : : {
2231 : 0 : for (unsigned i = 0; i < m_paths.length (); ++i)
2232 : 0 : debug_path (stderr, i);
2233 : 0 : }
2234 : :
2235 : : /* Rewire a jump_thread_edge so that the source block is now a
2236 : : threaded source block.
2237 : :
2238 : : PATH_NUM is an index into the global path table PATHS.
2239 : : EDGE_NUM is the jump thread edge number into said path.
2240 : :
2241 : : Returns TRUE if we were able to successfully rewire the edge. */
2242 : :
2243 : : bool
2244 : 77935 : back_jt_path_registry::rewire_first_differing_edge (unsigned path_num,
2245 : : unsigned edge_num)
2246 : : {
2247 : 77935 : vec<jump_thread_edge *> *path = m_paths[path_num];
2248 : 77935 : edge &e = (*path)[edge_num]->e;
2249 : 77935 : if (dump_file && (dump_flags & TDF_DETAILS))
2250 : 10 : fprintf (dump_file, "rewiring edge candidate: %d -> %d\n",
2251 : 10 : e->src->index, e->dest->index);
2252 : 77935 : basic_block src_copy = get_bb_copy (e->src);
2253 : 77935 : if (src_copy == NULL)
2254 : : {
2255 : 18903 : if (dump_file && (dump_flags & TDF_DETAILS))
2256 : 5 : fprintf (dump_file, "ignoring candidate: there is no src COPY\n");
2257 : 18903 : return false;
2258 : : }
2259 : 59032 : edge new_edge = find_edge (src_copy, e->dest);
2260 : : /* If the previously threaded paths created a flow graph where we
2261 : : can no longer figure out where to go, give up. */
2262 : 59032 : if (new_edge == NULL)
2263 : : {
2264 : 6316 : if (dump_file && (dump_flags & TDF_DETAILS))
2265 : 0 : fprintf (dump_file, "ignoring candidate: we lost our way\n");
2266 : 6316 : return false;
2267 : : }
2268 : 52716 : e = new_edge;
2269 : 52716 : return true;
2270 : : }
2271 : :
2272 : : /* After a path has been jump threaded, adjust the remaining paths
2273 : : that are subsets of this path, so these paths can be safely
2274 : : threaded within the context of the new threaded path.
2275 : :
2276 : : For example, suppose we have just threaded:
2277 : :
2278 : : 5 -> 6 -> 7 -> 8 -> 12 => 5 -> 6' -> 7' -> 8' -> 12'
2279 : :
2280 : : And we have an upcoming threading candidate:
2281 : : 5 -> 6 -> 7 -> 8 -> 15 -> 20
2282 : :
2283 : : This function adjusts the upcoming path into:
2284 : : 8' -> 15 -> 20
2285 : :
2286 : : CURR_PATH_NUM is an index into the global paths table. It
2287 : : specifies the path that was just threaded. */
2288 : :
2289 : : void
2290 : 1031242 : back_jt_path_registry::adjust_paths_after_duplication (unsigned curr_path_num)
2291 : : {
2292 : 1031242 : vec<jump_thread_edge *> *curr_path = m_paths[curr_path_num];
2293 : :
2294 : : /* Iterate through all the other paths and adjust them. */
2295 : 18651128 : for (unsigned cand_path_num = 0; cand_path_num < m_paths.length (); )
2296 : : {
2297 : 17619886 : if (cand_path_num == curr_path_num)
2298 : : {
2299 : 1031242 : ++cand_path_num;
2300 : 1031242 : continue;
2301 : : }
2302 : : /* Make sure the candidate to adjust starts with the same path
2303 : : as the recently threaded path. */
2304 : 16588644 : vec<jump_thread_edge *> *cand_path = m_paths[cand_path_num];
2305 : 16588644 : if ((*cand_path)[0]->e != (*curr_path)[0]->e)
2306 : : {
2307 : 16481061 : ++cand_path_num;
2308 : 16481061 : continue;
2309 : : }
2310 : 107583 : if (dump_file && (dump_flags & TDF_DETAILS))
2311 : : {
2312 : 13 : fprintf (dump_file, "adjusting candidate: ");
2313 : 13 : debug_path (dump_file, cand_path_num);
2314 : : }
2315 : :
2316 : : /* Chop off from the candidate path any prefix it shares with
2317 : : the recently threaded path. */
2318 : 215166 : unsigned minlength = MIN (curr_path->length (), cand_path->length ());
2319 : 107583 : unsigned j;
2320 : 301173 : for (j = 0; j < minlength; ++j)
2321 : : {
2322 : 252622 : edge cand_edge = (*cand_path)[j]->e;
2323 : 252622 : edge curr_edge = (*curr_path)[j]->e;
2324 : :
2325 : : /* Once the prefix no longer matches, adjust the first
2326 : : non-matching edge to point from an adjusted edge to
2327 : : wherever it was going. */
2328 : 252622 : if (cand_edge != curr_edge)
2329 : : {
2330 : 59032 : gcc_assert (cand_edge->src == curr_edge->src);
2331 : 59032 : if (!rewire_first_differing_edge (cand_path_num, j))
2332 : 6316 : goto remove_candidate_from_list;
2333 : : break;
2334 : : }
2335 : : }
2336 : 101267 : if (j == minlength)
2337 : : {
2338 : : /* If we consumed the max subgraph we could look at, and
2339 : : still didn't find any different edges, it's the
2340 : : last edge after MINLENGTH. */
2341 : 48551 : if (cand_path->length () > minlength)
2342 : : {
2343 : 18903 : if (!rewire_first_differing_edge (cand_path_num, j))
2344 : 18903 : goto remove_candidate_from_list;
2345 : : }
2346 : 29648 : else if (dump_file && (dump_flags & TDF_DETAILS))
2347 : 3 : fprintf (dump_file, "adjusting first edge after MINLENGTH.\n");
2348 : : }
2349 : 82364 : if (j > 0)
2350 : : {
2351 : : /* If we are removing everything, delete the entire candidate. */
2352 : 164728 : if (j == cand_path->length ())
2353 : : {
2354 : 29648 : remove_candidate_from_list:
2355 : 54867 : cancel_thread (cand_path, "Adjusted candidate is EMPTY");
2356 : 54867 : m_paths.unordered_remove (cand_path_num);
2357 : 54867 : continue;
2358 : : }
2359 : : /* Otherwise, just remove the redundant sub-path. */
2360 : 52716 : if (cand_path->length () - j > 1)
2361 : 40763 : cand_path->block_remove (0, j);
2362 : 11953 : else if (dump_file && (dump_flags & TDF_DETAILS))
2363 : 0 : fprintf (dump_file, "Dropping illformed candidate.\n");
2364 : : }
2365 : 52716 : if (dump_file && (dump_flags & TDF_DETAILS))
2366 : : {
2367 : 5 : fprintf (dump_file, "adjusted candidate: ");
2368 : 5 : debug_path (dump_file, cand_path_num);
2369 : : }
2370 : 52716 : ++cand_path_num;
2371 : : }
2372 : 1031242 : }
2373 : :
2374 : : /* Duplicates a jump-thread path of N_REGION basic blocks.
2375 : : The ENTRY edge is redirected to the duplicate of the region.
2376 : :
2377 : : Remove the last conditional statement in the last basic block in the REGION,
2378 : : and create a single fallthru edge pointing to the same destination as the
2379 : : EXIT edge.
2380 : :
2381 : : CURRENT_PATH_NO is an index into the global paths[] table
2382 : : specifying the jump-thread path.
2383 : :
2384 : : Returns false if it is unable to copy the region, true otherwise. */
2385 : :
2386 : : bool
2387 : 1031268 : back_jt_path_registry::duplicate_thread_path (edge entry,
2388 : : edge exit,
2389 : : basic_block *region,
2390 : : unsigned n_region,
2391 : : unsigned current_path_no)
2392 : : {
2393 : 1031268 : unsigned i;
2394 : 1031268 : class loop *loop = entry->dest->loop_father;
2395 : 1031268 : edge exit_copy;
2396 : 1031268 : edge redirected;
2397 : 1031268 : profile_count curr_count;
2398 : :
2399 : 1031268 : if (!can_copy_bbs_p (region, n_region))
2400 : : return false;
2401 : :
2402 : : /* Some sanity checking. Note that we do not check for all possible
2403 : : missuses of the functions. I.e. if you ask to copy something weird,
2404 : : it will work, but the state of structures probably will not be
2405 : : correct. */
2406 : 2480086 : for (i = 0; i < n_region; i++)
2407 : : {
2408 : : /* We do not handle subloops, i.e. all the blocks must belong to the
2409 : : same loop. */
2410 : 1448844 : if (region[i]->loop_father != loop)
2411 : : return false;
2412 : : }
2413 : :
2414 : 1031242 : initialize_original_copy_tables ();
2415 : :
2416 : 1031242 : set_loop_copy (loop, loop);
2417 : :
2418 : 1031242 : basic_block *region_copy = XNEWVEC (basic_block, n_region);
2419 : 1031242 : copy_bbs (region, n_region, region_copy, &exit, 1, &exit_copy, loop,
2420 : : split_edge_bb_loc (entry), false);
2421 : :
2422 : : /* Fix up: copy_bbs redirects all edges pointing to copied blocks. The
2423 : : following code ensures that all the edges exiting the jump-thread path are
2424 : : redirected back to the original code: these edges are exceptions
2425 : : invalidating the property that is propagated by executing all the blocks of
2426 : : the jump-thread path in order. */
2427 : :
2428 : 1031242 : curr_count = entry->count ();
2429 : :
2430 : 2480086 : for (i = 0; i < n_region; i++)
2431 : : {
2432 : 1448844 : edge e;
2433 : 1448844 : edge_iterator ei;
2434 : 1448844 : basic_block bb = region_copy[i];
2435 : :
2436 : : /* Watch inconsistent profile. */
2437 : 1448844 : if (curr_count > region[i]->count)
2438 : 47277 : curr_count = region[i]->count;
2439 : : /* Scale current BB. */
2440 : 2464185 : if (region[i]->count.nonzero_p () && curr_count.initialized_p ())
2441 : : {
2442 : : /* In the middle of the path we only scale the frequencies.
2443 : : In last BB we need to update probabilities of outgoing edges
2444 : : because we know which one is taken at the threaded path. */
2445 : 1015341 : if (i + 1 != n_region)
2446 : 375264 : scale_bbs_frequencies_profile_count (region + i, 1,
2447 : : region[i]->count - curr_count,
2448 : : region[i]->count);
2449 : : else
2450 : 640077 : update_bb_profile_for_threading (region[i],
2451 : : curr_count,
2452 : : exit);
2453 : 1015341 : scale_bbs_frequencies_profile_count (region_copy + i, 1, curr_count,
2454 : 1015341 : region_copy[i]->count);
2455 : : }
2456 : :
2457 : 1448844 : if (single_succ_p (bb))
2458 : : {
2459 : : /* Make sure the successor is the next node in the path. */
2460 : 150514 : gcc_assert (i + 1 == n_region
2461 : : || region_copy[i + 1] == single_succ_edge (bb)->dest);
2462 : 150514 : if (i + 1 != n_region)
2463 : : {
2464 : 150514 : curr_count = single_succ_edge (bb)->count ();
2465 : : }
2466 : 1181756 : continue;
2467 : : }
2468 : :
2469 : : /* Special case the last block on the path: make sure that it does not
2470 : : jump back on the copied path, including back to itself. */
2471 : 1298330 : if (i + 1 == n_region)
2472 : : {
2473 : 3101184 : FOR_EACH_EDGE (e, ei, bb->succs)
2474 : 4139884 : if (bb_in_bbs (e->dest, region_copy, n_region))
2475 : : {
2476 : 9477 : basic_block orig = get_bb_original (e->dest);
2477 : 9477 : if (orig)
2478 : 9477 : redirect_edge_and_branch_force (e, orig);
2479 : : }
2480 : 1031242 : continue;
2481 : 1031242 : }
2482 : :
2483 : : /* Redirect all other edges jumping to non-adjacent blocks back to the
2484 : : original code. */
2485 : 801267 : FOR_EACH_EDGE (e, ei, bb->succs)
2486 : 534179 : if (region_copy[i + 1] != e->dest)
2487 : : {
2488 : 267091 : basic_block orig = get_bb_original (e->dest);
2489 : 267091 : if (orig)
2490 : 12243 : redirect_edge_and_branch_force (e, orig);
2491 : : }
2492 : : else
2493 : : {
2494 : 267088 : curr_count = e->count ();
2495 : : }
2496 : : }
2497 : :
2498 : :
2499 : 1031242 : if (flag_checking)
2500 : 1031217 : verify_jump_thread (region_copy, n_region);
2501 : :
2502 : : /* Remove the last branch in the jump thread path. */
2503 : 1031242 : remove_ctrl_stmt_and_useless_edges (region_copy[n_region - 1], exit->dest);
2504 : :
2505 : : /* And fixup the flags on the single remaining edge. */
2506 : 1031242 : edge fix_e = find_edge (region_copy[n_region - 1], exit->dest);
2507 : 1031242 : fix_e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE | EDGE_ABNORMAL);
2508 : 1031242 : fix_e->flags |= EDGE_FALLTHRU;
2509 : :
2510 : 1031242 : edge e = make_edge (region_copy[n_region - 1], exit->dest, EDGE_FALLTHRU);
2511 : :
2512 : 1031242 : if (e)
2513 : : {
2514 : 0 : rescan_loop_exit (e, true, false);
2515 : 0 : e->probability = profile_probability::always ();
2516 : : }
2517 : :
2518 : : /* Redirect the entry and add the phi node arguments. */
2519 : 1031242 : if (entry->dest == loop->header)
2520 : 11176 : mark_loop_for_removal (loop);
2521 : 1031242 : redirected = redirect_edge_and_branch (entry, get_bb_copy (entry->dest));
2522 : 1031242 : gcc_assert (redirected != NULL);
2523 : 1031242 : flush_pending_stmts (entry);
2524 : :
2525 : : /* Add the other PHI node arguments. */
2526 : 1031242 : add_phi_args_after_copy (region_copy, n_region, NULL);
2527 : :
2528 : 1031242 : free (region_copy);
2529 : :
2530 : 1031242 : adjust_paths_after_duplication (current_path_no);
2531 : :
2532 : 1031242 : free_original_copy_tables ();
2533 : 1031242 : return true;
2534 : : }
2535 : :
2536 : : /* Return true when PATH is a valid jump-thread path. */
2537 : :
2538 : : static bool
2539 : 1049473 : valid_jump_thread_path (vec<jump_thread_edge *> *path)
2540 : : {
2541 : 1049473 : unsigned len = path->length ();
2542 : :
2543 : : /* Check that the path is connected. */
2544 : 2524871 : for (unsigned int j = 0; j < len - 1; j++)
2545 : : {
2546 : 1493603 : edge e = (*path)[j]->e;
2547 : 1493603 : if (e->dest != (*path)[j+1]->e->src)
2548 : : return false;
2549 : : }
2550 : : return true;
2551 : : }
2552 : :
2553 : : /* Remove any queued jump threads that include edge E.
2554 : :
2555 : : We don't actually remove them here, just record the edges into ax
2556 : : hash table. That way we can do the search once per iteration of
2557 : : DOM/VRP rather than for every case where DOM optimizes away a COND_EXPR. */
2558 : :
2559 : : void
2560 : 336083 : fwd_jt_path_registry::remove_jump_threads_including (edge_def *e)
2561 : : {
2562 : 336083 : if (!m_paths.exists () || !flag_thread_jumps)
2563 : : return;
2564 : :
2565 : 336047 : edge *slot = m_removed_edges->find_slot (e, INSERT);
2566 : 336047 : *slot = e;
2567 : : }
2568 : :
2569 : : /* Thread all paths that have been queued for jump threading, and
2570 : : update the CFG accordingly.
2571 : :
2572 : : It is the caller's responsibility to fix the dominance information
2573 : : and rewrite duplicated SSA_NAMEs back into SSA form.
2574 : :
2575 : : If PEEL_LOOP_HEADERS is false, avoid threading edges through loop
2576 : : headers if it does not simplify the loop.
2577 : :
2578 : : Returns true if one or more edges were threaded. */
2579 : :
2580 : : bool
2581 : 7981989 : jt_path_registry::thread_through_all_blocks (bool peel_loop_headers)
2582 : : {
2583 : 8036353 : if (m_paths.length () == 0)
2584 : : return false;
2585 : :
2586 : 407543 : m_num_threaded_edges = 0;
2587 : :
2588 : 407543 : bool retval = update_cfg (peel_loop_headers);
2589 : :
2590 : 407543 : statistics_counter_event (cfun, "Jumps threaded", m_num_threaded_edges);
2591 : :
2592 : 407543 : if (retval)
2593 : : {
2594 : 353179 : loops_state_set (LOOPS_NEED_FIXUP);
2595 : 353179 : return true;
2596 : : }
2597 : : return false;
2598 : : }
2599 : :
2600 : : /* This is the backward threader version of thread_through_all_blocks
2601 : : using a generic BB copier. */
2602 : :
2603 : : bool
2604 : 261590 : back_jt_path_registry::update_cfg (bool /*peel_loop_headers*/)
2605 : : {
2606 : 261590 : bool retval = false;
2607 : 261590 : hash_set<edge> visited_starting_edges;
2608 : :
2609 : 1323016 : while (m_paths.length ())
2610 : : {
2611 : 1061426 : vec<jump_thread_edge *> *path = m_paths[0];
2612 : 1061426 : edge entry = (*path)[0]->e;
2613 : :
2614 : : /* Do not jump-thread twice from the same starting edge.
2615 : :
2616 : : Previously we only checked that we weren't threading twice
2617 : : from the same BB, but that was too restrictive. Imagine a
2618 : : path that starts from GIMPLE_COND(x_123 == 0,...), where both
2619 : : edges out of this conditional yield paths that can be
2620 : : threaded (for example, both lead to an x_123==0 or x_123!=0
2621 : : conditional further down the line. */
2622 : 1061426 : if (visited_starting_edges.contains (entry)
2623 : : /* We may not want to realize this jump thread path for
2624 : : various reasons. So check it first. */
2625 : 1061426 : || !valid_jump_thread_path (path))
2626 : : {
2627 : : /* Remove invalid jump-thread paths. */
2628 : 30158 : cancel_thread (path, "Avoiding threading twice from same edge");
2629 : 30158 : m_paths.unordered_remove (0);
2630 : 30158 : continue;
2631 : : }
2632 : :
2633 : 1031268 : unsigned len = path->length ();
2634 : 1031268 : edge exit = (*path)[len - 1]->e;
2635 : 1031268 : basic_block *region = XNEWVEC (basic_block, len - 1);
2636 : :
2637 : 2480220 : for (unsigned int j = 0; j < len - 1; j++)
2638 : 1448952 : region[j] = (*path)[j]->e->dest;
2639 : :
2640 : 1031268 : if (duplicate_thread_path (entry, exit, region, len - 1, 0))
2641 : : {
2642 : : /* We do not update dominance info. */
2643 : 1031242 : free_dominance_info (CDI_DOMINATORS);
2644 : 1031242 : visited_starting_edges.add (entry);
2645 : 1031242 : retval = true;
2646 : 1031242 : m_num_threaded_edges++;
2647 : : }
2648 : :
2649 : 1031268 : path->release ();
2650 : 1031268 : m_paths.unordered_remove (0);
2651 : 1031268 : free (region);
2652 : : }
2653 : 261590 : return retval;
2654 : 261590 : }
2655 : :
2656 : : /* This is the forward threader version of thread_through_all_blocks,
2657 : : using a custom BB copier. */
2658 : :
2659 : : bool
2660 : 145953 : fwd_jt_path_registry::update_cfg (bool may_peel_loop_headers)
2661 : : {
2662 : 145953 : bool retval = false;
2663 : :
2664 : : /* Remove any paths that referenced removed edges. */
2665 : 145953 : if (m_removed_edges)
2666 : 801730 : for (unsigned i = 0; i < m_paths.length (); )
2667 : : {
2668 : 655777 : unsigned int j;
2669 : 655777 : vec<jump_thread_edge *> *path = m_paths[i];
2670 : :
2671 : 2155852 : for (j = 0; j < path->length (); j++)
2672 : : {
2673 : 1590034 : edge e = (*path)[j]->e;
2674 : 1590034 : if (m_removed_edges->find_slot (e, NO_INSERT)
2675 : 3090109 : || (((*path)[j]->type == EDGE_COPY_SRC_BLOCK
2676 : 1010884 : || (*path)[j]->type == EDGE_COPY_SRC_JOINER_BLOCK)
2677 : 777206 : && !can_duplicate_block_p (e->src)))
2678 : : break;
2679 : : }
2680 : :
2681 : 1311554 : if (j != path->length ())
2682 : : {
2683 : 89959 : cancel_thread (path, "Thread references removed edge");
2684 : 89959 : m_paths.unordered_remove (i);
2685 : 89959 : continue;
2686 : : }
2687 : 565818 : i++;
2688 : : }
2689 : :
2690 : 145953 : auto_bitmap threaded_blocks;
2691 : 145953 : mark_threaded_blocks (threaded_blocks);
2692 : :
2693 : 145953 : initialize_original_copy_tables ();
2694 : :
2695 : : /* The order in which we process jump threads can be important.
2696 : :
2697 : : Consider if we have two jump threading paths A and B. If the
2698 : : target edge of A is the starting edge of B and we thread path A
2699 : : first, then we create an additional incoming edge into B->dest that
2700 : : we cannot discover as a jump threading path on this iteration.
2701 : :
2702 : : If we instead thread B first, then the edge into B->dest will have
2703 : : already been redirected before we process path A and path A will
2704 : : natually, with no further work, target the redirected path for B.
2705 : :
2706 : : An post-order is sufficient here. Compute the ordering first, then
2707 : : process the blocks. */
2708 : 145953 : if (!bitmap_empty_p (threaded_blocks))
2709 : : {
2710 : 131722 : int *postorder = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2711 : 131722 : unsigned int postorder_num = post_order_compute (postorder, false, false);
2712 : 7357162 : for (unsigned int i = 0; i < postorder_num; i++)
2713 : : {
2714 : 7225440 : unsigned int indx = postorder[i];
2715 : 7225440 : if (bitmap_bit_p (threaded_blocks, indx))
2716 : : {
2717 : 325943 : basic_block bb = BASIC_BLOCK_FOR_FN (cfun, indx);
2718 : 325943 : retval |= thread_block (bb, true);
2719 : : }
2720 : : }
2721 : 131722 : free (postorder);
2722 : : }
2723 : :
2724 : : /* Then perform the threading through loop headers. We start with the
2725 : : innermost loop, so that the changes in cfg we perform won't affect
2726 : : further threading. */
2727 : 969591 : for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
2728 : : {
2729 : 935960 : if (!loop->header
2730 : 531732 : || !bitmap_bit_p (threaded_blocks, loop->header->index))
2731 : 404228 : continue;
2732 : :
2733 : 127504 : retval |= thread_through_loop_header (loop, may_peel_loop_headers);
2734 : 145953 : }
2735 : :
2736 : : /* All jump threading paths should have been resolved at this
2737 : : point. Verify that is the case. */
2738 : 145953 : basic_block bb;
2739 : 8410713 : FOR_EACH_BB_FN (bb, cfun)
2740 : : {
2741 : 8264760 : edge_iterator ei;
2742 : 8264760 : edge e;
2743 : 20317612 : FOR_EACH_EDGE (e, ei, bb->preds)
2744 : 12052852 : gcc_assert (e->aux == NULL);
2745 : : }
2746 : :
2747 : 145953 : free_original_copy_tables ();
2748 : :
2749 : 145953 : return retval;
2750 : 145953 : }
2751 : :
2752 : : bool
2753 : 2267135 : jt_path_registry::cancel_invalid_paths (vec<jump_thread_edge *> &path)
2754 : : {
2755 : 2267135 : gcc_checking_assert (!path.is_empty ());
2756 : 2267135 : edge entry = path[0]->e;
2757 : 2267135 : edge exit = path[path.length () - 1]->e;
2758 : 2267135 : bool seen_latch = false;
2759 : 2267135 : int loops_crossed = 0;
2760 : 2267135 : bool crossed_latch = false;
2761 : 2267135 : bool crossed_loop_header = false;
2762 : : // Use ->dest here instead of ->src to ignore the first block. The
2763 : : // first block is allowed to be in a different loop, since it'll be
2764 : : // redirected. See similar comment in profitable_path_p: "we don't
2765 : : // care about that block...".
2766 : 2267135 : loop_p loop = entry->dest->loop_father;
2767 : 2267135 : loop_p curr_loop = loop;
2768 : :
2769 : 7964430 : for (unsigned int i = 0; i < path.length (); i++)
2770 : : {
2771 : 5697295 : edge e = path[i]->e;
2772 : :
2773 : 5697295 : if (e == NULL)
2774 : : {
2775 : : // NULL outgoing edges on a path can happen for jumping to a
2776 : : // constant address.
2777 : 0 : cancel_thread (&path, "Found NULL edge in jump threading path");
2778 : 0 : return true;
2779 : : }
2780 : :
2781 : 5697295 : if (loop->latch == e->src || loop->latch == e->dest)
2782 : : {
2783 : 479743 : seen_latch = true;
2784 : : // Like seen_latch, but excludes the first block.
2785 : 479743 : if (e->src != entry->src)
2786 : 450696 : crossed_latch = true;
2787 : : }
2788 : :
2789 : 5697295 : if (e->dest->loop_father != curr_loop)
2790 : : {
2791 : 317330 : curr_loop = e->dest->loop_father;
2792 : 317330 : ++loops_crossed;
2793 : : }
2794 : :
2795 : : // ?? Avoid threading through loop headers that remain in the
2796 : : // loop, as such threadings tend to create sub-loops which
2797 : : // _might_ be OK ??.
2798 : 5697295 : if (e->dest->loop_father->header == e->dest
2799 : 5697295 : && !flow_loop_nested_p (exit->dest->loop_father,
2800 : : e->dest->loop_father))
2801 : : crossed_loop_header = true;
2802 : :
2803 : 5697295 : if (flag_checking && !m_backedge_threads)
2804 : 2787312 : gcc_assert ((path[i]->e->flags & EDGE_DFS_BACK) == 0);
2805 : : }
2806 : :
2807 : : // If we crossed a loop into an outer loop without crossing the
2808 : : // latch, this is just an early exit from the loop.
2809 : 2267135 : if (loops_crossed == 1
2810 : 2267135 : && !crossed_latch
2811 : 2267135 : && flow_loop_nested_p (exit->dest->loop_father, exit->src->loop_father))
2812 : : return false;
2813 : :
2814 : 2189014 : if (cfun->curr_properties & PROP_loop_opts_done)
2815 : : return false;
2816 : :
2817 : 1634335 : if (seen_latch && empty_block_p (loop->latch))
2818 : : {
2819 : 213300 : cancel_thread (&path, "Threading through latch before loop opts "
2820 : : "would create non-empty latch");
2821 : 213300 : return true;
2822 : : }
2823 : 1421035 : if (loops_crossed)
2824 : : {
2825 : 139470 : cancel_thread (&path, "Path crosses loops");
2826 : 139470 : return true;
2827 : : }
2828 : : // The path should either start and end in the same loop or exit the
2829 : : // loop it starts in but never enter a loop. This also catches
2830 : : // creating irreducible loops, not only rotation.
2831 : 1281565 : if (entry->src->loop_father != exit->dest->loop_father
2832 : 1409512 : && !flow_loop_nested_p (exit->src->loop_father,
2833 : 127947 : entry->dest->loop_father))
2834 : : {
2835 : 127947 : cancel_thread (&path, "Path rotates loop");
2836 : 127947 : return true;
2837 : : }
2838 : 1153618 : if (crossed_loop_header)
2839 : : {
2840 : 14348 : cancel_thread (&path, "Path crosses loop header but does not exit it");
2841 : 14348 : return true;
2842 : : }
2843 : : return false;
2844 : : }
2845 : :
2846 : : /* Register a jump threading opportunity. We queue up all the jump
2847 : : threading opportunities discovered by a pass and update the CFG
2848 : : and SSA form all at once.
2849 : :
2850 : : E is the edge we can thread, E2 is the new target edge, i.e., we
2851 : : are effectively recording that E->dest can be changed to E2->dest
2852 : : after fixing the SSA graph.
2853 : :
2854 : : Return TRUE if PATH was successfully threaded. */
2855 : :
2856 : : bool
2857 : 2267135 : jt_path_registry::register_jump_thread (vec<jump_thread_edge *> *path)
2858 : : {
2859 : 2267135 : gcc_checking_assert (flag_thread_jumps);
2860 : :
2861 : 2267135 : if (!dbg_cnt (registered_jump_thread))
2862 : : {
2863 : 0 : path->release ();
2864 : 0 : return false;
2865 : : }
2866 : :
2867 : 2267135 : if (cancel_invalid_paths (*path))
2868 : : return false;
2869 : :
2870 : 1772070 : if (dump_file && (dump_flags & TDF_DETAILS))
2871 : 116 : dump_jump_thread_path (dump_file, *path, true);
2872 : :
2873 : 1772070 : m_paths.safe_push (path);
2874 : 1772070 : return true;
2875 : : }
2876 : :
2877 : : /* Return how many uses of T there are within BB, as long as there
2878 : : aren't any uses outside BB. If there are any uses outside BB,
2879 : : return -1 if there's at most one use within BB, or -2 if there is
2880 : : more than one use within BB. */
2881 : :
2882 : : static int
2883 : 1656424 : uses_in_bb (tree t, basic_block bb)
2884 : : {
2885 : 1656424 : int uses = 0;
2886 : 1656424 : bool outside_bb = false;
2887 : :
2888 : 1656424 : imm_use_iterator iter;
2889 : 1656424 : use_operand_p use_p;
2890 : 5038584 : FOR_EACH_IMM_USE_FAST (use_p, iter, t)
2891 : : {
2892 : 3484864 : if (is_gimple_debug (USE_STMT (use_p)))
2893 : 569796 : continue;
2894 : :
2895 : 2915068 : if (gimple_bb (USE_STMT (use_p)) != bb)
2896 : : outside_bb = true;
2897 : : else
2898 : 1945104 : uses++;
2899 : :
2900 : 2915068 : if (outside_bb && uses > 1)
2901 : : return -2;
2902 : : }
2903 : :
2904 : 1553720 : if (outside_bb)
2905 : 450931 : return -1;
2906 : :
2907 : : return uses;
2908 : : }
2909 : :
2910 : : /* Starting from the final control flow stmt in BB, assuming it will
2911 : : be removed, follow uses in to-be-removed stmts back to their defs
2912 : : and count how many defs are to become dead and be removed as
2913 : : well. */
2914 : :
2915 : : unsigned int
2916 : 1509235 : estimate_threading_killed_stmts (basic_block bb)
2917 : : {
2918 : 1509235 : int killed_stmts = 0;
2919 : 1509235 : hash_map<tree, int> ssa_remaining_uses;
2920 : 1509235 : auto_vec<gimple *, 4> dead_worklist;
2921 : :
2922 : : /* If the block has only two predecessors, threading will turn phi
2923 : : dsts into either src, so count them as dead stmts. */
2924 : 1509235 : bool drop_all_phis = EDGE_COUNT (bb->preds) == 2;
2925 : :
2926 : 1509235 : if (drop_all_phis)
2927 : 689172 : for (gphi_iterator gsi = gsi_start_phis (bb);
2928 : 2323106 : !gsi_end_p (gsi); gsi_next (&gsi))
2929 : : {
2930 : 1633934 : gphi *phi = gsi.phi ();
2931 : 1633934 : tree dst = gimple_phi_result (phi);
2932 : :
2933 : : /* We don't count virtual PHIs as stmts in
2934 : : record_temporary_equivalences_from_phis. */
2935 : 3267868 : if (virtual_operand_p (dst))
2936 : 482309 : continue;
2937 : :
2938 : 1151625 : killed_stmts++;
2939 : : }
2940 : :
2941 : 3018470 : if (gsi_end_p (gsi_last_bb (bb)))
2942 : 0 : return killed_stmts;
2943 : :
2944 : 1509235 : gimple *stmt = gsi_stmt (gsi_last_bb (bb));
2945 : 1509235 : if (gimple_code (stmt) != GIMPLE_COND
2946 : : && gimple_code (stmt) != GIMPLE_GOTO
2947 : : && gimple_code (stmt) != GIMPLE_SWITCH)
2948 : 477063 : return killed_stmts;
2949 : :
2950 : : /* The control statement is always dead. */
2951 : 1032172 : killed_stmts++;
2952 : 1032172 : dead_worklist.quick_push (stmt);
2953 : 3015111 : while (!dead_worklist.is_empty ())
2954 : : {
2955 : 1982939 : stmt = dead_worklist.pop ();
2956 : :
2957 : 1982939 : ssa_op_iter iter;
2958 : 1982939 : use_operand_p use_p;
2959 : 4336003 : FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
2960 : : {
2961 : 2353064 : tree t = USE_FROM_PTR (use_p);
2962 : 2353064 : gimple *def = SSA_NAME_DEF_STMT (t);
2963 : :
2964 : 2353064 : if (gimple_bb (def) == bb
2965 : 1859706 : && (gimple_code (def) != GIMPLE_PHI
2966 : 131961 : || !drop_all_phis)
2967 : 4128625 : && !gimple_has_side_effects (def))
2968 : : {
2969 : 1708180 : int *usesp = ssa_remaining_uses.get (t);
2970 : 1708180 : int uses;
2971 : :
2972 : 1708180 : if (usesp)
2973 : 51756 : uses = *usesp;
2974 : : else
2975 : 1656424 : uses = uses_in_bb (t, bb);
2976 : :
2977 : 1708180 : gcc_assert (uses);
2978 : :
2979 : : /* Don't bother recording the expected use count if we
2980 : : won't find any further uses within BB. */
2981 : 1708180 : if (!usesp && (uses < -1 || uses > 1))
2982 : : {
2983 : 257722 : usesp = &ssa_remaining_uses.get_or_insert (t);
2984 : 257722 : *usesp = uses;
2985 : : }
2986 : :
2987 : 1708180 : if (uses < 0)
2988 : 584976 : continue;
2989 : :
2990 : 1123204 : --uses;
2991 : 1123204 : if (usesp)
2992 : 175433 : *usesp = uses;
2993 : :
2994 : 1123204 : if (!uses)
2995 : : {
2996 : 963856 : killed_stmts++;
2997 : 963856 : if (usesp)
2998 : 16085 : ssa_remaining_uses.remove (t);
2999 : 963856 : if (gimple_code (def) != GIMPLE_PHI)
3000 : 950767 : dead_worklist.safe_push (def);
3001 : : }
3002 : : }
3003 : : }
3004 : : }
3005 : :
3006 : 1032172 : if (dump_file)
3007 : 21 : fprintf (dump_file, "threading bb %i kills %i stmts\n",
3008 : : bb->index, killed_stmts);
3009 : :
3010 : 1032172 : return killed_stmts;
3011 : 1509235 : }
|