Line data Source code
1 : /* SSA Jump Threading
2 : Copyright (C) 2005-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify
7 : it under the terms of the GNU General Public License as published by
8 : the Free Software Foundation; either version 3, or (at your option)
9 : any later version.
10 :
11 : GCC is distributed in the hope that it will be useful,
12 : but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : GNU General Public License for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "backend.h"
24 : #include "predict.h"
25 : #include "tree.h"
26 : #include "gimple.h"
27 : #include "fold-const.h"
28 : #include "cfgloop.h"
29 : #include "gimple-iterator.h"
30 : #include "tree-cfg.h"
31 : #include "tree-ssa-threadupdate.h"
32 : #include "tree-ssa-loop.h"
33 : #include "cfganal.h"
34 : #include "tree-pass.h"
35 : #include "gimple-ssa.h"
36 : #include "tree-phinodes.h"
37 : #include "tree-inline.h"
38 : #include "tree-vectorizer.h"
39 : #include "value-range.h"
40 : #include "gimple-range.h"
41 : #include "tree-ssa-threadedge.h"
42 : #include "gimple-range-path.h"
43 : #include "ssa.h"
44 : #include "tree-cfgcleanup.h"
45 : #include "tree-pretty-print.h"
46 : #include "cfghooks.h"
47 : #include "dbgcnt.h"
48 :
49 : // Path registry for the backwards threader. After all paths have been
50 : // registered with register_path(), thread_through_all_blocks() is called
51 : // to modify the CFG.
52 :
class back_threader_registry : public back_jt_path_registry
{
public:
  // Queue PATH (ordered as stored by the back threader) for jump
  // threading, with TAKEN being the edge known to be taken out of the
  // final block.  Returns true if the path was accepted.
  bool register_path (const vec<basic_block> &, edge taken);
};
58 :
59 : // Class to abstract the profitability code for the backwards threader.
60 :
class back_threader_profitability
{
public:
  back_threader_profitability (bool speed_p, gimple *stmt);
  // Cheap check usable while a path is still being grown; may accept
  // paths the final profitable_path_p later rejects.
  bool possibly_profitable_path_p (const vec<basic_block> &, bool *);
  // Final verdict once the taken edge out of the path is known.
  bool profitable_path_p (const vec<basic_block> &,
			  edge taken, bool *irreducible_loop);
private:
  // Whether we optimize for speed (as opposed to size).
  const bool m_speed_p;
  // Size estimate of the control statement we get to eliminate.
  int m_exit_jump_benefit;
  // Whether the final statement of the path is a switch or computed goto.
  bool m_threaded_multiway_branch;
  // The following are computed by possibly_profitable_path_p
  bool m_threaded_through_latch;
  bool m_multiway_branch_in_path;
  bool m_contains_hot_bb;
  // Estimated size (in insns) of the blocks to be copied.
  int m_n_insns;
};
78 :
79 21287484 : back_threader_profitability::back_threader_profitability (bool speed_p,
80 21287484 : gimple *last)
81 21287484 : : m_speed_p (speed_p)
82 : {
83 21287484 : m_threaded_multiway_branch = (gimple_code (last) == GIMPLE_SWITCH
84 21287484 : || gimple_code (last) == GIMPLE_GOTO);
85 : // The forward threader has estimate_threading_killed_stmts, in
86 : // particular it estimates further DCE from eliminating the exit
87 : // control stmt.
88 21287484 : m_exit_jump_benefit = estimate_num_insns (last, &eni_size_weights);
89 21287484 : }
90 :
91 : // Back threader flags.
92 : #define BT_NONE 0
93 : // Generate fast code at the expense of code size.
94 : #define BT_SPEED 1
95 : // Resolve unknown SSAs on entry to a threading path. If set, use the
96 : // ranger. If not, assume all ranges on entry to a path are VARYING.
97 : #define BT_RESOLVE 2
98 :
class back_threader
{
public:
  back_threader (function *fun, unsigned flags, bool first);
  ~back_threader ();
  unsigned thread_blocks ();
private:
  void maybe_thread_block (basic_block bb);
  bool debug_counter ();
  edge maybe_register_path (back_threader_profitability &);
  void maybe_register_path_dump (edge taken_edge);
  void find_paths_to_names (basic_block bb, bitmap imports, unsigned,
			    back_threader_profitability &);
  edge find_taken_edge (const vec<basic_block> &path);
  edge find_taken_edge_cond (const vec<basic_block> &path, gcond *);
  edge find_taken_edge_switch (const vec<basic_block> &path, gswitch *);
  virtual void debug ();
  virtual void dump (FILE *out);

  back_threader_registry m_registry;

  // Current path being analyzed.
  auto_vec<basic_block> m_path;
  // Hash to mark visited BBs while analyzing a path.
  hash_set<basic_block> m_visited_bbs;
  // The set of SSA names, any of which could potentially change the
  // value of the final conditional in a path.
  auto_bitmap m_imports;
  // The last statement in the path.
  gimple *m_last_stmt;
  // Marker to differentiate unreachable edges.
  static const edge UNREACHABLE_EDGE;
  // The function being threaded.
  function *m_fun;
  // Ranger for the path solver.
  gimple_ranger *m_ranger;
  // Bitwise OR of BT_* flags.  BT_RESOLVE set means unknown SSA names
  // along a path are resolved with the ranger.  Otherwise, unknown
  // SSA names are assumed to be VARYING.  Setting it is more precise
  // but slower.
  unsigned m_flags;
  // Set to TRUE for the first of each thread[12] pass or the first of
  // each threadfull[12] pass.  This is used to differentiate between
  // the different threading passes so we can set up debug counters.
  bool m_first;
};
143 :
// Used to differentiate unreachable edges, so we may stop the search
// in the given direction.
const edge back_threader::UNREACHABLE_EDGE = (edge) -1;
147 :
back_threader::back_threader (function *fun, unsigned flags, bool first)
  : m_first (first)
{
  // When optimizing for speed we may modify loops, so request the
  // canonical loop form; otherwise initialize the loop optimizer in a
  // mode that avoids touching the CFG.
  if (flags & BT_SPEED)
    loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);
  else
    loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  m_fun = fun;
  m_flags = flags;
  m_last_stmt = NULL;

  // The path solver needs EDGE_DFS_BACK in resolving mode.
  if (flags & BT_RESOLVE)
    mark_dfs_back_edges ();

  m_ranger = new gimple_ranger;
}
166 :
back_threader::~back_threader ()
{
  // Tear down in reverse order of construction: the ranger first,
  // then the loop structures set up in the constructor.
  delete m_ranger;
  loop_optimizer_finalize ();
}
172 :
173 : // A wrapper for the various debug counters for the threading passes.
174 : // Returns TRUE if it's OK to register the current threading
175 : // candidate.
176 :
177 : bool
178 1367929 : back_threader::debug_counter ()
179 : {
180 : // The ethread pass is mostly harmless ;-).
181 1367929 : if ((m_flags & BT_SPEED) == 0)
182 : return true;
183 :
184 921527 : if (m_flags & BT_RESOLVE)
185 : {
186 646152 : if (m_first && !dbg_cnt (back_threadfull1))
187 : return false;
188 :
189 646152 : if (!m_first && !dbg_cnt (back_threadfull2))
190 : return false;
191 : }
192 : else
193 : {
194 275375 : if (m_first && !dbg_cnt (back_thread1))
195 : return false;
196 :
197 275375 : if (!m_first && !dbg_cnt (back_thread2))
198 : return false;
199 : }
200 : return true;
201 : }
202 :
203 : static void
204 538 : dump_path (FILE *dump_file, const vec<basic_block> &path)
205 : {
206 2935 : for (unsigned i = path.length (); i > 0; --i)
207 : {
208 1859 : basic_block bb = path[i - 1];
209 1859 : fprintf (dump_file, "%d", bb->index);
210 1859 : if (i > 1)
211 1321 : fprintf (dump_file, "->");
212 : }
213 538 : }
214 :
215 : // Dump details of an attempt to register a path.
216 :
217 : void
218 538 : back_threader::maybe_register_path_dump (edge taken)
219 : {
220 538 : if (m_path.is_empty ())
221 : return;
222 :
223 538 : fprintf (dump_file, "path: ");
224 538 : dump_path (dump_file, m_path);
225 538 : fprintf (dump_file, "->");
226 :
227 538 : if (taken == UNREACHABLE_EDGE)
228 10 : fprintf (dump_file, "xx REJECTED (unreachable)\n");
229 528 : else if (taken)
230 84 : fprintf (dump_file, "%d SUCCESS\n", taken->dest->index);
231 : else
232 444 : fprintf (dump_file, "xx REJECTED\n");
233 : }
234 :
235 : // If an outgoing edge can be determined out of the current path,
236 : // register it for jump threading and return the taken edge.
237 : //
238 : // Return NULL if it is unprofitable to thread this path, or the
239 : // outgoing edge is unknown. Return UNREACHABLE_EDGE if the path is
240 : // unreachable.
241 :
242 : edge
243 24598146 : back_threader::maybe_register_path (back_threader_profitability &profit)
244 : {
245 24598146 : edge taken_edge = find_taken_edge (m_path);
246 :
247 24598146 : if (taken_edge && taken_edge != UNREACHABLE_EDGE)
248 : {
249 1495097 : bool irreducible = false;
250 1495097 : if (profit.profitable_path_p (m_path, taken_edge, &irreducible)
251 1367929 : && debug_counter ()
252 2863026 : && m_registry.register_path (m_path, taken_edge))
253 : {
254 1332708 : if (irreducible)
255 34267 : vect_free_loop_info_assumptions (m_path[0]->loop_father);
256 : }
257 : else
258 : taken_edge = NULL;
259 : }
260 :
261 24598146 : if (dump_file && (dump_flags & TDF_DETAILS))
262 538 : maybe_register_path_dump (taken_edge);
263 :
264 24598146 : return taken_edge;
265 : }
266 :
267 : // Return the known taken edge out of a path. If the path can be
268 : // determined to be unreachable, return UNREACHABLE_EDGE. If no
269 : // outgoing edge can be calculated, return NULL.
270 :
271 : edge
272 24598146 : back_threader::find_taken_edge (const vec<basic_block> &path)
273 : {
274 24598146 : gcc_checking_assert (path.length () > 1);
275 24598146 : switch (gimple_code (m_last_stmt))
276 : {
277 24483428 : case GIMPLE_COND:
278 24483428 : return find_taken_edge_cond (path, as_a<gcond *> (m_last_stmt));
279 :
280 114718 : case GIMPLE_SWITCH:
281 114718 : return find_taken_edge_switch (path, as_a<gswitch *> (m_last_stmt));
282 :
283 : default:
284 : return NULL;
285 : }
286 : }
287 :
288 : // Same as find_taken_edge, but for paths ending in a switch.
289 :
290 : edge
291 114718 : back_threader::find_taken_edge_switch (const vec<basic_block> &path,
292 : gswitch *sw)
293 : {
294 114718 : tree name = gimple_switch_index (sw);
295 114718 : int_range_max r;
296 :
297 114718 : path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
298 114718 : solver.range_of_expr (r, name, sw);
299 :
300 114718 : if (r.undefined_p ())
301 : return UNREACHABLE_EDGE;
302 :
303 114297 : if (r.varying_p ())
304 : return NULL;
305 :
306 62730 : tree label = find_case_label_range (sw, &r);
307 62730 : if (!label)
308 : return NULL;
309 :
310 5311 : return find_edge (gimple_bb (sw), label_to_block (cfun, CASE_LABEL (label)));
311 114718 : }
312 :
313 : // Same as find_taken_edge, but for paths ending in a GIMPLE_COND.
314 :
315 : edge
316 24483428 : back_threader::find_taken_edge_cond (const vec<basic_block> &path,
317 : gcond *cond)
318 : {
319 24483428 : int_range_max r;
320 :
321 24483428 : path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
322 24483428 : solver.range_of_stmt (r, cond);
323 :
324 24483428 : if (solver.unreachable_path_p ())
325 : return UNREACHABLE_EDGE;
326 :
327 24395902 : int_range<2> true_range = range_true ();
328 24395902 : int_range<2> false_range = range_false ();
329 :
330 24395902 : if (r == true_range || r == false_range)
331 : {
332 1489786 : edge e_true, e_false;
333 1489786 : basic_block bb = gimple_bb (cond);
334 1489786 : extract_true_false_edges_from_block (bb, &e_true, &e_false);
335 1489786 : return r == true_range ? e_true : e_false;
336 : }
337 : return NULL;
338 24483428 : }
339 :
340 : // Find jump threading paths to any of the SSA names in the
341 : // INTERESTING bitmap, and register any such paths.
342 : //
343 : // BB is the current path being processed.
344 : //
345 : // OVERALL_PATHS is the search space up to this block
346 :
void
back_threader::find_paths_to_names (basic_block bb, bitmap interesting,
				    unsigned overall_paths,
				    back_threader_profitability &profit)
{
  // Cycle guard: never visit the same block twice on one path.
  if (m_visited_bbs.add (bb))
    return;

  m_path.safe_push (bb);

  // Try to resolve the path without looking back.  Avoid resolving paths
  // we know are large but are not (yet) recognized as Finite State Machine.
  // ???  Ideally we'd explore the cheapest path to the loop backedge here,
  // avoiding the exponential greedy search and only start that from there.
  // Precomputing a path-size-to-immediate-dominator-of-successor for each
  // edge might help here.  Alternatively copying divergent control flow
  // on the way to the backedge could be worthwhile.
  bool large_non_fsm;
  if (m_path.length () > 1
      && (!profit.possibly_profitable_path_p (m_path, &large_non_fsm)
	  || (!large_non_fsm
	      && maybe_register_path (profit))))
    ;

  // The backwards thread copier cannot copy blocks that do not belong
  // to the same loop, so when the new source of the path entry no
  // longer belongs to it we don't need to search further.
  else if (m_path[0]->loop_father != bb->loop_father)
    ;

  // Continue looking for ways to extend the path but limit the
  // search space along a branch.
  else if ((overall_paths = overall_paths * EDGE_COUNT (bb->preds))
	   <= (unsigned)param_max_jump_thread_paths)
    {
      // For further greedy searching we want to remove interesting
      // names defined in BB but add ones on the PHI edges for the
      // respective edges and adding imports from those stmts.
      // We do this by starting with all names
      // not defined in BB as interesting, collecting a list of
      // interesting PHIs in BB on the fly.  Then we iterate over
      // predecessor edges, adding interesting PHI edge defs to
      // the set of interesting names to consider when processing it.
      auto_bitmap new_interesting;
      auto_vec<int, 16> new_imports;
      auto_vec<gphi *, 4> interesting_phis;
      bitmap_iterator bi;
      unsigned i;
      auto_vec<tree, 16> worklist;
      EXECUTE_IF_SET_IN_BITMAP (interesting, 0, i, bi)
	{
	  tree name = ssa_name (i);
	  gimple *def_stmt = SSA_NAME_DEF_STMT (name);
	  /* Imports remain interesting.  */
	  if (gimple_bb (def_stmt) != bb)
	    {
	      bitmap_set_bit (new_interesting, i);
	      continue;
	    }
	  // NAME is defined in BB: walk its local def chain to find
	  // what feeds it from outside BB or via PHIs.
	  worklist.quick_push (name);
	  while (!worklist.is_empty ())
	    {
	      tree name = worklist.pop ();
	      gimple *def_stmt = SSA_NAME_DEF_STMT (name);
	      /* Newly discovered imports are interesting.  */
	      if (gimple_bb (def_stmt) != bb)
		{
		  bitmap_set_bit (new_interesting, SSA_NAME_VERSION (name));
		  continue;
		}
	      /* Local PHIs participate in renaming below.  */
	      if (gphi *phi = dyn_cast<gphi *> (def_stmt))
		{
		  tree res = gimple_phi_result (phi);
		  if (!SSA_NAME_OCCURS_IN_ABNORMAL_PHI (res))
		    interesting_phis.safe_push (phi);
		}
	      /* For other local defs process their uses, amending
		 imports on the way.  */
	      else
		{
		  tree ssa[3];
		  unsigned lim = gimple_range_ssa_names (ssa, 3, def_stmt);
		  for (unsigned j = 0; j < lim; ++j)
		    {
		      tree rhs = ssa[j];
		      if (rhs
			  && bitmap_set_bit (m_imports,
					     SSA_NAME_VERSION (rhs)))
			{
			  // Remember for restoration on backtracking.
			  new_imports.safe_push (SSA_NAME_VERSION (rhs));
			  worklist.safe_push (rhs);
			}
		    }
		}
	    }
	}
      // Only recurse if something interesting remains to chase.
      if (!bitmap_empty_p (new_interesting)
	  || !interesting_phis.is_empty ())
	{
	  auto_vec<int, 4> unwind (interesting_phis.length ());
	  auto_vec<int, 4> imports_unwind (interesting_phis.length ());
	  edge_iterator iter;
	  edge e;
	  FOR_EACH_EDGE (e, iter, bb->preds)
	    {
	      if (e->flags & EDGE_ABNORMAL
		  // This is like path_crosses_loops in profitable_path_p but
		  // more restrictive to avoid peeling off loop iterations (see
		  // tree-ssa/pr14341.c for an example).
		  // ???  Note this restriction only applied when visiting an
		  // interesting PHI with the former resolve_phi.
		  || (!interesting_phis.is_empty ()
		      && m_path[0]->loop_father != e->src->loop_father))
		continue;
	      // Rename interesting PHI results to the argument coming
	      // in over edge E.
	      for (gphi *phi : interesting_phis)
		{
		  tree def = PHI_ARG_DEF_FROM_EDGE (phi, e);
		  if (TREE_CODE (def) == SSA_NAME)
		    {
		      int ver = SSA_NAME_VERSION (def);
		      if (bitmap_set_bit (new_interesting, ver))
			{
			  if (bitmap_set_bit (m_imports, ver))
			    imports_unwind.quick_push (ver);
			  unwind.quick_push (ver);
			}
		    }
		}
	      find_paths_to_names (e->src, new_interesting, overall_paths,
				   profit);
	      // Restore new_interesting.
	      for (int def : unwind)
		bitmap_clear_bit (new_interesting, def);
	      unwind.truncate (0);
	      // Restore m_imports.
	      for (int def : imports_unwind)
		bitmap_clear_bit (m_imports, def);
	      imports_unwind.truncate (0);
	    }
	}
      /* m_imports tracks all interesting names on the path, so when
	 backtracking we have to restore it.  */
      for (int j : new_imports)
	bitmap_clear_bit (m_imports, j);
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  FAIL: Search space limit %d reached.\n",
	     param_max_jump_thread_paths);

  // Reset things to their original state.
  m_path.pop ();
  m_visited_bbs.remove (bb);
}
501 :
502 : // Search backwards from BB looking for paths where the final
503 : // conditional maybe threaded to a successor block. Record such paths
504 : // for jump threading.
505 :
void
back_threader::maybe_thread_block (basic_block bb)
{
  // Nothing to thread through a block with at most one successor.
  if (EDGE_COUNT (bb->succs) <= 1)
    return;

  gimple *stmt = *gsi_last_bb (bb);
  if (!stmt)
    return;

  enum gimple_code code = gimple_code (stmt);
  if (code != GIMPLE_SWITCH
      && code != GIMPLE_COND)
    return;

  m_last_stmt = stmt;
  m_visited_bbs.empty ();
  m_path.truncate (0);

  // We compute imports of the path during discovery starting
  // just with names used in the conditional.
  bitmap_clear (m_imports);
  ssa_op_iter iter;
  tree name;
  FOR_EACH_SSA_TREE_OPERAND (name, stmt, iter, SSA_OP_USE)
    {
      // Bail if any operand is unsuitable for range analysis.
      if (!gimple_range_ssa_p (name))
	return;
      bitmap_set_bit (m_imports, SSA_NAME_VERSION (name));
    }

  // Interesting is the set of imports we have not yet seen the
  // definition of.  So while imports only grow, the
  // set of interesting defs dwindles and once empty we can
  // stop searching.
  auto_bitmap interesting;
  bitmap_copy (interesting, m_imports);
  back_threader_profitability profit (m_flags & BT_SPEED, stmt);
  find_paths_to_names (bb, interesting, 1, profit);
}
546 :
547 : DEBUG_FUNCTION void
548 0 : debug (const vec <basic_block> &path)
549 : {
550 0 : dump_path (stderr, path);
551 0 : fputc ('\n', stderr);
552 0 : }
553 :
554 : void
555 0 : back_threader::dump (FILE *out)
556 : {
557 0 : fprintf (out, "\nCandidates for pre-computation:\n");
558 0 : fprintf (out, "===================================\n");
559 :
560 0 : bitmap_iterator bi;
561 0 : unsigned i;
562 :
563 0 : EXECUTE_IF_SET_IN_BITMAP (m_imports, 0, i, bi)
564 : {
565 0 : tree name = ssa_name (i);
566 0 : print_generic_expr (out, name, TDF_NONE);
567 0 : fprintf (out, "\n");
568 : }
569 0 : }
570 :
// Debugger entry point: dump state to stderr.

void
back_threader::debug ()
{
  dump (stderr);
}
576 :
577 : /* Examine jump threading path PATH and return TRUE if it is possibly
578 : profitable to thread it, otherwise return FALSE. If this function
579 : returns TRUE profitable_path_p might not be satisfied but when
580 : the path is extended it might be. In particular indicate in
581 : *LARGE_NON_FSM whether the thread is too large for a non-FSM thread
582 : but would be OK if we extend the path to cover the loop backedge.
583 :
584 : ?? It seems we should be able to loosen some of the restrictions in
585 : this function after loop optimizations have run. */
586 :
bool
back_threader_profitability::possibly_profitable_path_p
				  (const vec<basic_block> &m_path,
				   bool *large_non_fsm)
{
  gcc_checking_assert (!m_path.is_empty ());

  /* We can have an empty path here (excluding the DEF block) when the
     statement that makes a conditional generate a compile-time
     constant result is in the same block as the conditional.

     That's not really a jump threading opportunity, but instead is
     simple cprop & simplification.  We could handle it here if we
     wanted by wiring up all the incoming edges.  If we run this
     early in IPA, that might be worth doing.   For now we just
     reject that case.  */
  if (m_path.length () <= 1)
    return false;

  gimple_stmt_iterator gsi;
  loop_p loop = m_path[0]->loop_father;

  // We recompute the following, when we rewrite possibly_profitable_path_p
  // to work incrementally on added BBs we have to unwind them on backtracking
  m_n_insns = 0;
  m_threaded_through_latch = false;
  m_multiway_branch_in_path = false;
  m_contains_hot_bb = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Checking profitability of path (backwards): ");

  /* Count the number of instructions on the path: as these instructions
     will have to be duplicated, we will not record the path if there
     are too many instructions on the path.  Also check that all the
     blocks in the path belong to a single loop.  */
  for (unsigned j = 0; j < m_path.length (); j++)
    {
      basic_block bb = m_path[j];

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " bb:%i", bb->index);
      /* Remember, blocks in the path are stored in opposite order in
	 the PATH array.  The last entry in the array represents the
	 block with an outgoing edge that we will redirect to the jump
	 threading path.  Thus we don't care how many statements are
	 in that block because it will not be copied or whether or not
	 it ends in a multiway branch.  */
      if (j < m_path.length () - 1)
	{
	  int orig_n_insns = m_n_insns;
	  /* Remember if any of the to-be-copied blocks is hot.  */
	  if (!m_contains_hot_bb && m_speed_p)
	    m_contains_hot_bb |= optimize_bb_for_speed_p (bb);
	  for (gsi = gsi_after_labels (bb);
	       !gsi_end_p (gsi);
	       gsi_next_nondebug (&gsi))
	    {
	      /* Do not allow OpenACC loop markers and __builtin_constant_p on
		 threading paths.  The latter is disallowed, because an
		 expression might be constant on two threading paths, and
		 become non-constant (i.e.: phi) when they merge.  */
	      gimple *stmt = gsi_stmt (gsi);
	      if (gimple_call_internal_p (stmt, IFN_UNIQUE)
		  || gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fputc ('\n', dump_file);
		  return false;
		}
	      /* Do not count empty statements and labels.  */
	      if (gimple_code (stmt) != GIMPLE_NOP
		  && !is_gimple_debug (stmt))
		m_n_insns += estimate_num_insns (stmt, &eni_size_weights);
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " (%i insns)", m_n_insns-orig_n_insns);

	  /* We do not look at the block with the threaded branch
	     in this loop.  So if any block with a last statement that
	     is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
	     multiway branch on our path.

	     The block in PATH[0] is special, it's the block were we're
	     going to be able to eliminate its branch.  */
	  if (j > 0)
	    {
	      gimple *last = *gsi_last_bb (bb);
	      if (last
		  && (gimple_code (last) == GIMPLE_SWITCH
		      || gimple_code (last) == GIMPLE_GOTO))
		m_multiway_branch_in_path = true;
	    }
	}

      /* Note if we thread through the latch, we will want to include
	 the last entry in the array when determining if we thread
	 through the loop latch.  */
      if (loop->latch == bb)
	{
	  m_threaded_through_latch = true;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " (latch)");
	}
    }

  /* We are going to remove the control statement at the end of the
     last block in the threading path.  So don't count it against our
     statement count.  */
  m_n_insns -= m_exit_jump_benefit;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n  Control statement insns: %i\n"
	     "  Overall: %i insns\n",
	     m_exit_jump_benefit, m_n_insns);

  /* Threading is profitable if the path duplicated is hot but also
     in a case we separate cold path from hot path and permit optimization
     of the hot path later.  Be on the aggressive side here.  In some
     testcases, as in PR 78407 this leads to noticeable improvements.  */
  if (m_speed_p)
    {
      if (m_n_insns >= param_max_fsm_thread_path_insns)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		     "the number of instructions on the path "
		     "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
	  return false;
	}
      edge entry = find_edge (m_path[m_path.length () - 1],
			      m_path[m_path.length () - 2]);
      if (probably_never_executed_edge_p (cfun, entry))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		     "path entry is probably never executed.\n");
	  return false;
	}
    }
  else if (m_n_insns > 1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		 "duplication of %i insns is needed and optimizing for size.\n",
		 m_n_insns);
      return false;
    }

  /* The generic copier used by the backthreader does not re-use an
     existing threading path to reduce code duplication.  So for that
     case, drastically reduce the number of statements we are allowed
     to copy.  We don't know yet whether we will thread through the latch
     so we have to be permissive and continue threading, but indicate
     to the caller the thread, if final, wouldn't be profitable.  */
  if ((!m_threaded_multiway_branch
       || !loop->latch
       || loop->latch->index == EXIT_BLOCK)
      && (m_n_insns * param_fsm_scale_path_stmts
	  >= param_max_jump_thread_duplication_stmts))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Did not thread around loop and would copy too "
		 "many statements.\n");
      return false;
    }
  /* Flag paths too large for a non-FSM thread; the caller may still
     extend the path to cover the loop backedge and retry.  */
  *large_non_fsm = (!(m_threaded_through_latch && m_threaded_multiway_branch)
		    && (m_n_insns * param_fsm_scale_path_stmts
			>= param_max_jump_thread_duplication_stmts));

  if (dump_file && (dump_flags & TDF_DETAILS))
    fputc ('\n', dump_file);
  return true;
}
761 :
762 : /* Examine jump threading path PATH and return TRUE if it is profitable to
763 : thread it, otherwise return FALSE.
764 :
765 : The taken edge out of the path is TAKEN_EDGE.
766 :
767 : CREATES_IRREDUCIBLE_LOOP is set to TRUE if threading this path
768 : would create an irreducible loop.
769 :
770 : ?? It seems we should be able to loosen some of the restrictions in
771 : this function after loop optimizations have run. */
772 :
bool
back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
						edge taken_edge,
						bool *creates_irreducible_loop)
{
  // We can assume that possibly_profitable_path_p holds here

  loop_p loop = m_path[0]->loop_father;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Checking profitability of path (backwards): ");

  /* If this path threaded through the loop latch back into the
     same loop and the destination does not dominate the loop
     latch, then this thread would create an irreducible loop.  */
  *creates_irreducible_loop = false;
  if (m_threaded_through_latch
      && loop == taken_edge->dest->loop_father
      && (determine_bb_domination_status (loop, taken_edge->dest)
	  == DOMST_NONDOMINATING))
    *creates_irreducible_loop = true;

  /* Threading is profitable if the path duplicated is hot but also
     in a case we separate cold path from hot path and permit optimization
     of the hot path later.  Be on the aggressive side here.  In some
     testcases, as in PR 78407 this leads to noticeable improvements.  */
  if (m_speed_p
      && (optimize_edge_for_speed_p (taken_edge) || m_contains_hot_bb))
    {
      if (probably_never_executed_edge_p (cfun, taken_edge))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		     "path leads to probably never executed edge.\n");
	  return false;
	}
    }
  else if (m_n_insns > 1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		 "duplication of %i insns is needed and optimizing for size.\n",
		 m_n_insns);
      return false;
    }

  /* We avoid creating irreducible inner loops unless we thread through
     a multiway branch, in which case we have deemed it worth losing
     other loop optimizations later.

     We also consider it worth creating an irreducible inner loop after
     loop optimizations if the number of copied statement is low.  */
  if (!m_threaded_multiway_branch
      && *creates_irreducible_loop
      && (!(cfun->curr_properties & PROP_loop_opts_done)
	  || (m_n_insns * param_fsm_scale_path_stmts
	      >= param_max_jump_thread_duplication_stmts)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Would create irreducible loop early without "
		 "threading multiway branch.\n");
      /* We compute creates_irreducible_loop only late.  */
      return false;
    }

  /* The generic copier used by the backthreader does not re-use an
     existing threading path to reduce code duplication.  So for that
     case, drastically reduce the number of statements we are allowed
     to copy.  */
  if (!(m_threaded_through_latch && m_threaded_multiway_branch)
      && (m_n_insns * param_fsm_scale_path_stmts
	  >= param_max_jump_thread_duplication_stmts))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Did not thread around loop and would copy too "
		 "many statements.\n");
      return false;
    }

  /* When there is a multi-way branch on the path, then threading can
     explode the CFG due to duplicating the edges for that multi-way
     branch.  So like above, only allow a multi-way branch on the path
     if we actually thread a multi-way branch.  */
  if (!m_threaded_multiway_branch && m_multiway_branch_in_path)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Thread through multiway branch without threading "
		 "a multiway branch.\n");
      return false;
    }

  /* Threading through an empty latch would cause code to be added to
     the latch.  This could alter the loop form sufficiently to cause
     loop optimizations to fail.  Disable these threads until after
     loop optimizations have run.  */
  if ((m_threaded_through_latch || taken_edge->dest == loop->latch)
      && !(cfun->curr_properties & PROP_loop_opts_done)
      && empty_block_p (loop->latch))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Thread through latch before loop opts would create "
		 "non-empty latch\n");
      return false;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fputc ('\n', dump_file);
  return true;
}
885 :
886 :
887 : /* The current path PATH is a vector of blocks forming a jump threading
888 : path in reverse order. TAKEN_EDGE is the edge taken from path[0].
889 :
890 : Convert the current path into the form used by register_jump_thread and
891 : register it.
892 :
893 : Return TRUE if successful or FALSE otherwise. */
894 :
895 : bool
896 1367929 : back_threader_registry::register_path (const vec<basic_block> &m_path,
897 : edge taken_edge)
898 : {
899 1367929 : vec<jump_thread_edge *> *jump_thread_path = allocate_thread_path ();
900 :
901 : // The generic copier ignores the edge type. We can build the
902 : // thread edges with any type.
903 3395770 : for (unsigned int j = 0; j + 1 < m_path.length (); j++)
904 : {
905 2027841 : basic_block bb1 = m_path[m_path.length () - j - 1];
906 2027841 : basic_block bb2 = m_path[m_path.length () - j - 2];
907 :
908 2027841 : edge e = find_edge (bb1, bb2);
909 2027841 : gcc_assert (e);
910 2027841 : push_edge (jump_thread_path, e, EDGE_COPY_SRC_BLOCK);
911 : }
912 :
913 1367929 : push_edge (jump_thread_path, taken_edge, EDGE_NO_COPY_SRC_BLOCK);
914 1367929 : return register_jump_thread (jump_thread_path);
915 : }
916 :
917 : // Thread all suitable paths in the current function.
918 : //
919 : // Return TODO_flags.
920 :
921 : unsigned int
922 6257972 : back_threader::thread_blocks ()
923 : {
924 6257972 : basic_block bb;
925 60030505 : FOR_EACH_BB_FN (bb, m_fun)
926 79135190 : if (EDGE_COUNT (bb->succs) > 1)
927 25362657 : maybe_thread_block (bb);
928 :
929 6257972 : bool changed = m_registry.thread_through_all_blocks (true);
930 :
931 6257972 : if (m_flags & BT_SPEED)
932 7518405 : return changed ? TODO_cleanup_cfg : 0;
933 :
934 : return false;
935 : }
936 :
937 : namespace {
938 :
939 : const pass_data pass_data_early_thread_jumps =
940 : {
941 : GIMPLE_PASS,
942 : "ethread",
943 : OPTGROUP_NONE,
944 : TV_TREE_SSA_THREAD_JUMPS,
945 : ( PROP_cfg | PROP_ssa ),
946 : 0,
947 : 0,
948 : 0,
949 : ( TODO_cleanup_cfg | TODO_update_ssa ),
950 : };
951 :
// Pass descriptor for the "thread" jump threading pass.
const pass_data pass_data_thread_jumps =
{
  GIMPLE_PASS, /* type */
  "thread", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_SSA_THREAD_JUMPS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
964 :
// Pass descriptor for the "threadfull" jump threading pass.
const pass_data pass_data_thread_jumps_full =
{
  GIMPLE_PASS, /* type */
  "threadfull", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_SSA_THREAD_JUMPS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
977 :
978 : // Early jump threading pass optimizing for size.
979 : class pass_early_thread_jumps : public gimple_opt_pass
980 : {
981 : public:
982 288047 : pass_early_thread_jumps (gcc::context *ctxt)
983 576094 : : gimple_opt_pass (pass_data_early_thread_jumps, ctxt)
984 : {}
985 :
986 0 : opt_pass * clone () override
987 : {
988 0 : return new pass_early_thread_jumps (m_ctxt);
989 : }
990 288047 : void set_pass_param (unsigned int, bool param) override
991 : {
992 288047 : m_first = param;
993 288047 : }
994 2411726 : bool gate (function *) override
995 : {
996 2411726 : return flag_thread_jumps;
997 : }
998 2411427 : unsigned int execute (function *fun) override
999 : {
1000 2411427 : back_threader threader (fun, BT_NONE, m_first);
1001 2411427 : return threader.thread_blocks ();
1002 2411427 : }
1003 : private:
1004 : bool m_first;
1005 : };
1006 :
1007 : // Jump threading pass without resolving of unknown SSAs.
1008 : class pass_thread_jumps : public gimple_opt_pass
1009 : {
1010 : public:
1011 576094 : pass_thread_jumps (gcc::context *ctxt)
1012 1152188 : : gimple_opt_pass (pass_data_thread_jumps, ctxt)
1013 : {}
1014 288047 : opt_pass * clone (void) override
1015 : {
1016 288047 : return new pass_thread_jumps (m_ctxt);
1017 : }
1018 576094 : void set_pass_param (unsigned int, bool param) override
1019 : {
1020 576094 : m_first = param;
1021 576094 : }
1022 2079078 : bool gate (function *) override
1023 : {
1024 2079078 : return flag_thread_jumps && flag_expensive_optimizations;
1025 : }
1026 1923271 : unsigned int execute (function *fun) override
1027 : {
1028 1923271 : back_threader threader (fun, BT_SPEED, m_first);
1029 1923271 : return threader.thread_blocks ();
1030 1923271 : }
1031 : private:
1032 : bool m_first;
1033 : };
1034 :
1035 : // Jump threading pass that fully resolves unknown SSAs.
1036 : class pass_thread_jumps_full : public gimple_opt_pass
1037 : {
1038 : public:
1039 576094 : pass_thread_jumps_full (gcc::context *ctxt)
1040 1152188 : : gimple_opt_pass (pass_data_thread_jumps_full, ctxt)
1041 : {}
1042 288047 : opt_pass * clone (void) override
1043 : {
1044 288047 : return new pass_thread_jumps_full (m_ctxt);
1045 : }
1046 576094 : void set_pass_param (unsigned int, bool param) override
1047 : {
1048 576094 : m_first = param;
1049 576094 : }
1050 2079078 : bool gate (function *) override
1051 : {
1052 2079078 : return flag_thread_jumps && flag_expensive_optimizations;
1053 : }
1054 1923274 : unsigned int execute (function *fun) override
1055 : {
1056 1923274 : back_threader threader (fun, BT_SPEED | BT_RESOLVE, m_first);
1057 1923274 : return threader.thread_blocks ();
1058 1923274 : }
1059 : private:
1060 : bool m_first;
1061 : };
1062 :
1063 : } // namespace {
1064 :
// Pass-manager factory for the "thread" pass.
gimple_opt_pass *
make_pass_thread_jumps (gcc::context *ctxt)
{
  return new pass_thread_jumps (ctxt);
}
1070 :
// Pass-manager factory for the "threadfull" pass.
gimple_opt_pass *
make_pass_thread_jumps_full (gcc::context *ctxt)
{
  return new pass_thread_jumps_full (ctxt);
}
1076 :
// Pass-manager factory for the early "ethread" pass.
gimple_opt_pass *
make_pass_early_thread_jumps (gcc::context *ctxt)
{
  return new pass_early_thread_jumps (ctxt);
}
|