Line data Source code
1 : /* SSA Jump Threading
2 : Copyright (C) 2005-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify
7 : it under the terms of the GNU General Public License as published by
8 : the Free Software Foundation; either version 3, or (at your option)
9 : any later version.
10 :
11 : GCC is distributed in the hope that it will be useful,
12 : but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : GNU General Public License for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "backend.h"
24 : #include "predict.h"
25 : #include "tree.h"
26 : #include "gimple.h"
27 : #include "fold-const.h"
28 : #include "cfgloop.h"
29 : #include "gimple-iterator.h"
30 : #include "tree-cfg.h"
31 : #include "tree-ssa-threadupdate.h"
32 : #include "tree-ssa-loop.h"
33 : #include "cfganal.h"
34 : #include "tree-pass.h"
35 : #include "gimple-ssa.h"
36 : #include "tree-phinodes.h"
37 : #include "tree-inline.h"
38 : #include "tree-vectorizer.h"
39 : #include "value-range.h"
40 : #include "gimple-range.h"
41 : #include "tree-ssa-threadedge.h"
42 : #include "gimple-range-path.h"
43 : #include "ssa.h"
44 : #include "tree-cfgcleanup.h"
45 : #include "tree-pretty-print.h"
46 : #include "cfghooks.h"
47 : #include "dbgcnt.h"
48 :
49 : // Path registry for the backwards threader. After all paths have been
50 : // registered with register_path(), thread_through_all_blocks() is called
51 : // to modify the CFG.
52 :
53 25073656 : class back_threader_registry : public back_jt_path_registry
54 : {
55 : public:
56 : bool register_path (const vec<basic_block> &, edge taken);
57 : };
58 :
59 : // Class to abstract the profitability code for the backwards threader.
60 :
61 : class back_threader_profitability
62 : {
63 : public:
64 : back_threader_profitability (bool speed_p, gimple *stmt);
65 : bool possibly_profitable_path_p (const vec<basic_block> &, bool *);
66 : bool profitable_path_p (const vec<basic_block> &,
67 : edge taken, bool *irreducible_loop);
68 : private:
69 : const bool m_speed_p;
70 : int m_exit_jump_benefit;
71 : bool m_threaded_multiway_branch;
72 : // The following are computed by possibly_profitable_path_p
73 : bool m_threaded_through_latch;
74 : bool m_multiway_branch_in_path;
75 : bool m_contains_hot_bb;
76 : int m_n_insns;
77 : };
78 :
79 21488932 : back_threader_profitability::back_threader_profitability (bool speed_p,
80 21488932 : gimple *last)
81 21488932 : : m_speed_p (speed_p)
82 : {
83 21488932 : m_threaded_multiway_branch = (gimple_code (last) == GIMPLE_SWITCH
84 21488932 : || gimple_code (last) == GIMPLE_GOTO);
85 : // The forward threader has estimate_threading_killed_stmts, in
86 : // particular it estimates further DCE from eliminating the exit
87 : // control stmt.
88 21488932 : m_exit_jump_benefit = estimate_num_insns (last, &eni_size_weights);
89 21488932 : }
90 :
91 : // Back threader flags.
92 : #define BT_NONE 0
93 : // Generate fast code at the expense of code size.
94 : #define BT_SPEED 1
95 : // Resolve unknown SSAs on entry to a threading path. If set, use the
96 : // ranger. If not, assume all ranges on entry to a path are VARYING.
97 : #define BT_RESOLVE 2
98 :
99 : class back_threader
100 : {
101 : public:
102 : back_threader (function *fun, unsigned flags, bool first);
103 : ~back_threader ();
104 : unsigned thread_blocks ();
105 : private:
106 : void maybe_thread_block (basic_block bb);
107 : bool debug_counter ();
108 : edge maybe_register_path (back_threader_profitability &);
109 : void maybe_register_path_dump (edge taken_edge);
110 : void find_paths_to_names (basic_block bb, bitmap imports, unsigned,
111 : back_threader_profitability &);
112 : edge find_taken_edge (const vec<basic_block> &path);
113 : edge find_taken_edge_cond (const vec<basic_block> &path, gcond *);
114 : edge find_taken_edge_switch (const vec<basic_block> &path, gswitch *);
115 : virtual void debug ();
116 : virtual void dump (FILE *out);
117 :
118 : back_threader_registry m_registry;
119 :
120 : // Current path being analyzed.
121 : auto_vec<basic_block> m_path;
122 : // Hash to mark visited BBs while analyzing a path.
123 : hash_set<basic_block> m_visited_bbs;
124 : // The set of SSA names, any of which could potentially change the
125 : // value of the final conditional in a path.
126 : auto_bitmap m_imports;
127 : // The last statement in the path.
128 : gimple *m_last_stmt;
129 : // Marker to differentiate unreachable edges.
130 : static const edge UNREACHABLE_EDGE;
131 : // Set to TRUE if unknown SSA names along a path should be resolved
132 : // with the ranger. Otherwise, unknown SSA names are assumed to be
133 : // VARYING. Setting to true is more precise but slower.
134 : function *m_fun;
135 : // Ranger for the path solver.
136 : gimple_ranger *m_ranger;
137 : unsigned m_flags;
138 : // Set to TRUE for the first of each thread[12] pass or the first of
139 : // each threadfull[12] pass. This is used to differentiate between
140 : // the different threading passes so we can set up debug counters.
141 : bool m_first;
142 : };
143 :
144 : // Used to differentiate unreachable edges, so we may stop the search
145 : // in a the given direction.
146 : const edge back_threader::UNREACHABLE_EDGE = (edge) -1;
147 :
148 6268414 : back_threader::back_threader (function *fun, unsigned flags, bool first)
149 6268414 : : m_first (first)
150 : {
151 6268414 : if (flags & BT_SPEED)
152 3856285 : loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);
153 : else
154 2412129 : loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
155 :
156 6268414 : m_fun = fun;
157 6268414 : m_flags = flags;
158 6268414 : m_last_stmt = NULL;
159 :
160 : // The path solver needs EDGE_DFS_BACK in resolving mode.
161 6268414 : if (flags & BT_RESOLVE)
162 1928144 : mark_dfs_back_edges ();
163 :
164 6268414 : m_ranger = new gimple_ranger;
165 6268414 : }
166 :
167 6268414 : back_threader::~back_threader ()
168 : {
169 6268414 : delete m_ranger;
170 6268414 : loop_optimizer_finalize ();
171 6268414 : }
172 :
173 : // A wrapper for the various debug counters for the threading passes.
174 : // Returns TRUE if it's OK to register the current threading
175 : // candidate.
176 :
177 : bool
178 1401792 : back_threader::debug_counter ()
179 : {
180 : // The ethread pass is mostly harmless ;-).
181 1401792 : if ((m_flags & BT_SPEED) == 0)
182 : return true;
183 :
184 952452 : if (m_flags & BT_RESOLVE)
185 : {
186 657193 : if (m_first && !dbg_cnt (back_threadfull1))
187 : return false;
188 :
189 657193 : if (!m_first && !dbg_cnt (back_threadfull2))
190 : return false;
191 : }
192 : else
193 : {
194 295259 : if (m_first && !dbg_cnt (back_thread1))
195 : return false;
196 :
197 295259 : if (!m_first && !dbg_cnt (back_thread2))
198 : return false;
199 : }
200 : return true;
201 : }
202 :
203 : static void
204 545 : dump_path (FILE *dump_file, const vec<basic_block> &path)
205 : {
206 2978 : for (unsigned i = path.length (); i > 0; --i)
207 : {
208 1888 : basic_block bb = path[i - 1];
209 1888 : fprintf (dump_file, "%d", bb->index);
210 1888 : if (i > 1)
211 1343 : fprintf (dump_file, "->");
212 : }
213 545 : }
214 :
215 : // Dump details of an attempt to register a path.
216 :
217 : void
218 545 : back_threader::maybe_register_path_dump (edge taken)
219 : {
220 545 : if (m_path.is_empty ())
221 : return;
222 :
223 545 : fprintf (dump_file, "path: ");
224 545 : dump_path (dump_file, m_path);
225 545 : fprintf (dump_file, "->");
226 :
227 545 : if (taken == UNREACHABLE_EDGE)
228 10 : fprintf (dump_file, "xx REJECTED (unreachable)\n");
229 535 : else if (taken)
230 86 : fprintf (dump_file, "%d SUCCESS\n", taken->dest->index);
231 : else
232 449 : fprintf (dump_file, "xx REJECTED\n");
233 : }
234 :
235 : // If an outgoing edge can be determined out of the current path,
236 : // register it for jump threading and return the taken edge.
237 : //
238 : // Return NULL if it is unprofitable to thread this path, or the
239 : // outgoing edge is unknown. Return UNREACHABLE_EDGE if the path is
240 : // unreachable.
241 :
242 : edge
243 26127049 : back_threader::maybe_register_path (back_threader_profitability &profit)
244 : {
245 26127049 : edge taken_edge = find_taken_edge (m_path);
246 :
247 26127049 : if (taken_edge && taken_edge != UNREACHABLE_EDGE)
248 : {
249 1546765 : bool irreducible = false;
250 1546765 : if (profit.profitable_path_p (m_path, taken_edge, &irreducible)
251 1401792 : && debug_counter ()
252 2948557 : && m_registry.register_path (m_path, taken_edge))
253 : {
254 1363915 : if (irreducible)
255 48106 : vect_free_loop_info_assumptions (m_path[0]->loop_father);
256 : }
257 : else
258 : taken_edge = NULL;
259 : }
260 :
261 26127049 : if (dump_file && (dump_flags & TDF_DETAILS))
262 545 : maybe_register_path_dump (taken_edge);
263 :
264 26127049 : return taken_edge;
265 : }
266 :
267 : // Return the known taken edge out of a path. If the path can be
268 : // determined to be unreachable, return UNREACHABLE_EDGE. If no
269 : // outgoing edge can be calculated, return NULL.
270 :
271 : edge
272 26127049 : back_threader::find_taken_edge (const vec<basic_block> &path)
273 : {
274 26127049 : gcc_checking_assert (path.length () > 1);
275 26127049 : switch (gimple_code (m_last_stmt))
276 : {
277 26003643 : case GIMPLE_COND:
278 26003643 : return find_taken_edge_cond (path, as_a<gcond *> (m_last_stmt));
279 :
280 123406 : case GIMPLE_SWITCH:
281 123406 : return find_taken_edge_switch (path, as_a<gswitch *> (m_last_stmt));
282 :
283 : default:
284 : return NULL;
285 : }
286 : }
287 :
288 : // Same as find_taken_edge, but for paths ending in a switch.
289 :
290 : edge
291 123406 : back_threader::find_taken_edge_switch (const vec<basic_block> &path,
292 : gswitch *sw)
293 : {
294 123406 : tree name = gimple_switch_index (sw);
295 123406 : int_range_max r;
296 :
297 123406 : path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
298 123406 : solver.range_of_expr (r, name, sw);
299 :
300 123406 : if (r.undefined_p ())
301 : return UNREACHABLE_EDGE;
302 :
303 122985 : if (r.varying_p ())
304 : return NULL;
305 :
306 68893 : tree label = find_case_label_range (sw, &r);
307 68893 : if (!label)
308 : return NULL;
309 :
310 7008 : return find_edge (gimple_bb (sw), label_to_block (cfun, CASE_LABEL (label)));
311 123406 : }
312 :
313 : // Same as find_taken_edge, but for paths ending in a GIMPLE_COND.
314 :
315 : edge
316 26003643 : back_threader::find_taken_edge_cond (const vec<basic_block> &path,
317 : gcond *cond)
318 : {
319 26003643 : int_range_max r;
320 :
321 26003643 : path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
322 26003643 : solver.range_of_stmt (r, cond);
323 :
324 26003643 : if (solver.unreachable_path_p ())
325 : return UNREACHABLE_EDGE;
326 :
327 25918504 : int_range<2> true_range = range_true ();
328 25918504 : int_range<2> false_range = range_false ();
329 :
330 25918504 : if (r == true_range || r == false_range)
331 : {
332 1539757 : edge e_true, e_false;
333 1539757 : basic_block bb = gimple_bb (cond);
334 1539757 : extract_true_false_edges_from_block (bb, &e_true, &e_false);
335 1539757 : return r == true_range ? e_true : e_false;
336 : }
337 : return NULL;
338 26003643 : }
339 :
340 : // Find jump threading paths to any of the SSA names in the
341 : // INTERESTING bitmap, and register any such paths.
342 : //
343 : // BB is the current path being processed.
344 : //
345 : // OVERALL_PATHS is the search space up to this block
346 :
347 : void
348 58526157 : back_threader::find_paths_to_names (basic_block bb, bitmap interesting,
349 : unsigned overall_paths,
350 : back_threader_profitability &profit)
351 : {
352 58526157 : m_path.safe_push (bb);
353 :
354 : // Try to resolve the path without looking back. Avoid resolving paths
355 : // we know are large but are not (yet) recognized as Finite State Machine.
356 : // ??? Ideally we'd explore the cheapest path to the loop backedge here,
357 : // avoiding the exponential greedy search and only start that from there.
358 : // Precomputing a path-size-to-immediate-dominator-of-successor for each
359 : // edge might help here. Alternatively copying divergent control flow
360 : // on the way to the backedge could be worthwhile.
361 58526157 : bool large_non_fsm;
362 58526157 : if (m_path.length () > 1
363 58526157 : && (!profit.possibly_profitable_path_p (m_path, &large_non_fsm)
364 26180624 : || (!large_non_fsm
365 26127049 : && maybe_register_path (profit))))
366 : ;
367 :
368 : // The backwards thread copier cannot copy blocks that do not belong
369 : // to the same loop, so when the new source of the path entry no
370 : // longer belongs to it we don't need to search further.
371 46220081 : else if (m_path[0]->loop_father != bb->loop_father)
372 : ;
373 :
374 : // Continue looking for ways to extend the path but limit the
375 : // search space along a branch
376 44784249 : else if ((overall_paths = overall_paths * EDGE_COUNT (bb->preds))
377 44784249 : <= (unsigned)param_max_jump_thread_paths
378 44784249 : && !m_visited_bbs.add (bb))
379 : {
380 : // For further greedy searching we want to remove interesting
381 : // names defined in BB but add ones on the PHI edges for the
382 : // respective edges and adding imports from those stmts.
383 : // We do this by starting with all names
384 : // not defined in BB as interesting, collecting a list of
385 : // interesting PHIs in BB on the fly. Then we iterate over
386 : // predecessor edges, adding interesting PHI edge defs to
387 : // the set of interesting names to consider when processing it.
388 43598077 : auto_bitmap new_interesting;
389 43598077 : auto_vec<int, 16> new_imports;
390 43598077 : auto_vec<gphi *, 4> interesting_phis;
391 43598077 : bitmap_iterator bi;
392 43598077 : unsigned i;
393 43598077 : auto_vec<tree, 16> worklist;
394 98637373 : EXECUTE_IF_SET_IN_BITMAP (interesting, 0, i, bi)
395 : {
396 55039296 : tree name = ssa_name (i);
397 55039296 : gimple *def_stmt = SSA_NAME_DEF_STMT (name);
398 : /* Imports remain interesting. */
399 55039296 : if (gimple_bb (def_stmt) != bb)
400 : {
401 27578630 : bitmap_set_bit (new_interesting, i);
402 27578630 : continue;
403 : }
404 27460666 : worklist.quick_push (name);
405 99330406 : while (!worklist.is_empty ())
406 : {
407 44409074 : tree name = worklist.pop ();
408 44409074 : gimple *def_stmt = SSA_NAME_DEF_STMT (name);
409 : /* Newly discovered imports are interesting. */
410 44409074 : if (gimple_bb (def_stmt) != bb)
411 : {
412 5361886 : bitmap_set_bit (new_interesting, SSA_NAME_VERSION (name));
413 5361886 : continue;
414 : }
415 : /* Local PHIs participate in renaming below. */
416 72716729 : if (gphi *phi = dyn_cast<gphi *> (def_stmt))
417 : {
418 5377647 : tree res = gimple_phi_result (phi);
419 5377647 : if (!SSA_NAME_OCCURS_IN_ABNORMAL_PHI (res))
420 5377518 : interesting_phis.safe_push (phi);
421 : }
422 : /* For other local defs process their uses, amending
423 : imports on the way. */
424 : else
425 : {
426 33669541 : tree ssa[3];
427 33669541 : unsigned lim = gimple_range_ssa_names (ssa, 3, def_stmt);
428 52987860 : for (unsigned j = 0; j < lim; ++j)
429 : {
430 19318319 : tree rhs = ssa[j];
431 19318319 : if (rhs
432 38636638 : && bitmap_set_bit (m_imports,
433 19318319 : SSA_NAME_VERSION (rhs)))
434 : {
435 16948408 : new_imports.safe_push (SSA_NAME_VERSION (rhs));
436 16948408 : worklist.safe_push (rhs);
437 : }
438 : }
439 : }
440 : }
441 : }
442 43598077 : if (!bitmap_empty_p (new_interesting)
443 43598077 : || !interesting_phis.is_empty ())
444 : {
445 58227424 : auto_vec<int, 4> unwind (interesting_phis.length ());
446 58227424 : auto_vec<int, 4> imports_unwind (interesting_phis.length ());
447 29113712 : edge_iterator iter;
448 29113712 : edge e;
449 69821615 : FOR_EACH_EDGE (e, iter, bb->preds)
450 : {
451 44378581 : if (e->flags & EDGE_ABNORMAL
452 : // This is like path_crosses_loops in profitable_path_p but
453 : // more restrictive to avoid peeling off loop iterations (see
454 : // tree-ssa/pr14341.c for an example).
455 : // ??? Note this restriction only applied when visiting an
456 : // interesting PHI with the former resolve_phi.
457 40707903 : || (!interesting_phis.is_empty ()
458 11269831 : && m_path[0]->loop_father != e->src->loop_father))
459 3670678 : continue;
460 119234000 : for (gphi *phi : interesting_phis)
461 : {
462 8122325 : tree def = PHI_ARG_DEF_FROM_EDGE (phi, e);
463 8122325 : if (TREE_CODE (def) == SSA_NAME)
464 : {
465 6805266 : int ver = SSA_NAME_VERSION (def);
466 6805266 : if (bitmap_set_bit (new_interesting, ver))
467 : {
468 6784751 : if (bitmap_set_bit (m_imports, ver))
469 5293620 : imports_unwind.quick_push (ver);
470 6784751 : unwind.quick_push (ver);
471 : }
472 : }
473 : }
474 37037225 : find_paths_to_names (e->src, new_interesting, overall_paths,
475 : profit);
476 : // Restore new_interesting.
477 117896426 : for (int def : unwind)
478 6784751 : bitmap_clear_bit (new_interesting, def);
479 37037225 : unwind.truncate (0);
480 : // Restore and m_imports.
481 116405295 : for (int def : imports_unwind)
482 5293620 : bitmap_clear_bit (m_imports, def);
483 37037225 : imports_unwind.truncate (0);
484 : }
485 29113712 : }
486 : /* m_imports tracks all interesting names on the path, so when
487 : backtracking we have to restore it. */
488 147742639 : for (int j : new_imports)
489 16948408 : bitmap_clear_bit (m_imports, j);
490 43598077 : m_visited_bbs.remove (bb);
491 43598077 : }
492 1186172 : else if (dump_file && (dump_flags & TDF_DETAILS))
493 14 : fprintf (dump_file, " FAIL: Search space limit %d reached.\n",
494 : param_max_jump_thread_paths);
495 :
496 : // Reset things to their original state.
497 58526157 : m_path.pop ();
498 58526157 : }
499 :
500 : // Search backwards from BB looking for paths where the final
501 : // conditional maybe threaded to a successor block. Record such paths
502 : // for jump threading.
503 :
504 : void
505 25586575 : back_threader::maybe_thread_block (basic_block bb)
506 : {
507 25586575 : if (EDGE_COUNT (bb->succs) <= 1)
508 4097643 : return;
509 :
510 25586575 : gimple *stmt = *gsi_last_bb (bb);
511 25586575 : if (!stmt)
512 : return;
513 :
514 25586575 : enum gimple_code code = gimple_code (stmt);
515 25586575 : if (code != GIMPLE_SWITCH
516 25586575 : && code != GIMPLE_COND)
517 : return;
518 :
519 21538135 : m_last_stmt = stmt;
520 21538135 : m_visited_bbs.empty ();
521 21538135 : m_path.truncate (0);
522 :
523 : // We compute imports of the path during discovery starting
524 : // just with names used in the conditional.
525 21538135 : bitmap_clear (m_imports);
526 21538135 : ssa_op_iter iter;
527 21538135 : tree name;
528 47864283 : FOR_EACH_SSA_TREE_OPERAND (name, stmt, iter, SSA_OP_USE)
529 : {
530 26375351 : if (!gimple_range_ssa_p (name))
531 : return;
532 26326148 : bitmap_set_bit (m_imports, SSA_NAME_VERSION (name));
533 : }
534 :
535 : // Interesting is the set of imports we still not have see
536 : // the definition of. So while imports only grow, the
537 : // set of interesting defs dwindles and once empty we can
538 : // stop searching.
539 21488932 : auto_bitmap interesting;
540 21488932 : bitmap_copy (interesting, m_imports);
541 21488932 : back_threader_profitability profit (m_flags & BT_SPEED, stmt);
542 21488932 : find_paths_to_names (bb, interesting, 1, profit);
543 21488932 : }
544 :
545 : DEBUG_FUNCTION void
546 0 : debug (const vec <basic_block> &path)
547 : {
548 0 : dump_path (stderr, path);
549 0 : fputc ('\n', stderr);
550 0 : }
551 :
552 : void
553 0 : back_threader::dump (FILE *out)
554 : {
555 0 : fprintf (out, "\nCandidates for pre-computation:\n");
556 0 : fprintf (out, "===================================\n");
557 :
558 0 : bitmap_iterator bi;
559 0 : unsigned i;
560 :
561 0 : EXECUTE_IF_SET_IN_BITMAP (m_imports, 0, i, bi)
562 : {
563 0 : tree name = ssa_name (i);
564 0 : print_generic_expr (out, name, TDF_NONE);
565 0 : fprintf (out, "\n");
566 : }
567 0 : }
568 :
569 : void
570 0 : back_threader::debug ()
571 : {
572 0 : dump (stderr);
573 0 : }
574 :
575 : /* Examine jump threading path PATH and return TRUE if it is possibly
576 : profitable to thread it, otherwise return FALSE. If this function
577 : returns TRUE profitable_path_p might not be satisfied but when
578 : the path is extended it might be. In particular indicate in
579 : *LARGE_NON_FSM whether the thread is too large for a non-FSM thread
580 : but would be OK if we extend the path to cover the loop backedge.
581 :
582 : ?? It seems we should be able to loosen some of the restrictions in
583 : this function after loop optimizations have run. */
584 :
585 : bool
586 37037225 : back_threader_profitability::possibly_profitable_path_p
587 : (const vec<basic_block> &m_path,
588 : bool *large_non_fsm)
589 : {
590 37037225 : gcc_checking_assert (!m_path.is_empty ());
591 :
592 : /* We can an empty path here (excluding the DEF block) when the
593 : statement that makes a conditional generate a compile-time
594 : constant result is in the same block as the conditional.
595 :
596 : That's not really a jump threading opportunity, but instead is
597 : simple cprop & simplification. We could handle it here if we
598 : wanted by wiring up all the incoming edges. If we run this
599 : early in IPA, that might be worth doing. For now we just
600 : reject that case. */
601 37037225 : if (m_path.length () <= 1)
602 : return false;
603 :
604 37037225 : gimple_stmt_iterator gsi;
605 37037225 : loop_p loop = m_path[0]->loop_father;
606 :
607 : // We recompute the following, when we rewrite possibly_profitable_path_p
608 : // to work incrementally on added BBs we have to unwind them on backtracking
609 37037225 : m_n_insns = 0;
610 37037225 : m_threaded_through_latch = false;
611 37037225 : m_multiway_branch_in_path = false;
612 37037225 : m_contains_hot_bb = false;
613 :
614 37037225 : if (dump_file && (dump_flags & TDF_DETAILS))
615 657 : fprintf (dump_file, "Checking profitability of path (backwards): ");
616 :
617 : /* Count the number of instructions on the path: as these instructions
618 : will have to be duplicated, we will not record the path if there
619 : are too many instructions on the path. Also check that all the
620 : blocks in the path belong to a single loop. */
621 157024833 : for (unsigned j = 0; j < m_path.length (); j++)
622 : {
623 120020539 : basic_block bb = m_path[j];
624 :
625 120020539 : if (dump_file && (dump_flags & TDF_DETAILS))
626 2427 : fprintf (dump_file, " bb:%i", bb->index);
627 : /* Remember, blocks in the path are stored in opposite order in
628 : the PATH array. The last entry in the array represents the
629 : block with an outgoing edge that we will redirect to the jump
630 : threading path. Thus we don't care how many statements are
631 : in that block because it will not be copied or whether or not
632 : it ends in a multiway branch. */
633 240041078 : if (j < m_path.length () - 1)
634 : {
635 83016245 : int orig_n_insns = m_n_insns;
636 83016245 : if (!m_contains_hot_bb && m_speed_p)
637 36056580 : m_contains_hot_bb |= optimize_bb_for_speed_p (bb);
638 83016245 : for (gsi = gsi_after_labels (bb);
639 325118352 : !gsi_end_p (gsi);
640 242102107 : gsi_next_nondebug (&gsi))
641 : {
642 : /* Do not allow OpenACC loop markers and __builtin_constant_p on
643 : threading paths. The latter is disallowed, because an
644 : expression might be constant on two threading paths, and
645 : become non-constant (i.e.: phi) when they merge. */
646 242135038 : gimple *stmt = gsi_stmt (gsi);
647 242135038 : if (gimple_call_internal_p (stmt, IFN_UNIQUE)
648 242135038 : || gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
649 : {
650 32931 : if (dump_file && (dump_flags & TDF_DETAILS))
651 0 : fputc ('\n', dump_file);
652 32931 : return false;
653 : }
654 : /* Do not count empty statements and labels. */
655 242102107 : if (gimple_code (stmt) != GIMPLE_NOP
656 242102107 : && !is_gimple_debug (stmt))
657 200943562 : m_n_insns += estimate_num_insns (stmt, &eni_size_weights);
658 : }
659 82983314 : if (dump_file && (dump_flags & TDF_DETAILS))
660 1770 : fprintf (dump_file, " (%i insns)", m_n_insns-orig_n_insns);
661 :
662 : /* We do not look at the block with the threaded branch
663 : in this loop. So if any block with a last statement that
664 : is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
665 : multiway branch on our path.
666 :
667 : The block in PATH[0] is special, it's the block were we're
668 : going to be able to eliminate its branch. */
669 82983314 : if (j > 0)
670 : {
671 45956621 : gimple *last = *gsi_last_bb (bb);
672 45956621 : if (last
673 45956621 : && (gimple_code (last) == GIMPLE_SWITCH
674 42425965 : || gimple_code (last) == GIMPLE_GOTO))
675 278589 : m_multiway_branch_in_path = true;
676 : }
677 : }
678 :
679 : /* Note if we thread through the latch, we will want to include
680 : the last entry in the array when determining if we thread
681 : through the loop latch. */
682 119987608 : if (loop->latch == bb)
683 : {
684 7537114 : m_threaded_through_latch = true;
685 7537114 : if (dump_file && (dump_flags & TDF_DETAILS))
686 115 : fprintf (dump_file, " (latch)");
687 : }
688 : }
689 :
690 : /* We are going to remove the control statement at the end of the
691 : last block in the threading path. So don't count it against our
692 : statement count. */
693 37004294 : m_n_insns -= m_exit_jump_benefit;
694 :
695 37004294 : if (dump_file && (dump_flags & TDF_DETAILS))
696 657 : fprintf (dump_file, "\n Control statement insns: %i\n"
697 : " Overall: %i insns\n",
698 : m_exit_jump_benefit, m_n_insns);
699 :
700 : /* Threading is profitable if the path duplicated is hot but also
701 : in a case we separate cold path from hot path and permit optimization
702 : of the hot path later. Be on the agressive side here. In some testcases,
703 : as in PR 78407 this leads to noticeable improvements. */
704 37004294 : if (m_speed_p)
705 : {
706 33374810 : if (m_n_insns >= param_max_fsm_thread_path_insns)
707 : {
708 7304 : if (dump_file && (dump_flags & TDF_DETAILS))
709 0 : fprintf (dump_file, " FAIL: Jump-thread path not considered: "
710 : "the number of instructions on the path "
711 : "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
712 7304 : return false;
713 : }
714 66735012 : edge entry = find_edge (m_path[m_path.length () - 1],
715 33367506 : m_path[m_path.length () - 2]);
716 33367506 : if (probably_never_executed_edge_p (cfun, entry))
717 : {
718 125875 : if (dump_file && (dump_flags & TDF_DETAILS))
719 0 : fprintf (dump_file, " FAIL: Jump-thread path not considered: "
720 : "path entry is probably never executed.\n");
721 125875 : return false;
722 : }
723 : }
724 3629484 : else if (m_n_insns > 1)
725 : {
726 1418055 : if (dump_file && (dump_flags & TDF_DETAILS))
727 15 : fprintf (dump_file, " FAIL: Jump-thread path not considered: "
728 : "duplication of %i insns is needed and optimizing for size.\n",
729 : m_n_insns);
730 1418055 : return false;
731 : }
732 :
733 : /* The generic copier used by the backthreader does not re-use an
734 : existing threading path to reduce code duplication. So for that
735 : case, drastically reduce the number of statements we are allowed
736 : to copy. We don't know yet whether we will thread through the latch
737 : so we have to be permissive and continue threading, but indicate
738 : to the caller the thread, if final, wouldn't be profitable. */
739 35453060 : if ((!m_threaded_multiway_branch
740 191343 : || !loop->latch
741 190583 : || loop->latch->index == EXIT_BLOCK)
742 35355395 : && (m_n_insns * param_fsm_scale_path_stmts
743 35355395 : >= param_max_jump_thread_duplication_stmts))
744 : {
745 9272436 : if (dump_file && (dump_flags & TDF_DETAILS))
746 87 : fprintf (dump_file,
747 : " FAIL: Did not thread around loop and would copy too "
748 : "many statements.\n");
749 9272436 : return false;
750 : }
751 5481936 : *large_non_fsm = (!(m_threaded_through_latch && m_threaded_multiway_branch)
752 26180624 : && (m_n_insns * param_fsm_scale_path_stmts
753 26156026 : >= param_max_jump_thread_duplication_stmts));
754 :
755 26180624 : if (dump_file && (dump_flags & TDF_DETAILS))
756 555 : fputc ('\n', dump_file);
757 : return true;
758 : }
759 :
760 : /* Examine jump threading path PATH and return TRUE if it is profitable to
761 : thread it, otherwise return FALSE.
762 :
763 : The taken edge out of the path is TAKEN_EDGE.
764 :
765 : CREATES_IRREDUCIBLE_LOOP is set to TRUE if threading this path
766 : would create an irreducible loop.
767 :
768 : ?? It seems we should be able to loosen some of the restrictions in
769 : this function after loop optimizations have run. */
770 :
771 : bool
772 1546765 : back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
773 : edge taken_edge,
774 : bool *creates_irreducible_loop)
775 : {
776 : // We can assume that possibly_profitable_path_p holds here
777 :
778 1546765 : loop_p loop = m_path[0]->loop_father;
779 :
780 1546765 : if (dump_file && (dump_flags & TDF_DETAILS))
781 117 : fprintf (dump_file, "Checking profitability of path (backwards): ");
782 :
783 : /* If this path threaded through the loop latch back into the
784 : same loop and the destination does not dominate the loop
785 : latch, then this thread would create an irreducible loop. */
786 1546765 : *creates_irreducible_loop = false;
787 1546765 : if (m_threaded_through_latch
788 160685 : && loop == taken_edge->dest->loop_father
789 1694672 : && (determine_bb_domination_status (loop, taken_edge->dest)
790 : == DOMST_NONDOMINATING))
791 101047 : *creates_irreducible_loop = true;
792 :
793 : /* Threading is profitable if the path duplicated is hot but also
794 : in a case we separate cold path from hot path and permit optimization
795 : of the hot path later. Be on the agressive side here. In some testcases,
796 : as in PR 78407 this leads to noticeable improvements. */
797 1546765 : if (m_speed_p
798 1546765 : && (optimize_edge_for_speed_p (taken_edge) || m_contains_hot_bb))
799 : {
800 1010504 : if (probably_never_executed_edge_p (cfun, taken_edge))
801 : {
802 21862 : if (dump_file && (dump_flags & TDF_DETAILS))
803 0 : fprintf (dump_file, " FAIL: Jump-thread path not considered: "
804 : "path leads to probably never executed edge.\n");
805 21862 : return false;
806 : }
807 : }
808 536261 : else if (m_n_insns > 1)
809 : {
810 46872 : if (dump_file && (dump_flags & TDF_DETAILS))
811 0 : fprintf (dump_file, " FAIL: Jump-thread path not considered: "
812 : "duplication of %i insns is needed and optimizing for size.\n",
813 : m_n_insns);
814 46872 : return false;
815 : }
816 :
817 : /* We avoid creating irreducible inner loops unless we thread through
818 : a multiway branch, in which case we have deemed it worth losing
819 : other loop optimizations later.
820 :
821 : We also consider it worth creating an irreducible inner loop after
822 : loop optimizations if the number of copied statement is low. */
823 1478031 : if (!m_threaded_multiway_branch
824 1471316 : && *creates_irreducible_loop
825 91464 : && (!(cfun->curr_properties & PROP_loop_opts_done)
826 47563 : || (m_n_insns * param_fsm_scale_path_stmts
827 47563 : >= param_max_jump_thread_duplication_stmts)))
828 : {
829 43901 : if (dump_file && (dump_flags & TDF_DETAILS))
830 1 : fprintf (dump_file,
831 : " FAIL: Would create irreducible loop early without "
832 : "threading multiway branch.\n");
833 : /* We compute creates_irreducible_loop only late. */
834 43901 : return false;
835 : }
836 :
837 : /* The generic copier used by the backthreader does not re-use an
838 : existing threading path to reduce code duplication. So for that
839 : case, drastically reduce the number of statements we are allowed
840 : to copy. */
841 1434130 : if (!(m_threaded_through_latch && m_threaded_multiway_branch)
842 1430298 : && (m_n_insns * param_fsm_scale_path_stmts
843 1430298 : >= param_max_jump_thread_duplication_stmts))
844 : {
845 0 : if (dump_file && (dump_flags & TDF_DETAILS))
846 0 : fprintf (dump_file,
847 : " FAIL: Did not thread around loop and would copy too "
848 : "many statements.\n");
849 0 : return false;
850 : }
851 :
852 : /* When there is a multi-way branch on the path, then threading can
853 : explode the CFG due to duplicating the edges for that multi-way
854 : branch. So like above, only allow a multi-way branch on the path
855 : if we actually thread a multi-way branch. */
856 1434130 : if (!m_threaded_multiway_branch && m_multiway_branch_in_path)
857 : {
858 154 : if (dump_file && (dump_flags & TDF_DETAILS))
859 6 : fprintf (dump_file,
860 : " FAIL: Thread through multiway branch without threading "
861 : "a multiway branch.\n");
862 154 : return false;
863 : }
864 :
865 : /* Threading through an empty latch would cause code to be added to
866 : the latch. This could alter the loop form sufficiently to cause
867 : loop optimizations to fail. Disable these threads until after
868 : loop optimizations have run. */
869 1324626 : if ((m_threaded_through_latch || taken_edge->dest == loop->latch)
870 162996 : && !(cfun->curr_properties & PROP_loop_opts_done)
871 1516400 : && empty_block_p (loop->latch))
872 : {
873 32184 : if (dump_file && (dump_flags & TDF_DETAILS))
874 15 : fprintf (dump_file,
875 : " FAIL: Thread through latch before loop opts would create "
876 : "non-empty latch\n");
877 32184 : return false;
878 : }
879 1401792 : if (dump_file && (dump_flags & TDF_DETAILS))
880 95 : fputc ('\n', dump_file);
881 : return true;
882 : }
883 :
884 :
885 : /* The current path PATH is a vector of blocks forming a jump threading
886 : path in reverse order. TAKEN_EDGE is the edge taken from path[0].
887 :
888 : Convert the current path into the form used by register_jump_thread and
889 : register it.
890 :
891 : Return TRUE if successful or FALSE otherwise. */
892 :
893 : bool
894 1401792 : back_threader_registry::register_path (const vec<basic_block> &m_path,
895 : edge taken_edge)
896 : {
897 1401792 : vec<jump_thread_edge *> *jump_thread_path = allocate_thread_path ();
898 :
899 : // The generic copier ignores the edge type. We can build the
900 : // thread edges with any type.
901 3527250 : for (unsigned int j = 0; j + 1 < m_path.length (); j++)
902 : {
903 2125458 : basic_block bb1 = m_path[m_path.length () - j - 1];
904 2125458 : basic_block bb2 = m_path[m_path.length () - j - 2];
905 :
906 2125458 : edge e = find_edge (bb1, bb2);
907 2125458 : gcc_assert (e);
908 2125458 : push_edge (jump_thread_path, e, EDGE_COPY_SRC_BLOCK);
909 : }
910 :
911 1401792 : push_edge (jump_thread_path, taken_edge, EDGE_NO_COPY_SRC_BLOCK);
912 1401792 : return register_jump_thread (jump_thread_path);
913 : }
914 :
915 : // Thread all suitable paths in the current function.
916 : //
917 : // Return TODO_flags.
918 :
919 : unsigned int
920 6268414 : back_threader::thread_blocks ()
921 : {
922 6268414 : basic_block bb;
923 60517321 : FOR_EACH_BB_FN (bb, m_fun)
924 79835482 : if (EDGE_COUNT (bb->succs) > 1)
925 25586575 : maybe_thread_block (bb);
926 :
927 6268414 : bool changed = m_registry.thread_through_all_blocks (true);
928 :
929 6268414 : if (m_flags & BT_SPEED)
930 7530286 : return changed ? TODO_cleanup_cfg : 0;
931 :
932 : return false;
933 : }
934 :
935 : namespace {
936 :
937 : const pass_data pass_data_early_thread_jumps =
938 : {
939 : GIMPLE_PASS,
940 : "ethread",
941 : OPTGROUP_NONE,
942 : TV_TREE_SSA_THREAD_JUMPS,
943 : ( PROP_cfg | PROP_ssa ),
944 : 0,
945 : 0,
946 : 0,
947 : ( TODO_cleanup_cfg | TODO_update_ssa ),
948 : };
949 :
950 : const pass_data pass_data_thread_jumps =
951 : {
952 : GIMPLE_PASS,
953 : "thread",
954 : OPTGROUP_NONE,
955 : TV_TREE_SSA_THREAD_JUMPS,
956 : ( PROP_cfg | PROP_ssa ),
957 : 0,
958 : 0,
959 : 0,
960 : TODO_update_ssa,
961 : };
962 :
963 : const pass_data pass_data_thread_jumps_full =
964 : {
965 : GIMPLE_PASS,
966 : "threadfull",
967 : OPTGROUP_NONE,
968 : TV_TREE_SSA_THREAD_JUMPS,
969 : ( PROP_cfg | PROP_ssa ),
970 : 0,
971 : 0,
972 : 0,
973 : TODO_update_ssa,
974 : };
975 :
976 : // Early jump threading pass optimizing for size.
977 : class pass_early_thread_jumps : public gimple_opt_pass
978 : {
979 : public:
980 285722 : pass_early_thread_jumps (gcc::context *ctxt)
981 571444 : : gimple_opt_pass (pass_data_early_thread_jumps, ctxt)
982 : {}
983 :
984 0 : opt_pass * clone () override
985 : {
986 0 : return new pass_early_thread_jumps (m_ctxt);
987 : }
988 285722 : void set_pass_param (unsigned int, bool param) override
989 : {
990 285722 : m_first = param;
991 285722 : }
992 2412428 : bool gate (function *) override
993 : {
994 2412428 : return flag_thread_jumps;
995 : }
996 2412129 : unsigned int execute (function *fun) override
997 : {
998 2412129 : back_threader threader (fun, BT_NONE, m_first);
999 2412129 : return threader.thread_blocks ();
1000 2412129 : }
1001 : private:
1002 : bool m_first;
1003 : };
1004 :
1005 : // Jump threading pass without resolving of unknown SSAs.
1006 : class pass_thread_jumps : public gimple_opt_pass
1007 : {
1008 : public:
1009 571444 : pass_thread_jumps (gcc::context *ctxt)
1010 1142888 : : gimple_opt_pass (pass_data_thread_jumps, ctxt)
1011 : {}
1012 285722 : opt_pass * clone (void) override
1013 : {
1014 285722 : return new pass_thread_jumps (m_ctxt);
1015 : }
1016 571444 : void set_pass_param (unsigned int, bool param) override
1017 : {
1018 571444 : m_first = param;
1019 571444 : }
1020 2082968 : bool gate (function *) override
1021 : {
1022 2082968 : return flag_thread_jumps && flag_expensive_optimizations;
1023 : }
1024 1928141 : unsigned int execute (function *fun) override
1025 : {
1026 1928141 : back_threader threader (fun, BT_SPEED, m_first);
1027 1928141 : return threader.thread_blocks ();
1028 1928141 : }
1029 : private:
1030 : bool m_first;
1031 : };
1032 :
1033 : // Jump threading pass that fully resolves unknown SSAs.
1034 : class pass_thread_jumps_full : public gimple_opt_pass
1035 : {
1036 : public:
1037 571444 : pass_thread_jumps_full (gcc::context *ctxt)
1038 1142888 : : gimple_opt_pass (pass_data_thread_jumps_full, ctxt)
1039 : {}
1040 285722 : opt_pass * clone (void) override
1041 : {
1042 285722 : return new pass_thread_jumps_full (m_ctxt);
1043 : }
1044 571444 : void set_pass_param (unsigned int, bool param) override
1045 : {
1046 571444 : m_first = param;
1047 571444 : }
1048 2082968 : bool gate (function *) override
1049 : {
1050 2082968 : return flag_thread_jumps && flag_expensive_optimizations;
1051 : }
1052 1928144 : unsigned int execute (function *fun) override
1053 : {
1054 1928144 : back_threader threader (fun, BT_SPEED | BT_RESOLVE, m_first);
1055 1928144 : return threader.thread_blocks ();
1056 1928144 : }
1057 : private:
1058 : bool m_first;
1059 : };
1060 :
1061 : } // namespace {
1062 :
1063 : gimple_opt_pass *
1064 285722 : make_pass_thread_jumps (gcc::context *ctxt)
1065 : {
1066 285722 : return new pass_thread_jumps (ctxt);
1067 : }
1068 :
1069 : gimple_opt_pass *
1070 285722 : make_pass_thread_jumps_full (gcc::context *ctxt)
1071 : {
1072 285722 : return new pass_thread_jumps_full (ctxt);
1073 : }
1074 :
1075 : gimple_opt_pass *
1076 285722 : make_pass_early_thread_jumps (gcc::context *ctxt)
1077 : {
1078 285722 : return new pass_early_thread_jumps (ctxt);
1079 : }
|