Branch data Line data Source code
1 : : /* SSA Jump Threading
2 : : Copyright (C) 2005-2025 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify
7 : : it under the terms of the GNU General Public License as published by
8 : : the Free Software Foundation; either version 3, or (at your option)
9 : : any later version.
10 : :
11 : : GCC is distributed in the hope that it will be useful,
12 : : but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : : GNU General Public License for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : #include "config.h"
21 : : #include "system.h"
22 : : #include "coretypes.h"
23 : : #include "backend.h"
24 : : #include "predict.h"
25 : : #include "tree.h"
26 : : #include "gimple.h"
27 : : #include "fold-const.h"
28 : : #include "cfgloop.h"
29 : : #include "gimple-iterator.h"
30 : : #include "tree-cfg.h"
31 : : #include "tree-ssa-threadupdate.h"
32 : : #include "tree-ssa-loop.h"
33 : : #include "cfganal.h"
34 : : #include "tree-pass.h"
35 : : #include "gimple-ssa.h"
36 : : #include "tree-phinodes.h"
37 : : #include "tree-inline.h"
38 : : #include "tree-vectorizer.h"
39 : : #include "value-range.h"
40 : : #include "gimple-range.h"
41 : : #include "tree-ssa-threadedge.h"
42 : : #include "gimple-range-path.h"
43 : : #include "ssa.h"
44 : : #include "tree-cfgcleanup.h"
45 : : #include "tree-pretty-print.h"
46 : : #include "cfghooks.h"
47 : : #include "dbgcnt.h"
48 : :
49 : : // Path registry for the backwards threader. After all paths have been
50 : : // registered with register_path(), thread_through_all_blocks() is called
51 : : // to modify the CFG.
52 : :
53 : 24932316 : class back_threader_registry : public back_jt_path_registry
54 : : {
55 : : public:
56 : : bool register_path (const vec<basic_block> &, edge taken);
57 : : };
58 : :
59 : : // Class to abstract the profitability code for the backwards threader.
60 : :
61 : : class back_threader_profitability
62 : : {
63 : : public:
64 : : back_threader_profitability (bool speed_p, gimple *stmt);
65 : : bool possibly_profitable_path_p (const vec<basic_block> &, bool *);
66 : : bool profitable_path_p (const vec<basic_block> &,
67 : : edge taken, bool *irreducible_loop);
68 : : private:
69 : : const bool m_speed_p;
70 : : int m_exit_jump_benefit;
71 : : bool m_threaded_multiway_branch;
72 : : // The following are computed by possibly_profitable_path_p
73 : : bool m_threaded_through_latch;
74 : : bool m_multiway_branch_in_path;
75 : : bool m_contains_hot_bb;
76 : : int m_n_insns;
77 : : };
78 : :
79 : 21236015 : back_threader_profitability::back_threader_profitability (bool speed_p,
80 : 21236015 : gimple *last)
81 : 21236015 : : m_speed_p (speed_p)
82 : : {
83 : 21236015 : m_threaded_multiway_branch = (gimple_code (last) == GIMPLE_SWITCH
84 : 21236015 : || gimple_code (last) == GIMPLE_GOTO);
85 : : // The forward threader has estimate_threading_killed_stmts, in
86 : : // particular it estimates further DCE from eliminating the exit
87 : : // control stmt.
88 : 21236015 : m_exit_jump_benefit = estimate_num_insns (last, &eni_size_weights);
89 : 21236015 : }
90 : :
91 : : // Back threader flags.
92 : : #define BT_NONE 0
93 : : // Generate fast code at the expense of code size.
94 : : #define BT_SPEED 1
95 : : // Resolve unknown SSAs on entry to a threading path. If set, use the
96 : : // ranger. If not, assume all ranges on entry to a path are VARYING.
97 : : #define BT_RESOLVE 2
98 : :
99 : : class back_threader
100 : : {
101 : : public:
102 : : back_threader (function *fun, unsigned flags, bool first);
103 : : ~back_threader ();
104 : : unsigned thread_blocks ();
105 : : private:
106 : : void maybe_thread_block (basic_block bb);
107 : : bool debug_counter ();
108 : : edge maybe_register_path (back_threader_profitability &);
109 : : void maybe_register_path_dump (edge taken_edge);
110 : : void find_paths_to_names (basic_block bb, bitmap imports, unsigned,
111 : : back_threader_profitability &);
112 : : edge find_taken_edge (const vec<basic_block> &path);
113 : : edge find_taken_edge_cond (const vec<basic_block> &path, gcond *);
114 : : edge find_taken_edge_switch (const vec<basic_block> &path, gswitch *);
115 : : virtual void debug ();
116 : : virtual void dump (FILE *out);
117 : :
118 : : back_threader_registry m_registry;
119 : :
120 : : // Current path being analyzed.
121 : : auto_vec<basic_block> m_path;
122 : : // Hash to mark visited BBs while analyzing a path.
123 : : hash_set<basic_block> m_visited_bbs;
124 : : // The set of SSA names, any of which could potentially change the
125 : : // value of the final conditional in a path.
126 : : auto_bitmap m_imports;
127 : : // The last statement in the path.
128 : : gimple *m_last_stmt;
129 : : // Marker to differentiate unreachable edges.
130 : : static const edge UNREACHABLE_EDGE;
131 : : // Set to TRUE if unknown SSA names along a path should be resolved
132 : : // with the ranger. Otherwise, unknown SSA names are assumed to be
133 : : // VARYING. Setting to true is more precise but slower.
134 : : function *m_fun;
135 : : // Ranger for the path solver.
136 : : gimple_ranger *m_ranger;
137 : : unsigned m_flags;
138 : : // Set to TRUE for the first of each thread[12] pass or the first of
139 : : // each threadfull[12] pass. This is used to differentiate between
140 : : // the different threading passes so we can set up debug counters.
141 : : bool m_first;
142 : : };
143 : :
144 : : // Used to differentiate unreachable edges, so we may stop the search
145 : : // in a the given direction.
146 : : const edge back_threader::UNREACHABLE_EDGE = (edge) -1;
147 : :
148 : 6233079 : back_threader::back_threader (function *fun, unsigned flags, bool first)
149 : 6233079 : : m_first (first)
150 : : {
151 : 6233079 : if (flags & BT_SPEED)
152 : 3792043 : loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);
153 : : else
154 : 2441036 : loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
155 : :
156 : 6233079 : m_fun = fun;
157 : 6233079 : m_flags = flags;
158 : 6233079 : m_last_stmt = NULL;
159 : :
160 : : // The path solver needs EDGE_DFS_BACK in resolving mode.
161 : 6233079 : if (flags & BT_RESOLVE)
162 : 1896023 : mark_dfs_back_edges ();
163 : :
164 : 6233079 : m_ranger = new gimple_ranger;
165 : 6233079 : }
166 : :
167 : 6233079 : back_threader::~back_threader ()
168 : : {
169 : 6233079 : delete m_ranger;
170 : 6233079 : loop_optimizer_finalize ();
171 : 6233079 : }
172 : :
173 : : // A wrapper for the various debug counters for the threading passes.
174 : : // Returns TRUE if it's OK to register the current threading
175 : : // candidate.
176 : :
177 : : bool
178 : 1296925 : back_threader::debug_counter ()
179 : : {
180 : : // The ethread pass is mostly harmless ;-).
181 : 1296925 : if ((m_flags & BT_SPEED) == 0)
182 : : return true;
183 : :
184 : 878137 : if (m_flags & BT_RESOLVE)
185 : : {
186 : 596994 : if (m_first && !dbg_cnt (back_threadfull1))
187 : : return false;
188 : :
189 : 596994 : if (!m_first && !dbg_cnt (back_threadfull2))
190 : : return false;
191 : : }
192 : : else
193 : : {
194 : 281143 : if (m_first && !dbg_cnt (back_thread1))
195 : : return false;
196 : :
197 : 281143 : if (!m_first && !dbg_cnt (back_thread2))
198 : : return false;
199 : : }
200 : : return true;
201 : : }
202 : :
203 : : static void
204 : 537 : dump_path (FILE *dump_file, const vec<basic_block> &path)
205 : : {
206 : 2921 : for (unsigned i = path.length (); i > 0; --i)
207 : : {
208 : 1847 : basic_block bb = path[i - 1];
209 : 1847 : fprintf (dump_file, "%d", bb->index);
210 : 1847 : if (i > 1)
211 : 1310 : fprintf (dump_file, "->");
212 : : }
213 : 537 : }
214 : :
215 : : // Dump details of an attempt to register a path.
216 : :
217 : : void
218 : 537 : back_threader::maybe_register_path_dump (edge taken)
219 : : {
220 : 537 : if (m_path.is_empty ())
221 : : return;
222 : :
223 : 537 : fprintf (dump_file, "path: ");
224 : 537 : dump_path (dump_file, m_path);
225 : 537 : fprintf (dump_file, "->");
226 : :
227 : 537 : if (taken == UNREACHABLE_EDGE)
228 : 9 : fprintf (dump_file, "xx REJECTED (unreachable)\n");
229 : 528 : else if (taken)
230 : 79 : fprintf (dump_file, "%d SUCCESS\n", taken->dest->index);
231 : : else
232 : 449 : fprintf (dump_file, "xx REJECTED\n");
233 : : }
234 : :
235 : : // If an outgoing edge can be determined out of the current path,
236 : : // register it for jump threading and return the taken edge.
237 : : //
238 : : // Return NULL if it is unprofitable to thread this path, or the
239 : : // outgoing edge is unknown. Return UNREACHABLE_EDGE if the path is
240 : : // unreachable.
241 : :
242 : : edge
243 : 25429275 : back_threader::maybe_register_path (back_threader_profitability &profit)
244 : : {
245 : 25429275 : edge taken_edge = find_taken_edge (m_path);
246 : :
247 : 25429275 : if (taken_edge && taken_edge != UNREACHABLE_EDGE)
248 : : {
249 : 1443317 : bool irreducible = false;
250 : 1443317 : if (profit.profitable_path_p (m_path, taken_edge, &irreducible)
251 : 1296925 : && debug_counter ()
252 : 2740242 : && m_registry.register_path (m_path, taken_edge))
253 : : {
254 : 1261237 : if (irreducible)
255 : 45118 : vect_free_loop_info_assumptions (m_path[0]->loop_father);
256 : : }
257 : : else
258 : : taken_edge = NULL;
259 : : }
260 : :
261 : 25429275 : if (dump_file && (dump_flags & TDF_DETAILS))
262 : 537 : maybe_register_path_dump (taken_edge);
263 : :
264 : 25429275 : return taken_edge;
265 : : }
266 : :
267 : : // Return the known taken edge out of a path. If the path can be
268 : : // determined to be unreachable, return UNREACHABLE_EDGE. If no
269 : : // outgoing edge can be calculated, return NULL.
270 : :
271 : : edge
272 : 25429275 : back_threader::find_taken_edge (const vec<basic_block> &path)
273 : : {
274 : 25429275 : gcc_checking_assert (path.length () > 1);
275 : 25429275 : switch (gimple_code (m_last_stmt))
276 : : {
277 : 25305604 : case GIMPLE_COND:
278 : 25305604 : return find_taken_edge_cond (path, as_a<gcond *> (m_last_stmt));
279 : :
280 : 123671 : case GIMPLE_SWITCH:
281 : 123671 : return find_taken_edge_switch (path, as_a<gswitch *> (m_last_stmt));
282 : :
283 : : default:
284 : : return NULL;
285 : : }
286 : : }
287 : :
288 : : // Same as find_taken_edge, but for paths ending in a switch.
289 : :
290 : : edge
291 : 123671 : back_threader::find_taken_edge_switch (const vec<basic_block> &path,
292 : : gswitch *sw)
293 : : {
294 : 123671 : tree name = gimple_switch_index (sw);
295 : 123671 : int_range_max r;
296 : :
297 : 123671 : path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
298 : 123671 : solver.range_of_expr (r, name, sw);
299 : :
300 : 123671 : if (r.undefined_p ())
301 : : return UNREACHABLE_EDGE;
302 : :
303 : 123297 : if (r.varying_p ())
304 : : return NULL;
305 : :
306 : 68261 : tree label = find_case_label_range (sw, &r);
307 : 68261 : if (!label)
308 : : return NULL;
309 : :
310 : 5859 : return find_edge (gimple_bb (sw), label_to_block (cfun, CASE_LABEL (label)));
311 : 123671 : }
312 : :
313 : : // Same as find_taken_edge, but for paths ending in a GIMPLE_COND.
314 : :
315 : : edge
316 : 25305604 : back_threader::find_taken_edge_cond (const vec<basic_block> &path,
317 : : gcond *cond)
318 : : {
319 : 25305604 : int_range_max r;
320 : :
321 : 25305604 : path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
322 : 25305604 : solver.range_of_stmt (r, cond);
323 : :
324 : 25305604 : if (solver.unreachable_path_p ())
325 : : return UNREACHABLE_EDGE;
326 : :
327 : 25218047 : int_range<2> true_range = range_true ();
328 : 25218047 : int_range<2> false_range = range_false ();
329 : :
330 : 25218047 : if (r == true_range || r == false_range)
331 : : {
332 : 1437458 : edge e_true, e_false;
333 : 1437458 : basic_block bb = gimple_bb (cond);
334 : 1437458 : extract_true_false_edges_from_block (bb, &e_true, &e_false);
335 : 1437458 : return r == true_range ? e_true : e_false;
336 : : }
337 : : return NULL;
338 : 25305604 : }
339 : :
340 : : // Find jump threading paths to any of the SSA names in the
341 : : // INTERESTING bitmap, and register any such paths.
342 : : //
343 : : // BB is the current path being processed.
344 : : //
345 : : // OVERALL_PATHS is the search space up to this block
346 : :
347 : : void
348 : 57009996 : back_threader::find_paths_to_names (basic_block bb, bitmap interesting,
349 : : unsigned overall_paths,
350 : : back_threader_profitability &profit)
351 : : {
352 : 57009996 : m_path.safe_push (bb);
353 : :
354 : : // Try to resolve the path without looking back. Avoid resolving paths
355 : : // we know are large but are not (yet) recognized as Finite State Machine.
356 : : // ??? Ideally we'd explore the cheapest path to the loop backedge here,
357 : : // avoiding the exponential greedy search and only start that from there.
358 : : // Precomputing a path-size-to-immediate-dominator-of-successor for each
359 : : // edge might help here. Alternatively copying divergent control flow
360 : : // on the way to the backedge could be worthwhile.
361 : 57009996 : bool large_non_fsm;
362 : 57009996 : if (m_path.length () > 1
363 : 57009996 : && (!profit.possibly_profitable_path_p (m_path, &large_non_fsm)
364 : 25485484 : || (!large_non_fsm
365 : 25429275 : && maybe_register_path (profit))))
366 : : ;
367 : :
368 : : // The backwards thread copier cannot copy blocks that do not belong
369 : : // to the same loop, so when the new source of the path entry no
370 : : // longer belongs to it we don't need to search further.
371 : 45372331 : else if (m_path[0]->loop_father != bb->loop_father)
372 : : ;
373 : :
374 : : // Continue looking for ways to extend the path but limit the
375 : : // search space along a branch
376 : 44003237 : else if ((overall_paths = overall_paths * EDGE_COUNT (bb->preds))
377 : 44003237 : <= (unsigned)param_max_jump_thread_paths
378 : 44003237 : && !m_visited_bbs.add (bb))
379 : : {
380 : : // For further greedy searching we want to remove interesting
381 : : // names defined in BB but add ones on the PHI edges for the
382 : : // respective edges and adding imports from those stmts.
383 : : // We do this by starting with all names
384 : : // not defined in BB as interesting, collecting a list of
385 : : // interesting PHIs in BB on the fly. Then we iterate over
386 : : // predecessor edges, adding interesting PHI edge defs to
387 : : // the set of interesting names to consider when processing it.
388 : 42852197 : auto_bitmap new_interesting;
389 : 42852197 : auto_vec<int, 16> new_imports;
390 : 42852197 : auto_vec<gphi *, 4> interesting_phis;
391 : 42852197 : bitmap_iterator bi;
392 : 42852197 : unsigned i;
393 : 42852197 : auto_vec<tree, 16> worklist;
394 : 96904385 : EXECUTE_IF_SET_IN_BITMAP (interesting, 0, i, bi)
395 : : {
396 : 54052188 : tree name = ssa_name (i);
397 : 54052188 : gimple *def_stmt = SSA_NAME_DEF_STMT (name);
398 : : /* Imports remain interesting. */
399 : 54052188 : if (gimple_bb (def_stmt) != bb)
400 : : {
401 : 26820816 : bitmap_set_bit (new_interesting, i);
402 : 26820816 : continue;
403 : : }
404 : 27231372 : worklist.quick_push (name);
405 : 98751408 : while (!worklist.is_empty ())
406 : : {
407 : 44288664 : tree name = worklist.pop ();
408 : 44288664 : gimple *def_stmt = SSA_NAME_DEF_STMT (name);
409 : : /* Newly discovered imports are interesting. */
410 : 44288664 : if (gimple_bb (def_stmt) != bb)
411 : : {
412 : 5188471 : bitmap_set_bit (new_interesting, SSA_NAME_VERSION (name));
413 : 5188471 : continue;
414 : : }
415 : : /* Local PHIs participate in renaming below. */
416 : 72840571 : if (gphi *phi = dyn_cast<gphi *> (def_stmt))
417 : : {
418 : 5359815 : tree res = gimple_phi_result (phi);
419 : 5359815 : if (!SSA_NAME_OCCURS_IN_ABNORMAL_PHI (res))
420 : 5359691 : interesting_phis.safe_push (phi);
421 : : }
422 : : /* For other local defs process their uses, amending
423 : : imports on the way. */
424 : : else
425 : : {
426 : 33740378 : tree ssa[3];
427 : 33740378 : unsigned lim = gimple_range_ssa_names (ssa, 3, def_stmt);
428 : 53172967 : for (unsigned j = 0; j < lim; ++j)
429 : : {
430 : 19432589 : tree rhs = ssa[j];
431 : 19432589 : if (rhs
432 : 38865178 : && bitmap_set_bit (m_imports,
433 : 19432589 : SSA_NAME_VERSION (rhs)))
434 : : {
435 : 17057292 : new_imports.safe_push (SSA_NAME_VERSION (rhs));
436 : 17057292 : worklist.safe_push (rhs);
437 : : }
438 : : }
439 : : }
440 : : }
441 : : }
442 : 42852197 : if (!bitmap_empty_p (new_interesting)
443 : 42852197 : || !interesting_phis.is_empty ())
444 : : {
445 : 56704754 : auto_vec<int, 4> unwind (interesting_phis.length ());
446 : 56704754 : auto_vec<int, 4> imports_unwind (interesting_phis.length ());
447 : 28352377 : edge_iterator iter;
448 : 28352377 : edge e;
449 : 67716390 : FOR_EACH_EDGE (e, iter, bb->preds)
450 : : {
451 : 42954045 : if (e->flags & EDGE_ABNORMAL
452 : : // This is like path_crosses_loops in profitable_path_p but
453 : : // more restrictive to avoid peeling off loop iterations (see
454 : : // tree-ssa/pr14341.c for an example).
455 : : // ??? Note this restriction only applied when visiting an
456 : : // interesting PHI with the former resolve_phi.
457 : 39364013 : || (!interesting_phis.is_empty ()
458 : 11108910 : && m_path[0]->loop_father != e->src->loop_father))
459 : 3590032 : continue;
460 : 115357149 : for (gphi *phi : interesting_phis)
461 : : {
462 : 8035206 : tree def = PHI_ARG_DEF_FROM_EDGE (phi, e);
463 : 8035206 : if (TREE_CODE (def) == SSA_NAME)
464 : : {
465 : 6750776 : int ver = SSA_NAME_VERSION (def);
466 : 6750776 : if (bitmap_set_bit (new_interesting, ver))
467 : : {
468 : 6727954 : if (bitmap_set_bit (m_imports, ver))
469 : 5293616 : imports_unwind.quick_push (ver);
470 : 6727954 : unwind.quick_push (ver);
471 : : }
472 : : }
473 : : }
474 : 35773981 : find_paths_to_names (e->src, new_interesting, overall_paths,
475 : : profit);
476 : : // Restore new_interesting.
477 : 114049897 : for (int def : unwind)
478 : 6727954 : bitmap_clear_bit (new_interesting, def);
479 : 35773981 : unwind.truncate (0);
480 : : // Restore and m_imports.
481 : 112615559 : for (int def : imports_unwind)
482 : 5293616 : bitmap_clear_bit (m_imports, def);
483 : 35773981 : imports_unwind.truncate (0);
484 : : }
485 : 28352377 : }
486 : : /* m_imports tracks all interesting names on the path, so when
487 : : backtracking we have to restore it. */
488 : 145613883 : for (int j : new_imports)
489 : 17057292 : bitmap_clear_bit (m_imports, j);
490 : 42852197 : m_visited_bbs.remove (bb);
491 : 42852197 : }
492 : 1151040 : else if (dump_file && (dump_flags & TDF_DETAILS))
493 : 15 : fprintf (dump_file, " FAIL: Search space limit %d reached.\n",
494 : : param_max_jump_thread_paths);
495 : :
496 : : // Reset things to their original state.
497 : 57009996 : m_path.pop ();
498 : 57009996 : }
499 : :
500 : : // Search backwards from BB looking for paths where the final
501 : : // conditional maybe threaded to a successor block. Record such paths
502 : : // for jump threading.
503 : :
504 : : void
505 : 25720415 : back_threader::maybe_thread_block (basic_block bb)
506 : : {
507 : 25720415 : if (EDGE_COUNT (bb->succs) <= 1)
508 : 4484400 : return;
509 : :
510 : 25720415 : gimple *stmt = *gsi_last_bb (bb);
511 : 25720415 : if (!stmt)
512 : : return;
513 : :
514 : 25720415 : enum gimple_code code = gimple_code (stmt);
515 : 25720415 : if (code != GIMPLE_SWITCH
516 : 25720415 : && code != GIMPLE_COND)
517 : : return;
518 : :
519 : 21284504 : m_last_stmt = stmt;
520 : 21284504 : m_visited_bbs.empty ();
521 : 21284504 : m_path.truncate (0);
522 : :
523 : : // We compute imports of the path during discovery starting
524 : : // just with names used in the conditional.
525 : 21284504 : bitmap_clear (m_imports);
526 : 21284504 : ssa_op_iter iter;
527 : 21284504 : tree name;
528 : 47213639 : FOR_EACH_SSA_TREE_OPERAND (name, stmt, iter, SSA_OP_USE)
529 : : {
530 : 25977624 : if (!gimple_range_ssa_p (name))
531 : : return;
532 : 25929135 : bitmap_set_bit (m_imports, SSA_NAME_VERSION (name));
533 : : }
534 : :
535 : : // Interesting is the set of imports we still not have see
536 : : // the definition of. So while imports only grow, the
537 : : // set of interesting defs dwindles and once empty we can
538 : : // stop searching.
539 : 21236015 : auto_bitmap interesting;
540 : 21236015 : bitmap_copy (interesting, m_imports);
541 : 21236015 : back_threader_profitability profit (m_flags & BT_SPEED, stmt);
542 : 21236015 : find_paths_to_names (bb, interesting, 1, profit);
543 : 21236015 : }
544 : :
545 : : DEBUG_FUNCTION void
546 : 0 : debug (const vec <basic_block> &path)
547 : : {
548 : 0 : dump_path (stderr, path);
549 : 0 : fputc ('\n', stderr);
550 : 0 : }
551 : :
552 : : void
553 : 0 : back_threader::dump (FILE *out)
554 : : {
555 : 0 : fprintf (out, "\nCandidates for pre-computation:\n");
556 : 0 : fprintf (out, "===================================\n");
557 : :
558 : 0 : bitmap_iterator bi;
559 : 0 : unsigned i;
560 : :
561 : 0 : EXECUTE_IF_SET_IN_BITMAP (m_imports, 0, i, bi)
562 : : {
563 : 0 : tree name = ssa_name (i);
564 : 0 : print_generic_expr (out, name, TDF_NONE);
565 : 0 : fprintf (out, "\n");
566 : : }
567 : 0 : }
568 : :
569 : : void
570 : 0 : back_threader::debug ()
571 : : {
572 : 0 : dump (stderr);
573 : 0 : }
574 : :
575 : : /* Examine jump threading path PATH and return TRUE if it is possibly
576 : : profitable to thread it, otherwise return FALSE. If this function
577 : : returns TRUE profitable_path_p might not be satisfied but when
578 : : the path is extended it might be. In particular indicate in
579 : : *LARGE_NON_FSM whether the thread is too large for a non-FSM thread
580 : : but would be OK if we extend the path to cover the loop backedge.
581 : :
582 : : ?? It seems we should be able to loosen some of the restrictions in
583 : : this function after loop optimizations have run. */
584 : :
585 : : bool
586 : 35773981 : back_threader_profitability::possibly_profitable_path_p
587 : : (const vec<basic_block> &m_path,
588 : : bool *large_non_fsm)
589 : : {
590 : 35773981 : gcc_checking_assert (!m_path.is_empty ());
591 : :
592 : : /* We can an empty path here (excluding the DEF block) when the
593 : : statement that makes a conditional generate a compile-time
594 : : constant result is in the same block as the conditional.
595 : :
596 : : That's not really a jump threading opportunity, but instead is
597 : : simple cprop & simplification. We could handle it here if we
598 : : wanted by wiring up all the incoming edges. If we run this
599 : : early in IPA, that might be worth doing. For now we just
600 : : reject that case. */
601 : 35773981 : if (m_path.length () <= 1)
602 : : return false;
603 : :
604 : 35773981 : gimple_stmt_iterator gsi;
605 : 35773981 : loop_p loop = m_path[0]->loop_father;
606 : :
607 : : // We recompute the following, when we rewrite possibly_profitable_path_p
608 : : // to work incrementally on added BBs we have to unwind them on backtracking
609 : 35773981 : m_n_insns = 0;
610 : 35773981 : m_threaded_through_latch = false;
611 : 35773981 : m_multiway_branch_in_path = false;
612 : 35773981 : m_contains_hot_bb = false;
613 : :
614 : 35773981 : if (dump_file && (dump_flags & TDF_DETAILS))
615 : 648 : fprintf (dump_file, "Checking profitability of path (backwards): ");
616 : :
617 : : /* Count the number of instructions on the path: as these instructions
618 : : will have to be duplicated, we will not record the path if there
619 : : are too many instructions on the path. Also check that all the
620 : : blocks in the path belong to a single loop. */
621 : 152453057 : for (unsigned j = 0; j < m_path.length (); j++)
622 : : {
623 : 116715528 : basic_block bb = m_path[j];
624 : :
625 : 116715528 : if (dump_file && (dump_flags & TDF_DETAILS))
626 : 2376 : fprintf (dump_file, " bb:%i", bb->index);
627 : : /* Remember, blocks in the path are stored in opposite order in
628 : : the PATH array. The last entry in the array represents the
629 : : block with an outgoing edge that we will redirect to the jump
630 : : threading path. Thus we don't care how many statements are
631 : : in that block because it will not be copied or whether or not
632 : : it ends in a multiway branch. */
633 : 233431056 : if (j < m_path.length () - 1)
634 : : {
635 : 80977999 : int orig_n_insns = m_n_insns;
636 : 80977999 : if (!m_contains_hot_bb && m_speed_p)
637 : 34719500 : m_contains_hot_bb |= optimize_bb_for_speed_p (bb);
638 : 80977999 : for (gsi = gsi_after_labels (bb);
639 : 315245343 : !gsi_end_p (gsi);
640 : 234267344 : gsi_next_nondebug (&gsi))
641 : : {
642 : : /* Do not allow OpenACC loop markers and __builtin_constant_p on
643 : : threading paths. The latter is disallowed, because an
644 : : expression might be constant on two threading paths, and
645 : : become non-constant (i.e.: phi) when they merge. */
646 : 234303796 : gimple *stmt = gsi_stmt (gsi);
647 : 234303796 : if (gimple_call_internal_p (stmt, IFN_UNIQUE)
648 : 234303796 : || gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
649 : : {
650 : 36452 : if (dump_file && (dump_flags & TDF_DETAILS))
651 : 0 : fputc ('\n', dump_file);
652 : 36452 : return false;
653 : : }
654 : : /* Do not count empty statements and labels. */
655 : 234267344 : if (gimple_code (stmt) != GIMPLE_NOP
656 : 234267344 : && !is_gimple_debug (stmt))
657 : 195314395 : m_n_insns += estimate_num_insns (stmt, &eni_size_weights);
658 : : }
659 : 80941547 : if (dump_file && (dump_flags & TDF_DETAILS))
660 : 1728 : fprintf (dump_file, " (%i insns)", m_n_insns-orig_n_insns);
661 : :
662 : : /* We do not look at the block with the threaded branch
663 : : in this loop. So if any block with a last statement that
664 : : is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
665 : : multiway branch on our path.
666 : :
667 : : The block in PATH[0] is special, it's the block were we're
668 : : going to be able to eliminate its branch. */
669 : 80941547 : if (j > 0)
670 : : {
671 : 45179374 : gimple *last = *gsi_last_bb (bb);
672 : 45179374 : if (last
673 : 45179374 : && (gimple_code (last) == GIMPLE_SWITCH
674 : 40532919 : || gimple_code (last) == GIMPLE_GOTO))
675 : 322521 : m_multiway_branch_in_path = true;
676 : : }
677 : : }
678 : :
679 : : /* Note if we thread through the latch, we will want to include
680 : : the last entry in the array when determining if we thread
681 : : through the loop latch. */
682 : 116679076 : if (loop->latch == bb)
683 : : {
684 : 7344543 : m_threaded_through_latch = true;
685 : 7344543 : if (dump_file && (dump_flags & TDF_DETAILS))
686 : 113 : fprintf (dump_file, " (latch)");
687 : : }
688 : : }
689 : :
690 : : /* We are going to remove the control statement at the end of the
691 : : last block in the threading path. So don't count it against our
692 : : statement count. */
693 : 35737529 : m_n_insns -= m_exit_jump_benefit;
694 : :
695 : 35737529 : if (dump_file && (dump_flags & TDF_DETAILS))
696 : 648 : fprintf (dump_file, "\n Control statement insns: %i\n"
697 : : " Overall: %i insns\n",
698 : : m_exit_jump_benefit, m_n_insns);
699 : :
700 : : /* Threading is profitable if the path duplicated is hot but also
701 : : in a case we separate cold path from hot path and permit optimization
702 : : of the hot path later. Be on the agressive side here. In some testcases,
703 : : as in PR 78407 this leads to noticeable improvements. */
704 : 35737529 : if (m_speed_p)
705 : : {
706 : 32192353 : if (m_n_insns >= param_max_fsm_thread_path_insns)
707 : : {
708 : 7022 : if (dump_file && (dump_flags & TDF_DETAILS))
709 : 0 : fprintf (dump_file, " FAIL: Jump-thread path not considered: "
710 : : "the number of instructions on the path "
711 : : "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
712 : 7022 : return false;
713 : : }
714 : 64370662 : edge entry = find_edge (m_path[m_path.length () - 1],
715 : 32185331 : m_path[m_path.length () - 2]);
716 : 32185331 : if (probably_never_executed_edge_p (cfun, entry))
717 : : {
718 : 98923 : if (dump_file && (dump_flags & TDF_DETAILS))
719 : 0 : fprintf (dump_file, " FAIL: Jump-thread path not considered: "
720 : : "path entry is probably never executed.\n");
721 : 98923 : return false;
722 : : }
723 : : }
724 : 3545176 : else if (m_n_insns > 1)
725 : : {
726 : 1379949 : if (dump_file && (dump_flags & TDF_DETAILS))
727 : 15 : fprintf (dump_file, " FAIL: Jump-thread path not considered: "
728 : : "duplication of %i insns is needed and optimizing for size.\n",
729 : : m_n_insns);
730 : 1379949 : return false;
731 : : }
732 : :
733 : : /* The generic copier used by the backthreader does not re-use an
734 : : existing threading path to reduce code duplication. So for that
735 : : case, drastically reduce the number of statements we are allowed
736 : : to copy. We don't know yet whether we will thread through the latch
737 : : so we have to be permissive and continue threading, but indicate
738 : : to the caller the thread, if final, wouldn't be profitable. */
739 : 34251635 : if ((!m_threaded_multiway_branch
740 : 192979 : || !loop->latch
741 : 192171 : || loop->latch->index == EXIT_BLOCK)
742 : 34150997 : && (m_n_insns * param_fsm_scale_path_stmts
743 : 34150997 : >= param_max_jump_thread_duplication_stmts))
744 : : {
745 : 8766151 : if (dump_file && (dump_flags & TDF_DETAILS))
746 : 86 : fprintf (dump_file,
747 : : " FAIL: Did not thread around loop and would copy too "
748 : : "many statements.\n");
749 : 8766151 : return false;
750 : : }
751 : 5378420 : *large_non_fsm = (!(m_threaded_through_latch && m_threaded_multiway_branch)
752 : 25485484 : && (m_n_insns * param_fsm_scale_path_stmts
753 : 25460643 : >= param_max_jump_thread_duplication_stmts));
754 : :
755 : 25485484 : if (dump_file && (dump_flags & TDF_DETAILS))
756 : 547 : fputc ('\n', dump_file);
757 : : return true;
758 : : }
759 : :
760 : : /* Examine jump threading path PATH and return TRUE if it is profitable to
761 : : thread it, otherwise return FALSE.
762 : :
763 : : The taken edge out of the path is TAKEN_EDGE.
764 : :
765 : : CREATES_IRREDUCIBLE_LOOP is set to TRUE if threading this path
766 : : would create an irreducible loop.
767 : :
768 : : ?? It seems we should be able to loosen some of the restrictions in
769 : : this function after loop optimizations have run. */
770 : :
771 : : bool
772 : 1443317 : back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
773 : : edge taken_edge,
774 : : bool *creates_irreducible_loop)
775 : : {
776 : : // We can assume that possibly_profitable_path_p holds here
777 : :
778 : 1443317 : loop_p loop = m_path[0]->loop_father;
779 : :
780 : 1443317 : if (dump_file && (dump_flags & TDF_DETAILS))
781 : 116 : fprintf (dump_file, "Checking profitability of path (backwards): ");
782 : :
783 : : /* If this path threaded through the loop latch back into the
784 : : same loop and the destination does not dominate the loop
785 : : latch, then this thread would create an irreducible loop. */
786 : 1443317 : *creates_irreducible_loop = false;
787 : 1443317 : if (m_threaded_through_latch
788 : 156594 : && loop == taken_edge->dest->loop_father
789 : 1586893 : && (determine_bb_domination_status (loop, taken_edge->dest)
790 : : == DOMST_NONDOMINATING))
791 : 98198 : *creates_irreducible_loop = true;
792 : :
793 : : /* Threading is profitable if the path duplicated is hot but also
794 : : in a case we separate cold path from hot path and permit optimization
795 : : of the hot path later. Be on the agressive side here. In some testcases,
796 : : as in PR 78407 this leads to noticeable improvements. */
797 : 1443317 : if (m_speed_p
798 : 1443317 : && (optimize_edge_for_speed_p (taken_edge) || m_contains_hot_bb))
799 : : {
800 : 945521 : if (probably_never_executed_edge_p (cfun, taken_edge))
801 : : {
802 : 27781 : if (dump_file && (dump_flags & TDF_DETAILS))
803 : 0 : fprintf (dump_file, " FAIL: Jump-thread path not considered: "
804 : : "path leads to probably never executed edge.\n");
805 : 27781 : return false;
806 : : }
807 : : }
808 : 497796 : else if (m_n_insns > 1)
809 : : {
810 : 42383 : if (dump_file && (dump_flags & TDF_DETAILS))
811 : 0 : fprintf (dump_file, " FAIL: Jump-thread path not considered: "
812 : : "duplication of %i insns is needed and optimizing for size.\n",
813 : : m_n_insns);
814 : 42383 : return false;
815 : : }
816 : :
817 : : /* We avoid creating irreducible inner loops unless we thread through
818 : : a multiway branch, in which case we have deemed it worth losing
819 : : other loop optimizations later.
820 : :
821 : : We also consider it worth creating an irreducible inner loop after
822 : : loop optimizations if the number of copied statement is low. */
823 : 1373153 : if (!m_threaded_multiway_branch
824 : 1367564 : && *creates_irreducible_loop
825 : 89366 : && (!(cfun->curr_properties & PROP_loop_opts_done)
826 : 44649 : || (m_n_insns * param_fsm_scale_path_stmts
827 : 44649 : >= param_max_jump_thread_duplication_stmts)))
828 : : {
829 : 44717 : if (dump_file && (dump_flags & TDF_DETAILS))
830 : 1 : fprintf (dump_file,
831 : : " FAIL: Would create irreducible loop early without "
832 : : "threading multiway branch.\n");
833 : : /* We compute creates_irreducible_loop only late. */
834 : 44717 : return false;
835 : : }
836 : :
837 : : /* The generic copier used by the backthreader does not re-use an
838 : : existing threading path to reduce code duplication. So for that
839 : : case, drastically reduce the number of statements we are allowed
840 : : to copy. */
841 : 1328436 : if (!(m_threaded_through_latch && m_threaded_multiway_branch)
842 : 1325230 : && (m_n_insns * param_fsm_scale_path_stmts
843 : 1325230 : >= param_max_jump_thread_duplication_stmts))
844 : : {
845 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
846 : 0 : fprintf (dump_file,
847 : : " FAIL: Did not thread around loop and would copy too "
848 : : "many statements.\n");
849 : 0 : return false;
850 : : }
851 : :
852 : : /* When there is a multi-way branch on the path, then threading can
853 : : explode the CFG due to duplicating the edges for that multi-way
854 : : branch. So like above, only allow a multi-way branch on the path
855 : : if we actually thread a multi-way branch. */
856 : 1328436 : if (!m_threaded_multiway_branch && m_multiway_branch_in_path)
857 : : {
858 : 150 : if (dump_file && (dump_flags & TDF_DETAILS))
859 : 6 : fprintf (dump_file,
860 : : " FAIL: Thread through multiway branch without threading "
861 : : "a multiway branch.\n");
862 : 150 : return false;
863 : : }
864 : :
865 : : /* Threading through an empty latch would cause code to be added to
866 : : the latch. This could alter the loop form sufficiently to cause
867 : : loop optimizations to fail. Disable these threads until after
868 : : loop optimizations have run. */
869 : 1223587 : if ((m_threaded_through_latch || taken_edge->dest == loop->latch)
870 : 155810 : && !(cfun->curr_properties & PROP_loop_opts_done)
871 : 1408125 : && empty_block_p (loop->latch))
872 : : {
873 : 31361 : if (dump_file && (dump_flags & TDF_DETAILS))
874 : 21 : fprintf (dump_file,
875 : : " FAIL: Thread through latch before loop opts would create "
876 : : "non-empty latch\n");
877 : 31361 : return false;
878 : : }
879 : 1296925 : if (dump_file && (dump_flags & TDF_DETAILS))
880 : 88 : fputc ('\n', dump_file);
881 : : return true;
882 : : }
883 : :
884 : :
885 : : /* The current path PATH is a vector of blocks forming a jump threading
886 : : path in reverse order. TAKEN_EDGE is the edge taken from path[0].
887 : :
888 : : Convert the current path into the form used by register_jump_thread and
889 : : register it.
890 : :
891 : : Return TRUE if successful or FALSE otherwise. */
892 : :
893 : : bool
894 : 1296925 : back_threader_registry::register_path (const vec<basic_block> &m_path,
895 : : edge taken_edge)
896 : : {
897 : 1296925 : vec<jump_thread_edge *> *jump_thread_path = allocate_thread_path ();
898 : :
899 : : // The generic copier ignores the edge type. We can build the
900 : : // thread edges with any type.
901 : 3321761 : for (unsigned int j = 0; j + 1 < m_path.length (); j++)
902 : : {
903 : 2024836 : basic_block bb1 = m_path[m_path.length () - j - 1];
904 : 2024836 : basic_block bb2 = m_path[m_path.length () - j - 2];
905 : :
906 : 2024836 : edge e = find_edge (bb1, bb2);
907 : 2024836 : gcc_assert (e);
908 : 2024836 : push_edge (jump_thread_path, e, EDGE_COPY_SRC_BLOCK);
909 : : }
910 : :
911 : 1296925 : push_edge (jump_thread_path, taken_edge, EDGE_NO_COPY_SRC_BLOCK);
912 : 1296925 : return register_jump_thread (jump_thread_path);
913 : : }
914 : :
915 : : // Thread all suitable paths in the current function.
916 : : //
917 : : // Return TODO_flags.
918 : :
919 : : unsigned int
920 : 6233079 : back_threader::thread_blocks ()
921 : : {
922 : 6233079 : basic_block bb;
923 : 61186033 : FOR_EACH_BB_FN (bb, m_fun)
924 : 80673369 : if (EDGE_COUNT (bb->succs) > 1)
925 : 25720415 : maybe_thread_block (bb);
926 : :
927 : 6233079 : bool changed = m_registry.thread_through_all_blocks (true);
928 : :
929 : 6233079 : if (m_flags & BT_SPEED)
930 : 7403060 : return changed ? TODO_cleanup_cfg : 0;
931 : :
932 : : return false;
933 : : }
934 : :
935 : : namespace {
936 : :
937 : : const pass_data pass_data_early_thread_jumps =
938 : : {
939 : : GIMPLE_PASS,
940 : : "ethread",
941 : : OPTGROUP_NONE,
942 : : TV_TREE_SSA_THREAD_JUMPS,
943 : : ( PROP_cfg | PROP_ssa ),
944 : : 0,
945 : : 0,
946 : : 0,
947 : : ( TODO_cleanup_cfg | TODO_update_ssa ),
948 : : };
949 : :
950 : : const pass_data pass_data_thread_jumps =
951 : : {
952 : : GIMPLE_PASS,
953 : : "thread",
954 : : OPTGROUP_NONE,
955 : : TV_TREE_SSA_THREAD_JUMPS,
956 : : ( PROP_cfg | PROP_ssa ),
957 : : 0,
958 : : 0,
959 : : 0,
960 : : TODO_update_ssa,
961 : : };
962 : :
963 : : const pass_data pass_data_thread_jumps_full =
964 : : {
965 : : GIMPLE_PASS,
966 : : "threadfull",
967 : : OPTGROUP_NONE,
968 : : TV_TREE_SSA_THREAD_JUMPS,
969 : : ( PROP_cfg | PROP_ssa ),
970 : : 0,
971 : : 0,
972 : : 0,
973 : : TODO_update_ssa,
974 : : };
975 : :
976 : : // Early jump threading pass optimizing for size.
977 : : class pass_early_thread_jumps : public gimple_opt_pass
978 : : {
979 : : public:
980 : 285081 : pass_early_thread_jumps (gcc::context *ctxt)
981 : 570162 : : gimple_opt_pass (pass_data_early_thread_jumps, ctxt)
982 : : {}
983 : :
984 : 0 : opt_pass * clone () override
985 : : {
986 : 0 : return new pass_early_thread_jumps (m_ctxt);
987 : : }
988 : 285081 : void set_pass_param (unsigned int, bool param) override
989 : : {
990 : 285081 : m_first = param;
991 : 285081 : }
992 : 2441318 : bool gate (function *) override
993 : : {
994 : 2441318 : return flag_thread_jumps;
995 : : }
996 : 2441036 : unsigned int execute (function *fun) override
997 : : {
998 : 2441036 : back_threader threader (fun, BT_NONE, m_first);
999 : 2441036 : return threader.thread_blocks ();
1000 : 2441036 : }
1001 : : private:
1002 : : bool m_first;
1003 : : };
1004 : :
1005 : : // Jump threading pass without resolving of unknown SSAs.
1006 : : class pass_thread_jumps : public gimple_opt_pass
1007 : : {
1008 : : public:
1009 : 570162 : pass_thread_jumps (gcc::context *ctxt)
1010 : 1140324 : : gimple_opt_pass (pass_data_thread_jumps, ctxt)
1011 : : {}
1012 : 285081 : opt_pass * clone (void) override
1013 : : {
1014 : 285081 : return new pass_thread_jumps (m_ctxt);
1015 : : }
1016 : 570162 : void set_pass_param (unsigned int, bool param) override
1017 : : {
1018 : 570162 : m_first = param;
1019 : 570162 : }
1020 : 2043164 : bool gate (function *) override
1021 : : {
1022 : 2043164 : return flag_thread_jumps && flag_expensive_optimizations;
1023 : : }
1024 : 1896020 : unsigned int execute (function *fun) override
1025 : : {
1026 : 1896020 : back_threader threader (fun, BT_SPEED, m_first);
1027 : 1896020 : return threader.thread_blocks ();
1028 : 1896020 : }
1029 : : private:
1030 : : bool m_first;
1031 : : };
1032 : :
1033 : : // Jump threading pass that fully resolves unknown SSAs.
1034 : : class pass_thread_jumps_full : public gimple_opt_pass
1035 : : {
1036 : : public:
1037 : 570162 : pass_thread_jumps_full (gcc::context *ctxt)
1038 : 1140324 : : gimple_opt_pass (pass_data_thread_jumps_full, ctxt)
1039 : : {}
1040 : 285081 : opt_pass * clone (void) override
1041 : : {
1042 : 285081 : return new pass_thread_jumps_full (m_ctxt);
1043 : : }
1044 : 570162 : void set_pass_param (unsigned int, bool param) override
1045 : : {
1046 : 570162 : m_first = param;
1047 : 570162 : }
1048 : 2043164 : bool gate (function *) override
1049 : : {
1050 : 2043164 : return flag_thread_jumps && flag_expensive_optimizations;
1051 : : }
1052 : 1896023 : unsigned int execute (function *fun) override
1053 : : {
1054 : 1896023 : back_threader threader (fun, BT_SPEED | BT_RESOLVE, m_first);
1055 : 1896023 : return threader.thread_blocks ();
1056 : 1896023 : }
1057 : : private:
1058 : : bool m_first;
1059 : : };
1060 : :
1061 : : } // namespace {
1062 : :
1063 : : gimple_opt_pass *
1064 : 285081 : make_pass_thread_jumps (gcc::context *ctxt)
1065 : : {
1066 : 285081 : return new pass_thread_jumps (ctxt);
1067 : : }
1068 : :
1069 : : gimple_opt_pass *
1070 : 285081 : make_pass_thread_jumps_full (gcc::context *ctxt)
1071 : : {
1072 : 285081 : return new pass_thread_jumps_full (ctxt);
1073 : : }
1074 : :
1075 : : gimple_opt_pass *
1076 : 285081 : make_pass_early_thread_jumps (gcc::context *ctxt)
1077 : : {
1078 : 285081 : return new pass_early_thread_jumps (ctxt);
1079 : : }
|