Branch data Line data Source code
1 : : /* SSA Jump Threading
2 : : Copyright (C) 2005-2024 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify
7 : : it under the terms of the GNU General Public License as published by
8 : : the Free Software Foundation; either version 3, or (at your option)
9 : : any later version.
10 : :
11 : : GCC is distributed in the hope that it will be useful,
12 : : but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : : GNU General Public License for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : #include "config.h"
21 : : #include "system.h"
22 : : #include "coretypes.h"
23 : : #include "backend.h"
24 : : #include "predict.h"
25 : : #include "tree.h"
26 : : #include "gimple.h"
27 : : #include "fold-const.h"
28 : : #include "cfgloop.h"
29 : : #include "gimple-iterator.h"
30 : : #include "tree-cfg.h"
31 : : #include "tree-ssa-threadupdate.h"
32 : : #include "tree-ssa-loop.h"
33 : : #include "cfganal.h"
34 : : #include "tree-pass.h"
35 : : #include "gimple-ssa.h"
36 : : #include "tree-phinodes.h"
37 : : #include "tree-inline.h"
38 : : #include "tree-vectorizer.h"
39 : : #include "value-range.h"
40 : : #include "gimple-range.h"
41 : : #include "tree-ssa-threadedge.h"
42 : : #include "gimple-range-path.h"
43 : : #include "ssa.h"
44 : : #include "tree-cfgcleanup.h"
45 : : #include "tree-pretty-print.h"
46 : : #include "cfghooks.h"
47 : : #include "dbgcnt.h"
48 : :
// Path registry for the backwards threader.  After all paths have been
// registered with register_path(), thread_through_all_blocks() is called
// to modify the CFG.

class back_threader_registry : public back_jt_path_registry
{
public:
  // Record a jump-threading candidate consisting of the blocks in the
  // path plus the known outgoing edge TAKEN; returns true if the path
  // was accepted by the registry.
  bool register_path (const vec<basic_block> &, edge taken);
};
58 : :
// Class to abstract the profitability code for the backwards threader.

class back_threader_profitability
{
public:
  // SPEED_P chooses the optimize-for-speed heuristics; STMT is the
  // final jump of the path whose removal is credited as a benefit.
  back_threader_profitability (bool speed_p, gimple *stmt);
  // Cheap check usable during path discovery; may return TRUE for
  // paths that later fail the full profitable_path_p check.
  bool possibly_profitable_path_p (const vec<basic_block> &, bool *);
  // Full profitability check once the taken edge out of the path is
  // known.  Must be preceded by possibly_profitable_path_p, which
  // computes the cached state below.
  bool profitable_path_p (const vec<basic_block> &,
			  edge taken, bool *irreducible_loop);
private:
  const bool m_speed_p;
  // Size estimate of the exit jump that threading would eliminate.
  int m_exit_jump_benefit;
  // True when the final statement is a switch or computed goto.
  bool m_threaded_multiway_branch;
  // The following are computed by possibly_profitable_path_p
  bool m_threaded_through_latch;
  bool m_multiway_branch_in_path;
  bool m_contains_hot_bb;
  int m_n_insns;
};
78 : :
79 : 19555264 : back_threader_profitability::back_threader_profitability (bool speed_p,
80 : 19555264 : gimple *last)
81 : 19555264 : : m_speed_p (speed_p)
82 : : {
83 : 19555264 : m_threaded_multiway_branch = (gimple_code (last) == GIMPLE_SWITCH
84 : 19555264 : || gimple_code (last) == GIMPLE_GOTO);
85 : : // The forward threader has estimate_threading_killed_stmts, in
86 : : // particular it estimates further DCE from eliminating the exit
87 : : // control stmt.
88 : 19555264 : m_exit_jump_benefit = estimate_num_insns (last, &eni_size_weights);
89 : 19555264 : }
90 : :
91 : : // Back threader flags.
92 : : #define BT_NONE 0
93 : : // Generate fast code at the expense of code size.
94 : : #define BT_SPEED 1
95 : : // Resolve unknown SSAs on entry to a threading path. If set, use the
96 : : // ranger. If not, assume all ranges on entry to a path are VARYING.
97 : : #define BT_RESOLVE 2
98 : :
class back_threader
{
public:
  back_threader (function *fun, unsigned flags, bool first);
  ~back_threader ();
  // Thread all suitable blocks in the function; returns TODO_* flags.
  unsigned thread_blocks ();
private:
  void maybe_thread_block (basic_block bb);
  bool debug_counter ();
  edge maybe_register_path (back_threader_profitability &);
  void maybe_register_path_dump (edge taken_edge);
  void find_paths_to_names (basic_block bb, bitmap imports, unsigned,
			    back_threader_profitability &);
  edge find_taken_edge (const vec<basic_block> &path);
  edge find_taken_edge_cond (const vec<basic_block> &path, gcond *);
  edge find_taken_edge_switch (const vec<basic_block> &path, gswitch *);
  virtual void debug ();
  virtual void dump (FILE *out);

  back_threader_registry m_registry;

  // Current path being analyzed.
  auto_vec<basic_block> m_path;
  // Hash to mark visited BBs while analyzing a path.
  hash_set<basic_block> m_visited_bbs;
  // The set of SSA names, any of which could potentially change the
  // value of the final conditional in a path.
  auto_bitmap m_imports;
  // The last statement in the path.
  gimple *m_last_stmt;
  // Marker to differentiate unreachable edges.
  static const edge UNREACHABLE_EDGE;
  // Function being threaded.
  function *m_fun;
  // Ranger for the path solver.
  gimple_ranger *m_ranger;
  // BT_* flags.  If BT_RESOLVE is set, unknown SSA names along a path
  // are resolved with the ranger; otherwise they are assumed to be
  // VARYING.  Resolving is more precise but slower.
  unsigned m_flags;
  // Set to TRUE for the first of each thread[12] pass or the first of
  // each threadfull[12] pass.  This is used to differentiate between
  // the different threading passes so we can set up debug counters.
  bool m_first;
};
143 : :
// Used to differentiate unreachable edges, so we may stop the search
// in the given direction.
const edge back_threader::UNREACHABLE_EDGE = (edge) -1;
147 : :
148 : 5935326 : back_threader::back_threader (function *fun, unsigned flags, bool first)
149 : 5935326 : : m_first (first)
150 : : {
151 : 5935326 : if (flags & BT_SPEED)
152 : 3696613 : loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);
153 : : else
154 : 2238713 : loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
155 : :
156 : 5935326 : m_fun = fun;
157 : 5935326 : m_flags = flags;
158 : 5935326 : m_last_stmt = NULL;
159 : :
160 : : // The path solver needs EDGE_DFS_BACK in resolving mode.
161 : 5935326 : if (flags & BT_RESOLVE)
162 : 1848308 : mark_dfs_back_edges ();
163 : :
164 : 5935326 : m_ranger = new gimple_ranger;
165 : 5935326 : }
166 : :
167 : 5935326 : back_threader::~back_threader ()
168 : : {
169 : 5935326 : delete m_ranger;
170 : 5935326 : loop_optimizer_finalize ();
171 : 5935326 : }
172 : :
173 : : // A wrapper for the various debug counters for the threading passes.
174 : : // Returns TRUE if it's OK to register the current threading
175 : : // candidate.
176 : :
177 : : bool
178 : 1132233 : back_threader::debug_counter ()
179 : : {
180 : : // The ethread pass is mostly harmless ;-).
181 : 1132233 : if ((m_flags & BT_SPEED) == 0)
182 : : return true;
183 : :
184 : 755036 : if (m_flags & BT_RESOLVE)
185 : : {
186 : 515553 : if (m_first && !dbg_cnt (back_threadfull1))
187 : : return false;
188 : :
189 : 515553 : if (!m_first && !dbg_cnt (back_threadfull2))
190 : : return false;
191 : : }
192 : : else
193 : : {
194 : 239483 : if (m_first && !dbg_cnt (back_thread1))
195 : : return false;
196 : :
197 : 239483 : if (!m_first && !dbg_cnt (back_thread2))
198 : : return false;
199 : : }
200 : : return true;
201 : : }
202 : :
203 : : static void
204 : 531 : dump_path (FILE *dump_file, const vec<basic_block> &path)
205 : : {
206 : 2883 : for (unsigned i = path.length (); i > 0; --i)
207 : : {
208 : 1821 : basic_block bb = path[i - 1];
209 : 1821 : fprintf (dump_file, "%d", bb->index);
210 : 1821 : if (i > 1)
211 : 1290 : fprintf (dump_file, "->");
212 : : }
213 : 531 : }
214 : :
215 : : // Dump details of an attempt to register a path.
216 : :
217 : : void
218 : 531 : back_threader::maybe_register_path_dump (edge taken)
219 : : {
220 : 531 : if (m_path.is_empty ())
221 : : return;
222 : :
223 : 531 : fprintf (dump_file, "path: ");
224 : 531 : dump_path (dump_file, m_path);
225 : 531 : fprintf (dump_file, "->");
226 : :
227 : 531 : if (taken == UNREACHABLE_EDGE)
228 : 9 : fprintf (dump_file, "xx REJECTED (unreachable)\n");
229 : 522 : else if (taken)
230 : 76 : fprintf (dump_file, "%d SUCCESS\n", taken->dest->index);
231 : : else
232 : 446 : fprintf (dump_file, "xx REJECTED\n");
233 : : }
234 : :
235 : : // If an outgoing edge can be determined out of the current path,
236 : : // register it for jump threading and return the taken edge.
237 : : //
238 : : // Return NULL if it is unprofitable to thread this path, or the
239 : : // outgoing edge is unknown. Return UNREACHABLE_EDGE if the path is
240 : : // unreachable.
241 : :
242 : : edge
243 : 21752828 : back_threader::maybe_register_path (back_threader_profitability &profit)
244 : : {
245 : 21752828 : edge taken_edge = find_taken_edge (m_path);
246 : :
247 : 21752828 : if (taken_edge && taken_edge != UNREACHABLE_EDGE)
248 : : {
249 : 1254331 : bool irreducible = false;
250 : 1254331 : if (profit.profitable_path_p (m_path, taken_edge, &irreducible)
251 : 1132233 : && debug_counter ()
252 : 2386564 : && m_registry.register_path (m_path, taken_edge))
253 : : {
254 : 1100212 : if (irreducible)
255 : 30675 : vect_free_loop_info_assumptions (m_path[0]->loop_father);
256 : : }
257 : : else
258 : : taken_edge = NULL;
259 : : }
260 : :
261 : 21752828 : if (dump_file && (dump_flags & TDF_DETAILS))
262 : 531 : maybe_register_path_dump (taken_edge);
263 : :
264 : 21752828 : return taken_edge;
265 : : }
266 : :
267 : : // Return the known taken edge out of a path. If the path can be
268 : : // determined to be unreachable, return UNREACHABLE_EDGE. If no
269 : : // outgoing edge can be calculated, return NULL.
270 : :
271 : : edge
272 : 21752828 : back_threader::find_taken_edge (const vec<basic_block> &path)
273 : : {
274 : 21752828 : gcc_checking_assert (path.length () > 1);
275 : 21752828 : switch (gimple_code (m_last_stmt))
276 : : {
277 : 21639328 : case GIMPLE_COND:
278 : 21639328 : return find_taken_edge_cond (path, as_a<gcond *> (m_last_stmt));
279 : :
280 : 113500 : case GIMPLE_SWITCH:
281 : 113500 : return find_taken_edge_switch (path, as_a<gswitch *> (m_last_stmt));
282 : :
283 : : default:
284 : : return NULL;
285 : : }
286 : : }
287 : :
288 : : // Same as find_taken_edge, but for paths ending in a switch.
289 : :
290 : : edge
291 : 113500 : back_threader::find_taken_edge_switch (const vec<basic_block> &path,
292 : : gswitch *sw)
293 : : {
294 : 113500 : tree name = gimple_switch_index (sw);
295 : 113500 : int_range_max r;
296 : :
297 : 113500 : path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
298 : 113500 : solver.range_of_expr (r, name, sw);
299 : :
300 : 113500 : if (r.undefined_p ())
301 : : return UNREACHABLE_EDGE;
302 : :
303 : 113171 : if (r.varying_p ())
304 : : return NULL;
305 : :
306 : 61042 : tree label = find_case_label_range (sw, &r);
307 : 61042 : if (!label)
308 : : return NULL;
309 : :
310 : 4050 : return find_edge (gimple_bb (sw), label_to_block (cfun, CASE_LABEL (label)));
311 : 113500 : }
312 : :
313 : : // Same as find_taken_edge, but for paths ending in a GIMPLE_COND.
314 : :
315 : : edge
316 : 21639328 : back_threader::find_taken_edge_cond (const vec<basic_block> &path,
317 : : gcond *cond)
318 : : {
319 : 21639328 : int_range_max r;
320 : :
321 : 21639328 : path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
322 : 21639328 : solver.range_of_stmt (r, cond);
323 : :
324 : 21639328 : if (solver.unreachable_path_p ())
325 : : return UNREACHABLE_EDGE;
326 : :
327 : 21571665 : int_range<2> true_range = range_true ();
328 : 21571665 : int_range<2> false_range = range_false ();
329 : :
330 : 21571665 : if (r == true_range || r == false_range)
331 : : {
332 : 1250281 : edge e_true, e_false;
333 : 1250281 : basic_block bb = gimple_bb (cond);
334 : 1250281 : extract_true_false_edges_from_block (bb, &e_true, &e_false);
335 : 1250281 : return r == true_range ? e_true : e_false;
336 : : }
337 : : return NULL;
338 : 21639328 : }
339 : :
// Find jump threading paths to any of the SSA names in the
// INTERESTING bitmap, and register any such paths.
//
// BB is the current path being processed.
//
// OVERALL_PATHS is the search space up to this block.

void
back_threader::find_paths_to_names (basic_block bb, bitmap interesting,
				    unsigned overall_paths,
				    back_threader_profitability &profit)
{
  // Do not revisit a block already on the current path.
  if (m_visited_bbs.add (bb))
    return;

  m_path.safe_push (bb);

  // Try to resolve the path without looking back.  Avoid resolving paths
  // we know are large but are not (yet) recognized as Finite State Machine.
  // ??? Ideally we'd explore the cheapest path to the loop backedge here,
  // avoiding the exponential greedy search and only start that from there.
  // Precomputing a path-size-to-immediate-dominator-of-successor for each
  // edge might help here.  Alternatively copying divergent control flow
  // on the way to the backedge could be worthwhile.
  bool large_non_fsm;
  if (m_path.length () > 1
      && (!profit.possibly_profitable_path_p (m_path, &large_non_fsm)
	  || (!large_non_fsm
	      && maybe_register_path (profit))))
    ;

  // The backwards thread copier cannot copy blocks that do not belong
  // to the same loop, so when the new source of the path entry no
  // longer belongs to it we don't need to search further.
  else if (m_path[0]->loop_father != bb->loop_father)
    ;

  // Continue looking for ways to extend the path but limit the
  // search space along a branch.
  else if ((overall_paths = overall_paths * EDGE_COUNT (bb->preds))
	   <= (unsigned)param_max_jump_thread_paths)
    {
      // For further greedy searching we want to remove interesting
      // names defined in BB but add ones on the PHI edges for the
      // respective edges and adding imports from those stmts.
      // We do this by starting with all names
      // not defined in BB as interesting, collecting a list of
      // interesting PHIs in BB on the fly.  Then we iterate over
      // predecessor edges, adding interesting PHI edge defs to
      // the set of interesting names to consider when processing it.
      auto_bitmap new_interesting;
      auto_vec<int, 16> new_imports;
      auto_vec<gphi *, 4> interesting_phis;
      bitmap_iterator bi;
      unsigned i;
      auto_vec<tree, 16> worklist;
      EXECUTE_IF_SET_IN_BITMAP (interesting, 0, i, bi)
	{
	  tree name = ssa_name (i);
	  gimple *def_stmt = SSA_NAME_DEF_STMT (name);
	  /* Imports remain interesting.  */
	  if (gimple_bb (def_stmt) != bb)
	    {
	      bitmap_set_bit (new_interesting, i);
	      continue;
	    }
	  // Walk the use-def chains of names defined in BB.
	  worklist.quick_push (name);
	  while (!worklist.is_empty ())
	    {
	      tree name = worklist.pop ();
	      gimple *def_stmt = SSA_NAME_DEF_STMT (name);
	      /* Newly discovered imports are interesting.  */
	      if (gimple_bb (def_stmt) != bb)
		{
		  bitmap_set_bit (new_interesting, SSA_NAME_VERSION (name));
		  continue;
		}
	      /* Local PHIs participate in renaming below.  */
	      if (gphi *phi = dyn_cast<gphi *> (def_stmt))
		{
		  tree res = gimple_phi_result (phi);
		  if (!SSA_NAME_OCCURS_IN_ABNORMAL_PHI (res))
		    interesting_phis.safe_push (phi);
		}
	      /* For other local defs process their uses, amending
		 imports on the way.  */
	      else
		{
		  tree ssa[3];
		  unsigned lim = gimple_range_ssa_names (ssa, 3, def_stmt);
		  for (unsigned j = 0; j < lim; ++j)
		    {
		      tree rhs = ssa[j];
		      if (rhs
			  && bitmap_set_bit (m_imports,
					     SSA_NAME_VERSION (rhs)))
			{
			  // Remember for unwinding on backtrack.
			  new_imports.safe_push (SSA_NAME_VERSION (rhs));
			  worklist.safe_push (rhs);
			}
		    }
		}
	    }
	}
      if (!bitmap_empty_p (new_interesting)
	  || !interesting_phis.is_empty ())
	{
	  auto_vec<int, 4> unwind (interesting_phis.length ());
	  auto_vec<int, 4> imports_unwind (interesting_phis.length ());
	  edge_iterator iter;
	  edge e;
	  FOR_EACH_EDGE (e, iter, bb->preds)
	    {
	      if (e->flags & EDGE_ABNORMAL
		  // This is like path_crosses_loops in profitable_path_p but
		  // more restrictive to avoid peeling off loop iterations (see
		  // tree-ssa/pr14341.c for an example).
		  // ??? Note this restriction only applied when visiting an
		  // interesting PHI with the former resolve_phi.
		  || (!interesting_phis.is_empty ()
		      && m_path[0]->loop_father != e->src->loop_father))
		continue;
	      // Rename interesting PHI results to their argument on E.
	      for (gphi *phi : interesting_phis)
		{
		  tree def = PHI_ARG_DEF_FROM_EDGE (phi, e);
		  if (TREE_CODE (def) == SSA_NAME)
		    {
		      int ver = SSA_NAME_VERSION (def);
		      if (bitmap_set_bit (new_interesting, ver))
			{
			  if (bitmap_set_bit (m_imports, ver))
			    imports_unwind.quick_push (ver);
			  unwind.quick_push (ver);
			}
		    }
		}
	      find_paths_to_names (e->src, new_interesting, overall_paths,
				   profit);
	      // Restore new_interesting.
	      for (int def : unwind)
		bitmap_clear_bit (new_interesting, def);
	      unwind.truncate (0);
	      // Restore m_imports.
	      for (int def : imports_unwind)
		bitmap_clear_bit (m_imports, def);
	      imports_unwind.truncate (0);
	    }
	}
      /* m_imports tracks all interesting names on the path, so when
	 backtracking we have to restore it.  */
      for (int j : new_imports)
	bitmap_clear_bit (m_imports, j);
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  FAIL: Search space limit %d reached.\n",
	     param_max_jump_thread_paths);

  // Reset things to their original state.
  m_path.pop ();
  m_visited_bbs.remove (bb);
}
501 : :
502 : : // Search backwards from BB looking for paths where the final
503 : : // conditional maybe threaded to a successor block. Record such paths
504 : : // for jump threading.
505 : :
506 : : void
507 : 23798309 : back_threader::maybe_thread_block (basic_block bb)
508 : : {
509 : 23798309 : if (EDGE_COUNT (bb->succs) <= 1)
510 : 4243045 : return;
511 : :
512 : 23798309 : gimple *stmt = *gsi_last_bb (bb);
513 : 23798309 : if (!stmt)
514 : : return;
515 : :
516 : 23798289 : enum gimple_code code = gimple_code (stmt);
517 : 23798289 : if (code != GIMPLE_SWITCH
518 : 23798289 : && code != GIMPLE_COND)
519 : : return;
520 : :
521 : 19603171 : m_last_stmt = stmt;
522 : 19603171 : m_visited_bbs.empty ();
523 : 19603171 : m_path.truncate (0);
524 : :
525 : : // We compute imports of the path during discovery starting
526 : : // just with names used in the conditional.
527 : 19603171 : bitmap_clear (m_imports);
528 : 19603171 : ssa_op_iter iter;
529 : 19603171 : tree name;
530 : 43397904 : FOR_EACH_SSA_TREE_OPERAND (name, stmt, iter, SSA_OP_USE)
531 : : {
532 : 23842640 : if (!gimple_range_ssa_p (name))
533 : : return;
534 : 23794733 : bitmap_set_bit (m_imports, SSA_NAME_VERSION (name));
535 : : }
536 : :
537 : : // Interesting is the set of imports we still not have see
538 : : // the definition of. So while imports only grow, the
539 : : // set of interesting defs dwindles and once empty we can
540 : : // stop searching.
541 : 19555264 : auto_bitmap interesting;
542 : 19555264 : bitmap_copy (interesting, m_imports);
543 : 19555264 : back_threader_profitability profit (m_flags & BT_SPEED, stmt);
544 : 19555264 : find_paths_to_names (bb, interesting, 1, profit);
545 : 19555264 : }
546 : :
// Debugger-callable helper: print PATH to stderr followed by a newline.
DEBUG_FUNCTION void
debug (const vec <basic_block> &path)
{
  dump_path (stderr, path);
  fputc ('\n', stderr);
}
553 : :
554 : : void
555 : 0 : back_threader::dump (FILE *out)
556 : : {
557 : 0 : fprintf (out, "\nCandidates for pre-computation:\n");
558 : 0 : fprintf (out, "===================================\n");
559 : :
560 : 0 : bitmap_iterator bi;
561 : 0 : unsigned i;
562 : :
563 : 0 : EXECUTE_IF_SET_IN_BITMAP (m_imports, 0, i, bi)
564 : : {
565 : 0 : tree name = ssa_name (i);
566 : 0 : print_generic_expr (out, name, TDF_NONE);
567 : 0 : fprintf (out, "\n");
568 : : }
569 : 0 : }
570 : :
// Debugger-friendly entry point: dump state to stderr.
void
back_threader::debug ()
{
  dump (stderr);
}
576 : :
/* Examine jump threading path PATH and return TRUE if it is possibly
   profitable to thread it, otherwise return FALSE.  If this function
   returns TRUE profitable_path_p might not be satisfied but when
   the path is extended it might be.  In particular indicate in
   *LARGE_NON_FSM whether the thread is too large for a non-FSM thread
   but would be OK if we extend the path to cover the loop backedge.

   ?? It seems we should be able to loosen some of the restrictions in
   this function after loop optimizations have run.  */

bool
back_threader_profitability::possibly_profitable_path_p
				  (const vec<basic_block> &m_path,
				   bool *large_non_fsm)
{
  gcc_checking_assert (!m_path.is_empty ());

  /* We can have an empty path here (excluding the DEF block) when the
     statement that makes a conditional generate a compile-time
     constant result is in the same block as the conditional.

     That's not really a jump threading opportunity, but instead is
     simple cprop & simplification.  We could handle it here if we
     wanted by wiring up all the incoming edges.  If we run this
     early in IPA, that might be worth doing.  For now we just
     reject that case.  */
  if (m_path.length () <= 1)
    return false;

  gimple_stmt_iterator gsi;
  loop_p loop = m_path[0]->loop_father;

  // We recompute the following, when we rewrite possibly_profitable_path_p
  // to work incrementally on added BBs we have to unwind them on backtracking
  m_n_insns = 0;
  m_threaded_through_latch = false;
  m_multiway_branch_in_path = false;
  m_contains_hot_bb = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Checking profitability of path (backwards): ");

  /* Count the number of instructions on the path: as these instructions
     will have to be duplicated, we will not record the path if there
     are too many instructions on the path.  Also check that all the
     blocks in the path belong to a single loop.  */
  for (unsigned j = 0; j < m_path.length (); j++)
    {
      basic_block bb = m_path[j];

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " bb:%i", bb->index);
      /* Remember, blocks in the path are stored in opposite order in
	 the PATH array.  The last entry in the array represents the
	 block with an outgoing edge that we will redirect to the jump
	 threading path.  Thus we don't care how many statements are
	 in that block because it will not be copied or whether or not
	 it ends in a multiway branch.  */
      if (j < m_path.length () - 1)
	{
	  int orig_n_insns = m_n_insns;
	  // Track whether any copied block is hot (speed mode only).
	  if (!m_contains_hot_bb && m_speed_p)
	    m_contains_hot_bb |= optimize_bb_for_speed_p (bb);
	  for (gsi = gsi_after_labels (bb);
	       !gsi_end_p (gsi);
	       gsi_next_nondebug (&gsi))
	    {
	      /* Do not allow OpenACC loop markers and __builtin_constant_p on
		 threading paths.  The latter is disallowed, because an
		 expression might be constant on two threading paths, and
		 become non-constant (i.e.: phi) when they merge.  */
	      gimple *stmt = gsi_stmt (gsi);
	      if (gimple_call_internal_p (stmt, IFN_UNIQUE)
		  || gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fputc ('\n', dump_file);
		  return false;
		}
	      /* Do not count empty statements and labels.  */
	      if (gimple_code (stmt) != GIMPLE_NOP
		  && !is_gimple_debug (stmt))
		m_n_insns += estimate_num_insns (stmt, &eni_size_weights);
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " (%i insns)", m_n_insns-orig_n_insns);

	  /* We do not look at the block with the threaded branch
	     in this loop.  So if any block with a last statement that
	     is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
	     multiway branch on our path.

	     The block in PATH[0] is special, it's the block were we're
	     going to be able to eliminate its branch.  */
	  if (j > 0)
	    {
	      gimple *last = *gsi_last_bb (bb);
	      if (last
		  && (gimple_code (last) == GIMPLE_SWITCH
		      || gimple_code (last) == GIMPLE_GOTO))
		m_multiway_branch_in_path = true;
	    }
	}

      /* Note if we thread through the latch, we will want to include
	 the last entry in the array when determining if we thread
	 through the loop latch.  */
      if (loop->latch == bb)
	{
	  m_threaded_through_latch = true;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " (latch)");
	}
    }

  /* We are going to remove the control statement at the end of the
     last block in the threading path.  So don't count it against our
     statement count.  */
  m_n_insns -= m_exit_jump_benefit;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n  Control statement insns: %i\n"
	     "  Overall: %i insns\n",
	     m_exit_jump_benefit, m_n_insns);

  /* Threading is profitable if the path duplicated is hot but also
     in a case we separate cold path from hot path and permit optimization
     of the hot path later.  Be on the agressive side here.  In some testcases,
     as in PR 78407 this leads to noticeable improvements.  */
  if (m_speed_p)
    {
      if (m_n_insns >= param_max_fsm_thread_path_insns)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		     "the number of instructions on the path "
		     "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
	  return false;
	}
      // The entry edge into the path is the edge from the second-to-last
      // to the last element of PATH (which is stored in reverse).
      edge entry = find_edge (m_path[m_path.length () - 1],
			      m_path[m_path.length () - 2]);
      if (probably_never_executed_edge_p (cfun, entry))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		     "path entry is probably never executed.\n");
	  return false;
	}
    }
  else if (m_n_insns > 1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		 "duplication of %i insns is needed and optimizing for size.\n",
		 m_n_insns);
      return false;
    }

  /* The generic copier used by the backthreader does not re-use an
     existing threading path to reduce code duplication.  So for that
     case, drastically reduce the number of statements we are allowed
     to copy.  We don't know yet whether we will thread through the latch
     so we have to be permissive and continue threading, but indicate
     to the caller the thread, if final, wouldn't be profitable.  */
  if ((!m_threaded_multiway_branch
       || !loop->latch
       || loop->latch->index == EXIT_BLOCK)
      && (m_n_insns * param_fsm_scale_path_stmts
	  >= param_max_jump_thread_duplication_stmts))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Did not thread around loop and would copy too "
		 "many statements.\n");
      return false;
    }
  *large_non_fsm = (!(m_threaded_through_latch && m_threaded_multiway_branch)
		    && (m_n_insns * param_fsm_scale_path_stmts
			>= param_max_jump_thread_duplication_stmts));

  if (dump_file && (dump_flags & TDF_DETAILS))
    fputc ('\n', dump_file);
  return true;
}
761 : :
/* Examine jump threading path PATH and return TRUE if it is profitable to
   thread it, otherwise return FALSE.

   The taken edge out of the path is TAKEN_EDGE.

   CREATES_IRREDUCIBLE_LOOP is set to TRUE if threading this path
   would create an irreducible loop.

   ?? It seems we should be able to loosen some of the restrictions in
   this function after loop optimizations have run.  */

bool
back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
						edge taken_edge,
						bool *creates_irreducible_loop)
{
  // We can assume that possibly_profitable_path_p holds here

  loop_p loop = m_path[0]->loop_father;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Checking profitability of path (backwards): ");

  /* If this path threaded through the loop latch back into the
     same loop and the destination does not dominate the loop
     latch, then this thread would create an irreducible loop.  */
  *creates_irreducible_loop = false;
  if (m_threaded_through_latch
      && loop == taken_edge->dest->loop_father
      && (determine_bb_domination_status (loop, taken_edge->dest)
	  == DOMST_NONDOMINATING))
    *creates_irreducible_loop = true;

  /* Threading is profitable if the path duplicated is hot but also
     in a case we separate cold path from hot path and permit optimization
     of the hot path later.  Be on the aggressive side here.  In some
     testcases, as in PR 78407 this leads to noticeable improvements.  */
  if (m_speed_p
      && (optimize_edge_for_speed_p (taken_edge) || m_contains_hot_bb))
    {
      /* Optimizing for speed: reject only paths that almost certainly
	 never execute.  */
      if (probably_never_executed_edge_p (cfun, taken_edge))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " FAIL: Jump-thread path not considered: "
		     "path leads to probably never executed edge.\n");
	  return false;
	}
    }
  else if (m_n_insns > 1)
    {
      /* Optimizing for size (or a cold path): any duplication beyond a
	 single instruction is considered too expensive.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " FAIL: Jump-thread path not considered: "
		 "duplication of %i insns is needed and optimizing for size.\n",
		 m_n_insns);
      return false;
    }

  /* We avoid creating irreducible inner loops unless we thread through
     a multiway branch, in which case we have deemed it worth losing
     other loop optimizations later.

     We also consider it worth creating an irreducible inner loop after
     loop optimizations if the number of copied statement is low.  */
  if (!m_threaded_multiway_branch
      && *creates_irreducible_loop
      && (!(cfun->curr_properties & PROP_loop_opts_done)
	  || (m_n_insns * param_fsm_scale_path_stmts
	      >= param_max_jump_thread_duplication_stmts)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 " FAIL: Would create irreducible loop early without "
		 "threading multiway branch.\n");
      /* We compute creates_irreducible_loop only late.  */
      return false;
    }

  /* The generic copier used by the backthreader does not re-use an
     existing threading path to reduce code duplication.  So for that
     case, drastically reduce the number of statements we are allowed
     to copy.  */
  if (!(m_threaded_through_latch && m_threaded_multiway_branch)
      && (m_n_insns * param_fsm_scale_path_stmts
	  >= param_max_jump_thread_duplication_stmts))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 " FAIL: Did not thread around loop and would copy too "
		 "many statements.\n");
      return false;
    }

  /* When there is a multi-way branch on the path, then threading can
     explode the CFG due to duplicating the edges for that multi-way
     branch.  So like above, only allow a multi-way branch on the path
     if we actually thread a multi-way branch.  */
  if (!m_threaded_multiway_branch && m_multiway_branch_in_path)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 " FAIL: Thread through multiway branch without threading "
		 "a multiway branch.\n");
      return false;
    }

  /* Threading through an empty latch would cause code to be added to
     the latch.  This could alter the loop form sufficiently to cause
     loop optimizations to fail.  Disable these threads until after
     loop optimizations have run.  */
  if ((m_threaded_through_latch || taken_edge->dest == loop->latch)
      && !(cfun->curr_properties & PROP_loop_opts_done)
      && empty_block_p (loop->latch))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 " FAIL: Thread through latch before loop opts would create "
		 "non-empty latch\n");
      return false;
    }
  /* All heuristics passed; the path is profitable to thread.  */
  if (dump_file && (dump_flags & TDF_DETAILS))
    fputc ('\n', dump_file);
  return true;
}
885 : :
886 : :
887 : : /* The current path PATH is a vector of blocks forming a jump threading
888 : : path in reverse order. TAKEN_EDGE is the edge taken from path[0].
889 : :
890 : : Convert the current path into the form used by register_jump_thread and
891 : : register it.
892 : :
893 : : Return TRUE if successful or FALSE otherwise. */
894 : :
895 : : bool
896 : 1132233 : back_threader_registry::register_path (const vec<basic_block> &m_path,
897 : : edge taken_edge)
898 : : {
899 : 1132233 : vec<jump_thread_edge *> *jump_thread_path = allocate_thread_path ();
900 : :
901 : : // The generic copier ignores the edge type. We can build the
902 : : // thread edges with any type.
903 : 2874258 : for (unsigned int j = 0; j + 1 < m_path.length (); j++)
904 : : {
905 : 1742025 : basic_block bb1 = m_path[m_path.length () - j - 1];
906 : 1742025 : basic_block bb2 = m_path[m_path.length () - j - 2];
907 : :
908 : 1742025 : edge e = find_edge (bb1, bb2);
909 : 1742025 : gcc_assert (e);
910 : 1742025 : push_edge (jump_thread_path, e, EDGE_COPY_SRC_BLOCK);
911 : : }
912 : :
913 : 1132233 : push_edge (jump_thread_path, taken_edge, EDGE_NO_COPY_SRC_BLOCK);
914 : 1132233 : return register_jump_thread (jump_thread_path);
915 : : }
916 : :
917 : : // Thread all suitable paths in the current function.
918 : : //
919 : : // Return TODO_flags.
920 : :
921 : : unsigned int
922 : 5935326 : back_threader::thread_blocks ()
923 : : {
924 : 5935326 : basic_block bb;
925 : 56788391 : FOR_EACH_BB_FN (bb, m_fun)
926 : 74651374 : if (EDGE_COUNT (bb->succs) > 1)
927 : 23798309 : maybe_thread_block (bb);
928 : :
929 : 5935326 : bool changed = m_registry.thread_through_all_blocks (true);
930 : :
931 : 5935326 : if (m_flags & BT_SPEED)
932 : 7233746 : return changed ? TODO_cleanup_cfg : 0;
933 : :
934 : : return false;
935 : : }
936 : :
937 : : namespace {
938 : :
/* Pass metadata for the early ("ethread") jump threading pass.
   Field order follows struct pass_data in tree-pass.h.  */
const pass_data pass_data_early_thread_jumps =
{
  GIMPLE_PASS,				/* type */
  "ethread",				/* name */
  OPTGROUP_NONE,			/* optinfo_flags */
  TV_TREE_SSA_THREAD_JUMPS,		/* tv_id */
  ( PROP_cfg | PROP_ssa ),		/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  ( TODO_cleanup_cfg | TODO_update_ssa ), /* todo_flags_finish */
};
951 : :
/* Pass metadata for the "thread" jump threading pass.
   Field order follows struct pass_data in tree-pass.h.  */
const pass_data pass_data_thread_jumps =
{
  GIMPLE_PASS,				/* type */
  "thread",				/* name */
  OPTGROUP_NONE,			/* optinfo_flags */
  TV_TREE_SSA_THREAD_JUMPS,		/* tv_id */
  ( PROP_cfg | PROP_ssa ),		/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_update_ssa,			/* todo_flags_finish */
};
964 : :
/* Pass metadata for the "threadfull" jump threading pass.
   Field order follows struct pass_data in tree-pass.h.  */
const pass_data pass_data_thread_jumps_full =
{
  GIMPLE_PASS,				/* type */
  "threadfull",				/* name */
  OPTGROUP_NONE,			/* optinfo_flags */
  TV_TREE_SSA_THREAD_JUMPS,		/* tv_id */
  ( PROP_cfg | PROP_ssa ),		/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_update_ssa,			/* todo_flags_finish */
};
977 : :
978 : : // Early jump threading pass optimizing for size.
979 : : class pass_early_thread_jumps : public gimple_opt_pass
980 : : {
981 : : public:
982 : 280114 : pass_early_thread_jumps (gcc::context *ctxt)
983 : 560228 : : gimple_opt_pass (pass_data_early_thread_jumps, ctxt)
984 : : {}
985 : :
986 : 0 : opt_pass * clone () override
987 : : {
988 : 0 : return new pass_early_thread_jumps (m_ctxt);
989 : : }
990 : 280114 : void set_pass_param (unsigned int, bool param) override
991 : : {
992 : 280114 : m_first = param;
993 : 280114 : }
994 : 2238990 : bool gate (function *) override
995 : : {
996 : 2238990 : return flag_thread_jumps;
997 : : }
998 : 2238713 : unsigned int execute (function *fun) override
999 : : {
1000 : 2238713 : back_threader threader (fun, BT_NONE, m_first);
1001 : 2238713 : return threader.thread_blocks ();
1002 : 2238713 : }
1003 : : private:
1004 : : bool m_first;
1005 : : };
1006 : :
1007 : : // Jump threading pass without resolving of unknown SSAs.
1008 : : class pass_thread_jumps : public gimple_opt_pass
1009 : : {
1010 : : public:
1011 : 560228 : pass_thread_jumps (gcc::context *ctxt)
1012 : 1120456 : : gimple_opt_pass (pass_data_thread_jumps, ctxt)
1013 : : {}
1014 : 280114 : opt_pass * clone (void) override
1015 : : {
1016 : 280114 : return new pass_thread_jumps (m_ctxt);
1017 : : }
1018 : 560228 : void set_pass_param (unsigned int, bool param) override
1019 : : {
1020 : 560228 : m_first = param;
1021 : 560228 : }
1022 : 1992274 : bool gate (function *) override
1023 : : {
1024 : 1992274 : return flag_thread_jumps && flag_expensive_optimizations;
1025 : : }
1026 : 1848305 : unsigned int execute (function *fun) override
1027 : : {
1028 : 1848305 : back_threader threader (fun, BT_SPEED, m_first);
1029 : 1848305 : return threader.thread_blocks ();
1030 : 1848305 : }
1031 : : private:
1032 : : bool m_first;
1033 : : };
1034 : :
1035 : : // Jump threading pass that fully resolves unknown SSAs.
1036 : : class pass_thread_jumps_full : public gimple_opt_pass
1037 : : {
1038 : : public:
1039 : 560228 : pass_thread_jumps_full (gcc::context *ctxt)
1040 : 1120456 : : gimple_opt_pass (pass_data_thread_jumps_full, ctxt)
1041 : : {}
1042 : 280114 : opt_pass * clone (void) override
1043 : : {
1044 : 280114 : return new pass_thread_jumps_full (m_ctxt);
1045 : : }
1046 : 560228 : void set_pass_param (unsigned int, bool param) override
1047 : : {
1048 : 560228 : m_first = param;
1049 : 560228 : }
1050 : 1992274 : bool gate (function *) override
1051 : : {
1052 : 1992274 : return flag_thread_jumps && flag_expensive_optimizations;
1053 : : }
1054 : 1848308 : unsigned int execute (function *fun) override
1055 : : {
1056 : 1848308 : back_threader threader (fun, BT_SPEED | BT_RESOLVE, m_first);
1057 : 1848308 : return threader.thread_blocks ();
1058 : 1848308 : }
1059 : : private:
1060 : : bool m_first;
1061 : : };
1062 : :
1063 : : } // namespace {
1064 : :
/* Factory returning a new instance of the "thread" pass.  */

gimple_opt_pass *
make_pass_thread_jumps (gcc::context *ctxt)
{
  return new pass_thread_jumps (ctxt);
}
1070 : :
/* Factory returning a new instance of the "threadfull" pass.  */

gimple_opt_pass *
make_pass_thread_jumps_full (gcc::context *ctxt)
{
  return new pass_thread_jumps_full (ctxt);
}
1076 : :
/* Factory returning a new instance of the early "ethread" pass.  */

gimple_opt_pass *
make_pass_early_thread_jumps (gcc::context *ctxt)
{
  return new pass_early_thread_jumps (ctxt);
}
|