Branch data Line data Source code
1 : : /* Function splitting pass
2 : : Copyright (C) 2010-2025 Free Software Foundation, Inc.
3 : : Contributed by Jan Hubicka <jh@suse.cz>
4 : :
5 : : This file is part of GCC.
6 : :
7 : : GCC is free software; you can redistribute it and/or modify it under
8 : : the terms of the GNU General Public License as published by the Free
9 : : Software Foundation; either version 3, or (at your option) any later
10 : : version.
11 : :
12 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 : : for more details.
16 : :
17 : : You should have received a copy of the GNU General Public License
18 : : along with GCC; see the file COPYING3. If not see
19 : : <http://www.gnu.org/licenses/>. */
20 : :
21 : : /* The purpose of this pass is to split function bodies to improve
22 : : inlining. I.e. for function of the form:
23 : :
24 : : func (...)
25 : : {
26 : : if (cheap_test)
27 : : something_small
28 : : else
29 : : something_big
30 : : }
31 : :
32 : : Produce:
33 : :
34 : : func.part (...)
35 : : {
36 : : something_big
37 : : }
38 : :
39 : : func (...)
40 : : {
41 : : if (cheap_test)
42 : : something_small
43 : : else
44 : : func.part (...);
45 : : }
46 : :
47 : : When func becomes inlinable and when cheap_test is often true, inlining func,
48 : : but not fund.part leads to performance improvement similar as inlining
49 : : original func while the code size growth is smaller.
50 : :
51 : : The pass is organized in three stages:
52 : : 1) Collect local info about basic block into BB_INFO structure and
53 : : compute function body estimated size and time.
54 : : 2) Via DFS walk find all possible basic blocks where we can split
55 : : and chose best one.
56 : : 3) If split point is found, split at the specified BB by creating a clone
57 : : and updating function to call it.
58 : :
59 : : The decisions what functions to split are in execute_split_functions
60 : : and consider_split.
61 : :
62 : : There are several possible future improvements for this pass including:
63 : :
64 : : 1) Splitting to break up large functions
65 : : 2) Splitting to reduce stack frame usage
66 : : 3) Allow split part of function to use values computed in the header part.
67 : : The values needs to be passed to split function, perhaps via same
68 : : interface as for nested functions or as argument.
69 : : 4) Support for simple rematerialization. I.e. when split part use
70 : : value computed in header from function parameter in very cheap way, we
71 : : can just recompute it.
72 : : 5) Support splitting of nested functions.
73 : : 6) Support non-SSA arguments.
74 : : 7) There is nothing preventing us from producing multiple parts of single function
75 : : when needed or splitting also the parts. */
76 : :
77 : : #include "config.h"
78 : : #include "system.h"
79 : : #include "coretypes.h"
80 : : #include "backend.h"
81 : : #include "rtl.h"
82 : : #include "tree.h"
83 : : #include "gimple.h"
84 : : #include "cfghooks.h"
85 : : #include "alloc-pool.h"
86 : : #include "tree-pass.h"
87 : : #include "ssa.h"
88 : : #include "cgraph.h"
89 : : #include "diagnostic.h"
90 : : #include "fold-const.h"
91 : : #include "cfganal.h"
92 : : #include "calls.h"
93 : : #include "gimplify.h"
94 : : #include "gimple-iterator.h"
95 : : #include "gimplify-me.h"
96 : : #include "gimple-walk.h"
97 : : #include "symbol-summary.h"
98 : : #include "sreal.h"
99 : : #include "ipa-cp.h"
100 : : #include "ipa-prop.h"
101 : : #include "tree-cfg.h"
102 : : #include "tree-into-ssa.h"
103 : : #include "tree-dfa.h"
104 : : #include "tree-inline.h"
105 : : #include "gimple-pretty-print.h"
106 : : #include "ipa-fnsummary.h"
107 : : #include "cfgloop.h"
108 : : #include "attribs.h"
109 : : #include "ipa-strub.h"
110 : :
111 : : /* Per basic block info. */
112 : :
113 : 8609847 : class split_bb_info
114 : : {
115 : : public:
116 : : unsigned int size;
117 : : sreal time;
118 : : };
119 : :
120 : : static vec<split_bb_info> bb_info_vec;
121 : :
122 : : /* Description of split point. */
123 : :
124 : 1294540 : class split_point
125 : : {
126 : : public:
127 : : /* Size of the partitions. */
128 : : sreal header_time, split_time;
129 : : unsigned int header_size, split_size;
130 : :
131 : : /* SSA names that need to be passed into spit function. */
132 : : bitmap ssa_names_to_pass;
133 : :
134 : : /* Basic block where we split (that will become entry point of new function. */
135 : : basic_block entry_bb;
136 : :
137 : : /* Count for entering the split part.
138 : : This is not count of the entry_bb because it may be in loop. */
139 : : profile_count count;
140 : :
141 : : /* Basic blocks we are splitting away. */
142 : : bitmap split_bbs;
143 : :
144 : : /* True when return value is computed on split part and thus it needs
145 : : to be returned. */
146 : : bool split_part_set_retval;
147 : : };
148 : :
149 : : /* Best split point found. */
150 : :
151 : : class split_point best_split_point;
152 : :
153 : : /* Set of basic blocks that are not allowed to dominate a split point. */
154 : :
155 : : static bitmap forbidden_dominators;
156 : :
157 : : static tree find_retval (basic_block return_bb);
158 : :
159 : : /* Callback for walk_stmt_load_store_addr_ops. If T is non-SSA automatic
160 : : variable, check it if it is present in bitmap passed via DATA. */
161 : :
162 : : static bool
163 : 35639 : test_nonssa_use (gimple *, tree t, tree, void *data)
164 : : {
165 : 35639 : t = get_base_address (t);
166 : :
167 : 35639 : if (!t || is_gimple_reg (t))
168 : 0 : return false;
169 : :
170 : 35639 : if (TREE_CODE (t) == PARM_DECL
171 : 35584 : || (VAR_P (t)
172 : 5494 : && auto_var_in_fn_p (t, current_function_decl))
173 : 30965 : || TREE_CODE (t) == RESULT_DECL
174 : : /* Normal labels are part of CFG and will be handled gratefully.
175 : : Forced labels however can be used directly by statements and
176 : : need to stay in one partition along with their uses. */
177 : 65762 : || (TREE_CODE (t) == LABEL_DECL
178 : 7205 : && FORCED_LABEL (t)))
179 : 5519 : return bitmap_bit_p ((bitmap)data, DECL_UID (t));
180 : :
181 : : /* For DECL_BY_REFERENCE, the return value is actually a pointer. We want
182 : : to pretend that the value pointed to is actual result decl. */
183 : 9704 : if ((TREE_CODE (t) == MEM_REF || INDIRECT_REF_P (t))
184 : 20416 : && TREE_CODE (TREE_OPERAND (t, 0)) == SSA_NAME
185 : 20412 : && SSA_NAME_VAR (TREE_OPERAND (t, 0))
186 : 15571 : && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND (t, 0))) == RESULT_DECL
187 : 30120 : && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
188 : 0 : return
189 : 0 : bitmap_bit_p ((bitmap)data,
190 : 0 : DECL_UID (DECL_RESULT (current_function_decl)));
191 : :
192 : : return false;
193 : : }
194 : :
195 : : /* Dump split point CURRENT. */
196 : :
197 : : static void
198 : 13 : dump_split_point (FILE * file, class split_point *current)
199 : : {
200 : 13 : fprintf (file,
201 : : "Split point at BB %i\n"
202 : : " header time: %f header size: %i\n"
203 : : " split time: %f split size: %i\n bbs: ",
204 : 13 : current->entry_bb->index, current->header_time.to_double (),
205 : : current->header_size, current->split_time.to_double (),
206 : : current->split_size);
207 : 13 : dump_bitmap (file, current->split_bbs);
208 : 13 : fprintf (file, " SSA names to pass: ");
209 : 13 : dump_bitmap (file, current->ssa_names_to_pass);
210 : 13 : }
211 : :
212 : : /* Look for all BBs in header that might lead to the split part and verify
213 : : that they are not defining any non-SSA var used by the split part.
214 : : Parameters are the same as for consider_split. */
215 : :
216 : : static bool
217 : 14926 : verify_non_ssa_vars (class split_point *current, bitmap non_ssa_vars,
218 : : basic_block return_bb)
219 : : {
220 : 14926 : bitmap seen = BITMAP_ALLOC (NULL);
221 : 14926 : vec<basic_block> worklist = vNULL;
222 : 14926 : edge e;
223 : 14926 : edge_iterator ei;
224 : 14926 : bool ok = true;
225 : 14926 : basic_block bb;
226 : :
227 : 32154 : FOR_EACH_EDGE (e, ei, current->entry_bb->preds)
228 : 17228 : if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
229 : 17228 : && !bitmap_bit_p (current->split_bbs, e->src->index))
230 : : {
231 : 16651 : worklist.safe_push (e->src);
232 : 16651 : bitmap_set_bit (seen, e->src->index);
233 : : }
234 : :
235 : 36223 : while (!worklist.is_empty ())
236 : : {
237 : 24689 : bb = worklist.pop ();
238 : 51725 : FOR_EACH_EDGE (e, ei, bb->preds)
239 : 27036 : if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
240 : 27036 : && bitmap_set_bit (seen, e->src->index))
241 : : {
242 : 10545 : gcc_checking_assert (!bitmap_bit_p (current->split_bbs,
243 : : e->src->index));
244 : 10545 : worklist.safe_push (e->src);
245 : : }
246 : 373154 : for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
247 : 323776 : gsi_next (&bsi))
248 : : {
249 : 327168 : gimple *stmt = gsi_stmt (bsi);
250 : 327168 : if (is_gimple_debug (stmt))
251 : 268112 : continue;
252 : 59056 : if (walk_stmt_load_store_addr_ops
253 : 59056 : (stmt, non_ssa_vars, test_nonssa_use, test_nonssa_use,
254 : : test_nonssa_use))
255 : : {
256 : 3392 : ok = false;
257 : 3392 : goto done;
258 : : }
259 : 323862 : if (glabel *label_stmt = dyn_cast <glabel *> (stmt))
260 : 86 : if (test_nonssa_use (stmt, gimple_label_label (label_stmt),
261 : : NULL_TREE, non_ssa_vars))
262 : : {
263 : 0 : ok = false;
264 : 0 : goto done;
265 : : }
266 : : }
267 : 22470 : for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
268 : 1173 : gsi_next (&bsi))
269 : : {
270 : 1173 : if (walk_stmt_load_store_addr_ops
271 : 1173 : (gsi_stmt (bsi), non_ssa_vars, test_nonssa_use, test_nonssa_use,
272 : : test_nonssa_use))
273 : : {
274 : 0 : ok = false;
275 : 0 : goto done;
276 : : }
277 : : }
278 : 64412 : FOR_EACH_EDGE (e, ei, bb->succs)
279 : : {
280 : 43115 : if (e->dest != return_bb)
281 : 40347 : continue;
282 : 2768 : for (gphi_iterator bsi = gsi_start_phis (return_bb);
283 : 6279 : !gsi_end_p (bsi);
284 : 3511 : gsi_next (&bsi))
285 : : {
286 : 3511 : gphi *stmt = bsi.phi ();
287 : 3511 : tree op = gimple_phi_arg_def (stmt, e->dest_idx);
288 : :
289 : 7022 : if (virtual_operand_p (gimple_phi_result (stmt)))
290 : 2655 : continue;
291 : 856 : if (TREE_CODE (op) != SSA_NAME
292 : 856 : && test_nonssa_use (stmt, op, op, non_ssa_vars))
293 : : {
294 : 0 : ok = false;
295 : 0 : goto done;
296 : : }
297 : : }
298 : : }
299 : : }
300 : :
301 : : /* Verify that the rest of function does not define any label
302 : : used by the split part. */
303 : 121952 : FOR_EACH_BB_FN (bb, cfun)
304 : 110421 : if (!bitmap_bit_p (current->split_bbs, bb->index)
305 : 110421 : && !bitmap_bit_p (seen, bb->index))
306 : : {
307 : 40011 : gimple_stmt_iterator bsi;
308 : 87138 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
309 : 117537 : if (glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (bsi)))
310 : : {
311 : 7119 : if (test_nonssa_use (label_stmt,
312 : : gimple_label_label (label_stmt),
313 : : NULL_TREE, non_ssa_vars))
314 : : {
315 : 3 : ok = false;
316 : 3 : goto done;
317 : : }
318 : : }
319 : : else
320 : : break;
321 : : }
322 : :
323 : 11531 : done:
324 : 14926 : BITMAP_FREE (seen);
325 : 14926 : worklist.release ();
326 : 14926 : return ok;
327 : : }
328 : :
329 : : /* If STMT is a call, check the callee against a list of forbidden
330 : : predicate functions. If a match is found, look for uses of the
331 : : call result in condition statements that compare against zero.
332 : : For each such use, find the block targeted by the condition
333 : : statement for the nonzero result, and set the bit for this block
334 : : in the forbidden dominators bitmap. The purpose of this is to avoid
335 : : selecting a split point where we are likely to lose the chance
336 : : to optimize away an unused function call. */
337 : :
338 : : static void
339 : 39042769 : check_forbidden_calls (gimple *stmt)
340 : : {
341 : 39042769 : imm_use_iterator use_iter;
342 : 39042769 : use_operand_p use_p;
343 : 39042769 : tree lhs;
344 : :
345 : : /* At the moment, __builtin_constant_p is the only forbidden
346 : : predicate function call (see PR49642). */
347 : 39042769 : if (!gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
348 : 39040751 : return;
349 : :
350 : 2018 : lhs = gimple_call_lhs (stmt);
351 : :
352 : 2018 : if (!lhs || TREE_CODE (lhs) != SSA_NAME)
353 : : return;
354 : :
355 : 5863 : FOR_EACH_IMM_USE_FAST (use_p, use_iter, lhs)
356 : : {
357 : 3845 : tree op1;
358 : 3845 : basic_block use_bb, forbidden_bb;
359 : 3845 : enum tree_code code;
360 : 3845 : edge true_edge, false_edge;
361 : 3845 : gcond *use_stmt;
362 : :
363 : 3845 : use_stmt = dyn_cast <gcond *> (USE_STMT (use_p));
364 : 3845 : if (!use_stmt)
365 : 29 : continue;
366 : :
367 : : /* Assuming canonical form for GIMPLE_COND here, with constant
368 : : in second position. */
369 : 3816 : op1 = gimple_cond_rhs (use_stmt);
370 : 3816 : code = gimple_cond_code (use_stmt);
371 : 3816 : use_bb = gimple_bb (use_stmt);
372 : :
373 : 3816 : extract_true_false_edges_from_block (use_bb, &true_edge, &false_edge);
374 : :
375 : : /* We're only interested in comparisons that distinguish
376 : : unambiguously from zero. */
377 : 3816 : if (!integer_zerop (op1) || code == LE_EXPR || code == GE_EXPR)
378 : 0 : continue;
379 : :
380 : 3816 : if (code == EQ_EXPR)
381 : 5 : forbidden_bb = false_edge->dest;
382 : : else
383 : 3811 : forbidden_bb = true_edge->dest;
384 : :
385 : 3816 : bitmap_set_bit (forbidden_dominators, forbidden_bb->index);
386 : : }
387 : : }
388 : :
389 : : /* If BB is dominated by any block in the forbidden dominators set,
390 : : return TRUE; else FALSE. */
391 : :
392 : : static bool
393 : 1299 : dominated_by_forbidden (basic_block bb)
394 : : {
395 : 1299 : unsigned dom_bb;
396 : 1299 : bitmap_iterator bi;
397 : :
398 : 3870 : EXECUTE_IF_SET_IN_BITMAP (forbidden_dominators, 1, dom_bb, bi)
399 : : {
400 : 2585 : if (dominated_by_p (CDI_DOMINATORS, bb,
401 : 2585 : BASIC_BLOCK_FOR_FN (cfun, dom_bb)))
402 : : return true;
403 : : }
404 : :
405 : : return false;
406 : : }
407 : :
408 : : /* For give split point CURRENT and return block RETURN_BB return 1
409 : : if ssa name VAL is set by split part and 0 otherwise. */
410 : : static bool
411 : 18868 : split_part_set_ssa_name_p (tree val, class split_point *current,
412 : : basic_block return_bb)
413 : : {
414 : 18868 : if (TREE_CODE (val) != SSA_NAME)
415 : : return false;
416 : :
417 : 18868 : return (!SSA_NAME_IS_DEFAULT_DEF (val)
418 : 18868 : && (bitmap_bit_p (current->split_bbs,
419 : 15041 : gimple_bb (SSA_NAME_DEF_STMT (val))->index)
420 : 15039 : || gimple_bb (SSA_NAME_DEF_STMT (val)) == return_bb));
421 : : }
422 : :
423 : : /* We found an split_point CURRENT. NON_SSA_VARS is bitmap of all non ssa
424 : : variables used and RETURN_BB is return basic block.
425 : : See if we can split function here. */
426 : :
427 : : static void
428 : 2531864 : consider_split (class split_point *current, bitmap non_ssa_vars,
429 : : basic_block return_bb)
430 : : {
431 : 2531864 : tree parm;
432 : 2531864 : unsigned int num_args = 0;
433 : 2531864 : unsigned int call_overhead;
434 : 2531864 : edge e;
435 : 2531864 : edge_iterator ei;
436 : 2531864 : gphi_iterator bsi;
437 : 2531864 : unsigned int i;
438 : 2531864 : tree retval;
439 : 2531864 : bool back_edge = false;
440 : :
441 : 2531864 : if (dump_file && (dump_flags & TDF_DETAILS))
442 : 6 : dump_split_point (dump_file, current);
443 : :
444 : 2531864 : current->count = profile_count::zero ();
445 : 5501626 : FOR_EACH_EDGE (e, ei, current->entry_bb->preds)
446 : : {
447 : 2969762 : if (e->flags & EDGE_DFS_BACK)
448 : 139794 : back_edge = true;
449 : 2969762 : if (!bitmap_bit_p (current->split_bbs, e->src->index))
450 : 2829967 : current->count += e->count ();
451 : : }
452 : :
453 : : /* Do not split when we would end up calling function anyway.
454 : : Compares are three state, use !(...<...) to also give up when outcome
455 : : is unknown. */
456 : 5063728 : if (!(current->count
457 : 2531864 : < (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.apply_scale
458 : 5063728 : (param_partial_inlining_entry_probability, 100))))
459 : : {
460 : : /* When profile is guessed, we cannot expect it to give us
461 : : realistic estimate on likeliness of function taking the
462 : : complex path. As a special case, when tail of the function is
463 : : a loop, enable splitting since inlining code skipping the loop
464 : : is likely noticeable win. */
465 : 1443006 : if (back_edge
466 : 82111 : && profile_status_for_fn (cfun) != PROFILE_READ
467 : 1525080 : && current->count
468 : 82074 : < ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
469 : : {
470 : 15068 : if (dump_file && (dump_flags & TDF_DETAILS))
471 : : {
472 : 0 : fprintf (dump_file,
473 : : " Split before loop, accepting despite low counts");
474 : 0 : current->count.dump (dump_file);
475 : 0 : fprintf (dump_file, " ");
476 : 0 : ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.dump (dump_file);
477 : : }
478 : : }
479 : : else
480 : : {
481 : 1427938 : if (dump_file && (dump_flags & TDF_DETAILS))
482 : 3 : fprintf (dump_file,
483 : : " Refused: incoming frequency is too large.\n");
484 : 2466283 : return;
485 : : }
486 : : }
487 : :
488 : 1103926 : if (!current->header_size)
489 : : {
490 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
491 : 0 : fprintf (dump_file, " Refused: header empty\n");
492 : 0 : return;
493 : : }
494 : :
495 : : /* Verify that PHI args on entry are either virtual or all their operands
496 : : incoming from header are the same. */
497 : 1284290 : for (bsi = gsi_start_phis (current->entry_bb); !gsi_end_p (bsi); gsi_next (&bsi))
498 : : {
499 : 197241 : gphi *stmt = bsi.phi ();
500 : 197241 : tree val = NULL;
501 : :
502 : 394482 : if (virtual_operand_p (gimple_phi_result (stmt)))
503 : 53290 : continue;
504 : 415043 : for (i = 0; i < gimple_phi_num_args (stmt); i++)
505 : : {
506 : 287969 : edge e = gimple_phi_arg_edge (stmt, i);
507 : 287969 : if (!bitmap_bit_p (current->split_bbs, e->src->index))
508 : : {
509 : 161037 : tree edge_val = gimple_phi_arg_def (stmt, i);
510 : 161037 : if (val && edge_val != val)
511 : : {
512 : 16877 : if (dump_file && (dump_flags & TDF_DETAILS))
513 : 0 : fprintf (dump_file,
514 : : " Refused: entry BB has PHI with multiple variants\n");
515 : 16877 : return;
516 : : }
517 : : val = edge_val;
518 : : }
519 : : }
520 : : }
521 : :
522 : :
523 : : /* See what argument we will pass to the split function and compute
524 : : call overhead. */
525 : 1087049 : call_overhead = eni_size_weights.call_cost;
526 : 3113975 : for (parm = DECL_ARGUMENTS (current_function_decl); parm;
527 : 2026926 : parm = DECL_CHAIN (parm))
528 : : {
529 : 2026926 : if (!is_gimple_reg (parm))
530 : : {
531 : 133649 : if (bitmap_bit_p (non_ssa_vars, DECL_UID (parm)))
532 : : {
533 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
534 : 0 : fprintf (dump_file,
535 : : " Refused: need to pass non-ssa param values\n");
536 : 0 : return;
537 : : }
538 : : }
539 : : else
540 : : {
541 : 1893277 : tree ddef = ssa_default_def (cfun, parm);
542 : 1893277 : if (ddef
543 : 3740683 : && bitmap_bit_p (current->ssa_names_to_pass,
544 : 1847406 : SSA_NAME_VERSION (ddef)))
545 : : {
546 : 634002 : if (!VOID_TYPE_P (TREE_TYPE (parm)))
547 : 634002 : call_overhead += estimate_move_cost (TREE_TYPE (parm), false);
548 : 634002 : num_args++;
549 : : }
550 : : }
551 : : }
552 : 1087049 : if (!VOID_TYPE_P (TREE_TYPE (TREE_TYPE (current_function_decl))))
553 : 714103 : call_overhead += estimate_move_cost (TREE_TYPE (TREE_TYPE
554 : : (current_function_decl)),
555 : : false);
556 : :
557 : 1087049 : if (current->split_size <= call_overhead)
558 : : {
559 : 529943 : if (dump_file && (dump_flags & TDF_DETAILS))
560 : 0 : fprintf (dump_file,
561 : : " Refused: split size is smaller than call overhead\n");
562 : 529943 : return;
563 : : }
564 : : /* FIXME: The logic here is not very precise, because inliner does use
565 : : inline predicates to reduce function body size. We add 10 to anticipate
566 : : that. Next stage1 we should try to be more meaningful here. */
567 : 557106 : if (current->header_size + call_overhead
568 : 557106 : >= (unsigned int)(DECL_DECLARED_INLINE_P (current_function_decl)
569 : 145406 : ? param_max_inline_insns_single
570 : 557106 : : param_max_inline_insns_auto) + 10)
571 : : {
572 : 386181 : if (dump_file && (dump_flags & TDF_DETAILS))
573 : 0 : fprintf (dump_file,
574 : : " Refused: header size is too large for inline candidate\n");
575 : 386181 : return;
576 : : }
577 : :
578 : : /* Splitting functions brings the target out of comdat group; this will
579 : : lead to code duplication if the function is reused by other unit.
580 : : Limit this duplication. This is consistent with limit in tree-sra.cc
581 : : FIXME: with LTO we ought to be able to do better! */
582 : 304536 : if (DECL_ONE_ONLY (current_function_decl)
583 : 304530 : && current->split_size >= (unsigned int) param_max_inline_insns_auto + 10)
584 : : {
585 : 14665 : if (dump_file && (dump_flags & TDF_DETAILS))
586 : 0 : fprintf (dump_file,
587 : : " Refused: function is COMDAT and tail is too large\n");
588 : 14665 : return;
589 : : }
590 : : /* For comdat functions also reject very small tails; those will likely get
591 : : inlined back and we do not want to risk the duplication overhead.
592 : : FIXME: with LTO we ought to be able to do better! */
593 : 275206 : if (DECL_ONE_ONLY (current_function_decl)
594 : 156260 : && current->split_size
595 : 118940 : <= (unsigned int) param_early_inlining_insns / 2)
596 : : {
597 : 28395 : if (dump_file && (dump_flags & TDF_DETAILS))
598 : 0 : fprintf (dump_file,
599 : : " Refused: function is COMDAT and tail is too small\n");
600 : 28395 : return;
601 : : }
602 : :
603 : : /* FIXME: we currently can pass only SSA function parameters to the split
604 : : arguments. Once parm_adjustment infrastructure is supported by cloning,
605 : : we can pass more than that. */
606 : 127865 : if (num_args != bitmap_count_bits (current->ssa_names_to_pass))
607 : : {
608 : :
609 : 58875 : if (dump_file && (dump_flags & TDF_DETAILS))
610 : 0 : fprintf (dump_file,
611 : : " Refused: need to pass non-param values\n");
612 : 58875 : return;
613 : : }
614 : :
615 : : /* When there are non-ssa vars used in the split region, see if they
616 : : are used in the header region. If so, reject the split.
617 : : FIXME: we can use nested function support to access both. */
618 : 68990 : if (!bitmap_empty_p (non_ssa_vars)
619 : 68990 : && !verify_non_ssa_vars (current, non_ssa_vars, return_bb))
620 : : {
621 : 3395 : if (dump_file && (dump_flags & TDF_DETAILS))
622 : 0 : fprintf (dump_file,
623 : : " Refused: split part has non-ssa uses\n");
624 : 3395 : return;
625 : : }
626 : :
627 : : /* If the split point is dominated by a forbidden block, reject
628 : : the split. */
629 : 65595 : if (!bitmap_empty_p (forbidden_dominators)
630 : 65595 : && dominated_by_forbidden (current->entry_bb))
631 : : {
632 : 14 : if (dump_file && (dump_flags & TDF_DETAILS))
633 : 0 : fprintf (dump_file,
634 : : " Refused: split point dominated by forbidden block\n");
635 : 14 : return;
636 : : }
637 : :
638 : : /* See if retval used by return bb is computed by header or split part.
639 : : When it is computed by split part, we need to produce return statement
640 : : in the split part and add code to header to pass it around.
641 : :
642 : : This is bit tricky to test:
643 : : 1) When there is no return_bb or no return value, we always pass
644 : : value around.
645 : : 2) Invariants are always computed by caller.
646 : : 3) For SSA we need to look if defining statement is in header or split part
647 : : 4) For non-SSA we need to look where the var is computed. */
648 : 65581 : retval = find_retval (return_bb);
649 : 65581 : if (!retval)
650 : : {
651 : : /* If there is a return_bb with no return value in function returning
652 : : value by reference, also make the split part return void, otherwise
653 : : we expansion would try to create a non-POD temporary, which is
654 : : invalid. */
655 : 39608 : if (return_bb != EXIT_BLOCK_PTR_FOR_FN (cfun)
656 : 30290 : && DECL_RESULT (current_function_decl)
657 : 69898 : && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
658 : 0 : current->split_part_set_retval = false;
659 : : else
660 : 39608 : current->split_part_set_retval = true;
661 : : }
662 : 25973 : else if (is_gimple_min_invariant (retval))
663 : 2099 : current->split_part_set_retval = false;
664 : : /* Special case is value returned by reference we record as if it was non-ssa
665 : : set to result_decl. */
666 : 23874 : else if (TREE_CODE (retval) == SSA_NAME
667 : 18868 : && SSA_NAME_VAR (retval)
668 : 5853 : && TREE_CODE (SSA_NAME_VAR (retval)) == RESULT_DECL
669 : 23874 : && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
670 : 0 : current->split_part_set_retval
671 : 0 : = bitmap_bit_p (non_ssa_vars, DECL_UID (SSA_NAME_VAR (retval)));
672 : 23874 : else if (TREE_CODE (retval) == SSA_NAME)
673 : 18868 : current->split_part_set_retval
674 : 18868 : = split_part_set_ssa_name_p (retval, current, return_bb);
675 : 5006 : else if (TREE_CODE (retval) == PARM_DECL)
676 : 0 : current->split_part_set_retval = false;
677 : 5006 : else if (VAR_P (retval)
678 : 436 : || TREE_CODE (retval) == RESULT_DECL)
679 : 5006 : current->split_part_set_retval
680 : 5006 : = bitmap_bit_p (non_ssa_vars, DECL_UID (retval));
681 : : else
682 : 0 : current->split_part_set_retval = true;
683 : :
684 : : /* split_function fixes up at most one PHI non-virtual PHI node in return_bb,
685 : : for the return value. If there are other PHIs, give up. */
686 : 65581 : if (return_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
687 : : {
688 : 56263 : gphi_iterator psi;
689 : :
690 : 114037 : for (psi = gsi_start_phis (return_bb); !gsi_end_p (psi); gsi_next (&psi))
691 : 57774 : if (!virtual_operand_p (gimple_phi_result (psi.phi ()))
692 : 57774 : && !(retval
693 : 12650 : && current->split_part_set_retval
694 : 12650 : && TREE_CODE (retval) == SSA_NAME
695 : 12650 : && !DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))
696 : 12650 : && SSA_NAME_DEF_STMT (retval) == psi.phi ()))
697 : : {
698 : 0 : if (dump_file && (dump_flags & TDF_DETAILS))
699 : 0 : fprintf (dump_file,
700 : : " Refused: return bb has extra PHIs\n");
701 : 0 : return;
702 : : }
703 : : }
704 : :
705 : 65581 : if (dump_file && (dump_flags & TDF_DETAILS))
706 : 3 : fprintf (dump_file, " Accepted!\n");
707 : :
708 : : /* At the moment chose split point with lowest count and that leaves
709 : : out smallest size of header.
710 : : In future we might re-consider this heuristics. */
711 : 65581 : if (!best_split_point.split_bbs
712 : 16519 : || best_split_point.count
713 : 16519 : > current->count
714 : 135549 : || (best_split_point.count == current->count
715 : 4387 : && best_split_point.split_size < current->split_size))
716 : :
717 : : {
718 : 52367 : if (dump_file && (dump_flags & TDF_DETAILS))
719 : 2 : fprintf (dump_file, " New best split point!\n");
720 : 52367 : if (best_split_point.ssa_names_to_pass)
721 : : {
722 : 3305 : BITMAP_FREE (best_split_point.ssa_names_to_pass);
723 : 3305 : BITMAP_FREE (best_split_point.split_bbs);
724 : : }
725 : 52367 : best_split_point = *current;
726 : 52367 : best_split_point.ssa_names_to_pass = BITMAP_ALLOC (NULL);
727 : 52367 : bitmap_copy (best_split_point.ssa_names_to_pass,
728 : 52367 : current->ssa_names_to_pass);
729 : 52367 : best_split_point.split_bbs = BITMAP_ALLOC (NULL);
730 : 52367 : bitmap_copy (best_split_point.split_bbs, current->split_bbs);
731 : : }
732 : : }
733 : :
734 : : /* Return basic block containing RETURN statement. We allow basic blocks
735 : : of the form:
736 : : <retval> = tmp_var;
737 : : return <retval>
738 : : but return_bb cannot be more complex than this (except for
739 : : -fsanitize=thread we allow TSAN_FUNC_EXIT () internal call in there).
740 : : If nothing is found, return the exit block.
741 : :
742 : : When there are multiple RETURN statement, chose one with return value,
743 : : since that one is more likely shared by multiple code paths.
744 : :
745 : : Return BB is special, because for function splitting it is the only
746 : : basic block that is duplicated in between header and split part of the
747 : : function.
748 : :
749 : : TODO: We might support multiple return blocks. */
750 : :
751 : : static basic_block
752 : 1294556 : find_return_bb (void)
753 : : {
754 : 1294556 : edge e;
755 : 1294556 : basic_block return_bb = EXIT_BLOCK_PTR_FOR_FN (cfun);
756 : 1294556 : gimple_stmt_iterator bsi;
757 : 1294556 : bool found_return = false;
758 : 1294556 : tree retval = NULL_TREE;
759 : :
760 : 2176064 : if (!single_pred_p (EXIT_BLOCK_PTR_FOR_FN (cfun)))
761 : : return return_bb;
762 : :
763 : 1294339 : e = single_pred_edge (EXIT_BLOCK_PTR_FOR_FN (cfun));
764 : 9727634 : for (bsi = gsi_last_bb (e->src); !gsi_end_p (bsi); gsi_prev (&bsi))
765 : : {
766 : 4450986 : gimple *stmt = gsi_stmt (bsi);
767 : 4450986 : if (gimple_code (stmt) == GIMPLE_LABEL
768 : 4440300 : || is_gimple_debug (stmt)
769 : 6918696 : || gimple_clobber_p (stmt))
770 : : ;
771 : 2188226 : else if (gimple_code (stmt) == GIMPLE_ASSIGN
772 : 624109 : && found_return
773 : 624109 : && gimple_assign_single_p (stmt)
774 : 522028 : && (auto_var_in_fn_p (gimple_assign_rhs1 (stmt),
775 : : current_function_decl)
776 : 500665 : || is_gimple_min_invariant (gimple_assign_rhs1 (stmt)))
777 : 2312683 : && retval == gimple_assign_lhs (stmt))
778 : : ;
779 : 2175966 : else if (greturn *return_stmt = dyn_cast <greturn *> (stmt))
780 : : {
781 : 1294339 : found_return = true;
782 : 1294339 : retval = gimple_return_retval (return_stmt);
783 : : }
784 : : /* For -fsanitize=thread, allow also TSAN_FUNC_EXIT () in the return
785 : : bb. */
786 : 881627 : else if ((flag_sanitize & SANITIZE_THREAD)
787 : 881627 : && gimple_call_internal_p (stmt, IFN_TSAN_FUNC_EXIT))
788 : : ;
789 : : else
790 : : break;
791 : : }
792 : 1294339 : if (gsi_end_p (bsi) && found_return)
793 : 412831 : return_bb = e->src;
794 : :
795 : : return return_bb;
796 : : }
797 : :
798 : : /* Given return basic block RETURN_BB, see where return value is really
799 : : stored. */
800 : : static tree
801 : 101310 : find_retval (basic_block return_bb)
802 : : {
803 : 101310 : gimple_stmt_iterator bsi;
804 : 286426 : for (bsi = gsi_start_bb (return_bb); !gsi_end_p (bsi); gsi_next (&bsi))
805 : 175798 : if (greturn *return_stmt = dyn_cast <greturn *> (gsi_stmt (bsi)))
806 : 91928 : return gimple_return_retval (return_stmt);
807 : 83870 : else if (gimple_code (gsi_stmt (bsi)) == GIMPLE_ASSIGN
808 : 83870 : && !gimple_clobber_p (gsi_stmt (bsi)))
809 : 64 : return gimple_assign_rhs1 (gsi_stmt (bsi));
810 : : return NULL;
811 : : }
812 : :
813 : : /* Callback for walk_stmt_load_store_addr_ops. If T is non-SSA automatic
814 : : variable, mark it as used in bitmap passed via DATA.
815 : : Return true when access to T prevents splitting the function. */
816 : :
817 : : static bool
818 : 7884086 : mark_nonssa_use (gimple *, tree t, tree, void *data)
819 : : {
820 : 7884086 : t = get_base_address (t);
821 : :
822 : 7884086 : if (!t || is_gimple_reg (t))
823 : 0 : return false;
824 : :
825 : : /* At present we can't pass non-SSA arguments to split function.
826 : : FIXME: this can be relaxed by passing references to arguments. */
827 : 7884086 : if (TREE_CODE (t) == PARM_DECL)
828 : : {
829 : 252045 : if (dump_file && (dump_flags & TDF_DETAILS))
830 : 0 : fprintf (dump_file,
831 : : "Cannot split: use of non-ssa function parameter.\n");
832 : 252045 : return true;
833 : : }
834 : :
835 : 3231326 : if ((VAR_P (t) && auto_var_in_fn_p (t, current_function_decl))
836 : 5069950 : || TREE_CODE (t) == RESULT_DECL
837 : 12645170 : || (TREE_CODE (t) == LABEL_DECL && FORCED_LABEL (t)))
838 : 2618948 : bitmap_set_bit ((bitmap)data, DECL_UID (t));
839 : :
840 : : /* For DECL_BY_REFERENCE, the return value is actually a pointer. We want
841 : : to pretend that the value pointed to is actual result decl. */
842 : 4193519 : if ((TREE_CODE (t) == MEM_REF || INDIRECT_REF_P (t))
843 : 3438522 : && TREE_CODE (TREE_OPERAND (t, 0)) == SSA_NAME
844 : 3437943 : && SSA_NAME_VAR (TREE_OPERAND (t, 0))
845 : 2785004 : && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND (t, 0))) == RESULT_DECL
846 : 7703033 : && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
847 : 70992 : return
848 : 70992 : bitmap_bit_p ((bitmap)data,
849 : 70992 : DECL_UID (DECL_RESULT (current_function_decl)));
850 : :
851 : : return false;
852 : : }
853 : :
854 : : /* Compute local properties of basic block BB we collect when looking for
855 : : split points. We look for ssa defs and store them in SET_SSA_NAMES,
856 : : for ssa uses and store them in USED_SSA_NAMES and for any non-SSA automatic
857 : : vars stored in NON_SSA_VARS.
858 : :
859 : : When BB has edge to RETURN_BB, collect uses in RETURN_BB too.
860 : :
861 : : Return false when BB contains something that prevents it from being put into
862 : : split function. */
863 : :
864 : : static bool
865 : 4309601 : visit_bb (basic_block bb, basic_block return_bb,
866 : : bitmap set_ssa_names, bitmap used_ssa_names,
867 : : bitmap non_ssa_vars)
868 : : {
869 : 4309601 : edge e;
870 : 4309601 : edge_iterator ei;
871 : 4309601 : bool can_split = true;
872 : :
873 : 46470674 : for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
874 : 37851472 : gsi_next (&bsi))
875 : : {
876 : 37851472 : gimple *stmt = gsi_stmt (bsi);
877 : 37851472 : tree op;
878 : 37851472 : ssa_op_iter iter;
879 : :
880 : 37851472 : if (is_gimple_debug (stmt))
881 : 24819383 : continue;
882 : :
883 : 13840113 : if (gimple_clobber_p (stmt))
884 : 808024 : continue;
885 : :
886 : : /* FIXME: We can split regions containing EH. We cannot however
887 : : split RESX, EH_DISPATCH and EH_POINTER referring to same region
888 : : into different partitions. This would require tracking of
889 : : EH regions and checking in consider_split_point if they
890 : : are not used elsewhere. */
891 : 13032089 : if (gimple_code (stmt) == GIMPLE_RESX)
892 : : {
893 : 104304 : if (dump_file && (dump_flags & TDF_DETAILS))
894 : 0 : fprintf (dump_file, "Cannot split: resx.\n");
895 : : can_split = false;
896 : : }
897 : 13032089 : if (gimple_code (stmt) == GIMPLE_EH_DISPATCH)
898 : : {
899 : 6736 : if (dump_file && (dump_flags & TDF_DETAILS))
900 : 0 : fprintf (dump_file, "Cannot split: eh dispatch.\n");
901 : : can_split = false;
902 : : }
903 : :
904 : : /* Check calls that would prevent splitting. */
905 : 13032089 : if (gimple_code (stmt) == GIMPLE_CALL)
906 : : {
907 : 2282391 : if (tree decl = gimple_call_fndecl (stmt))
908 : : {
909 : : /* Check builtins that would prevent splitting. */
910 : 2187966 : if (fndecl_built_in_p (decl, BUILT_IN_NORMAL))
911 : 648281 : switch (DECL_FUNCTION_CODE (decl))
912 : : {
913 : : /* FIXME: once we will allow passing non-parm values to
914 : : split part, we need to be sure to handle correct
915 : : builtin_stack_save and builtin_stack_restore. At the
916 : : moment we are safe; there is no way to store
917 : : builtin_stack_save result in non-SSA variable since all
918 : : calls to those are compiler generated. */
919 : 10 : case BUILT_IN_APPLY:
920 : 10 : case BUILT_IN_APPLY_ARGS:
921 : 10 : case BUILT_IN_VA_START:
922 : 10 : if (dump_file && (dump_flags & TDF_DETAILS))
923 : 0 : fprintf (dump_file,
924 : : "Cannot split: builtin_apply and va_start.\n");
925 : : can_split = false;
926 : : break;
927 : 7622 : case BUILT_IN_EH_POINTER:
928 : 7622 : if (dump_file && (dump_flags & TDF_DETAILS))
929 : 0 : fprintf (dump_file,
930 : : "Cannot split: builtin_eh_pointer.\n");
931 : : can_split = false;
932 : : break;
933 : : default:
934 : : break;
935 : : }
936 : :
937 : : /* Calls to functions (which have the warning or error
938 : : attribute on them) should not be split off into another
939 : : function. */
940 : 2187966 : if (lookup_attribute ("warning", DECL_ATTRIBUTES (decl))
941 : 2187966 : || lookup_attribute ("error", DECL_ATTRIBUTES (decl)))
942 : : {
943 : 20 : if (dump_file && (dump_flags & TDF_DETAILS))
944 : 0 : fprintf (dump_file,
945 : : "Cannot split: warning or error attribute.\n");
946 : : can_split = false;
947 : : }
948 : : }
949 : : }
950 : :
951 : 19192079 : FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_DEF)
952 : 6159990 : bitmap_set_bit (set_ssa_names, SSA_NAME_VERSION (op));
953 : 25546336 : FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
954 : 12514247 : bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
955 : 13032089 : can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
956 : : mark_nonssa_use,
957 : : mark_nonssa_use,
958 : : mark_nonssa_use);
959 : : }
960 : 5255419 : for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
961 : 945818 : gsi_next (&bsi))
962 : : {
963 : 945818 : gphi *stmt = bsi.phi ();
964 : 945818 : unsigned int i;
965 : :
966 : 1891636 : if (virtual_operand_p (gimple_phi_result (stmt)))
967 : 400868 : continue;
968 : 1089900 : bitmap_set_bit (set_ssa_names,
969 : 544950 : SSA_NAME_VERSION (gimple_phi_result (stmt)));
970 : 1794283 : for (i = 0; i < gimple_phi_num_args (stmt); i++)
971 : : {
972 : 1249333 : tree op = gimple_phi_arg_def (stmt, i);
973 : 1249333 : if (TREE_CODE (op) == SSA_NAME)
974 : 860465 : bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
975 : : }
976 : 544950 : can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
977 : : mark_nonssa_use,
978 : : mark_nonssa_use,
979 : : mark_nonssa_use);
980 : : }
981 : : /* Record also uses coming from PHI operand in return BB. */
982 : 10029094 : FOR_EACH_EDGE (e, ei, bb->succs)
983 : 5719493 : if (e->dest == return_bb)
984 : : {
985 : 1601240 : for (gphi_iterator bsi = gsi_start_phis (return_bb);
986 : 2344967 : !gsi_end_p (bsi);
987 : 743727 : gsi_next (&bsi))
988 : : {
989 : 743727 : gphi *stmt = bsi.phi ();
990 : 743727 : tree op = gimple_phi_arg_def (stmt, e->dest_idx);
991 : :
992 : 1487454 : if (virtual_operand_p (gimple_phi_result (stmt)))
993 : 449596 : continue;
994 : 294131 : if (TREE_CODE (op) == SSA_NAME)
995 : 145494 : bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
996 : : else
997 : 148637 : can_split &= !mark_nonssa_use (stmt, op, op, non_ssa_vars);
998 : : }
999 : : }
1000 : 4309601 : return can_split;
1001 : : }
1002 : :
1003 : : /* Stack entry for recursive DFS walk in find_split_point. */
1004 : :
1005 : 5604141 : class stack_entry
1006 : : {
1007 : : public:
1008 : : /* Basic block we are examining. */
1009 : : basic_block bb;
1010 : :
1011 : : /* SSA names set and used by the BB and all BBs reachable
1012 : : from it via DFS walk. */
1013 : : bitmap set_ssa_names, used_ssa_names;
1014 : : bitmap non_ssa_vars;
1015 : :
1016 : : /* All BBS visited from this BB via DFS walk. */
1017 : : bitmap bbs_visited;
1018 : :
1019 : : /* Last examined edge in DFS walk. Since we walk unoriented graph,
1020 : : the value is up to sum of incoming and outgoing edges of BB. */
1021 : : unsigned int edge_num;
1022 : :
1023 : : /* Stack entry index of earliest BB reachable from current BB
1024 : : or any BB visited later in DFS walk. */
1025 : : int earliest;
1026 : :
1027 : : /* Overall time and size of all BBs reached from this BB in DFS walk. */
1028 : : sreal overall_time;
1029 : : int overall_size;
1030 : :
1031 : : /* When false we cannot split on this BB. */
1032 : : bool can_split;
1033 : : };
1034 : :
1035 : :
1036 : : /* Find all articulations and call consider_split on them.
1037 : : OVERALL_TIME and OVERALL_SIZE is time and size of the function.
1038 : :
1039 : : We perform basic algorithm for finding an articulation in a graph
1040 : : created from CFG by considering it to be an unoriented graph.
1041 : :
1042 : : The articulation is discovered via DFS walk. We collect earliest
1043 : : basic block on stack that is reachable via backward edge. Articulation
1044 : : is any basic block such that there is no backward edge bypassing it.
1045 : : To reduce stack usage we maintain heap allocated stack in STACK vector.
1046 : : AUX pointer of BB is set to index it appears in the stack or -1 once
1047 : : it is visited and popped off the stack.
1048 : :
1049 : : The algorithm finds articulation after visiting the whole component
1050 : : reachable by it. This makes it convenient to collect information about
1051 : : the component used by consider_split. */
1052 : :
1053 : : static void
1054 : 1294540 : find_split_points (basic_block return_bb, sreal overall_time, int overall_size)
1055 : : {
1056 : 1294540 : stack_entry first;
1057 : 1294540 : vec<stack_entry> stack = vNULL;
1058 : 1294540 : basic_block bb;
1059 : 1294540 : class split_point current;
1060 : :
1061 : 1294540 : current.header_time = overall_time;
1062 : 1294540 : current.header_size = overall_size;
1063 : 1294540 : current.split_time = 0;
1064 : 1294540 : current.split_size = 0;
1065 : 1294540 : current.ssa_names_to_pass = BITMAP_ALLOC (NULL);
1066 : :
1067 : 1294540 : first.bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
1068 : 1294540 : first.edge_num = 0;
1069 : 1294540 : first.overall_time = 0;
1070 : 1294540 : first.overall_size = 0;
1071 : 1294540 : first.earliest = INT_MAX;
1072 : 1294540 : first.set_ssa_names = 0;
1073 : 1294540 : first.used_ssa_names = 0;
1074 : 1294540 : first.non_ssa_vars = 0;
1075 : 1294540 : first.bbs_visited = 0;
1076 : 1294540 : first.can_split = false;
1077 : 1294540 : stack.safe_push (first);
1078 : 1294540 : ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux = (void *)(intptr_t)-1;
1079 : :
1080 : 19185923 : while (!stack.is_empty ())
1081 : : {
1082 : 17891383 : stack_entry *entry = &stack.last ();
1083 : :
1084 : : /* We are walking an acyclic graph, so edge_num counts
1085 : : succ and pred edges together. However when considering
1086 : : articulation, we want to have processed everything reachable
1087 : : from articulation but nothing that reaches into it. */
1088 : 17891383 : if (entry->edge_num == EDGE_COUNT (entry->bb->succs)
1089 : 17891383 : && entry->bb != ENTRY_BLOCK_PTR_FOR_FN (cfun))
1090 : : {
1091 : 4309601 : int pos = stack.length ();
1092 : 4309601 : entry->can_split &= visit_bb (entry->bb, return_bb,
1093 : : entry->set_ssa_names,
1094 : : entry->used_ssa_names,
1095 : : entry->non_ssa_vars);
1096 : 2817795 : if (pos <= entry->earliest && !entry->can_split
1097 : 4595532 : && dump_file && (dump_flags & TDF_DETAILS))
1098 : 0 : fprintf (dump_file,
1099 : : "found articulation at bb %i but cannot split\n",
1100 : 0 : entry->bb->index);
1101 : 4309601 : if (pos <= entry->earliest && entry->can_split)
1102 : : {
1103 : 2531864 : if (dump_file && (dump_flags & TDF_DETAILS))
1104 : 6 : fprintf (dump_file, "found articulation at bb %i\n",
1105 : 6 : entry->bb->index);
1106 : 2531864 : current.entry_bb = entry->bb;
1107 : 2531864 : current.ssa_names_to_pass = BITMAP_ALLOC (NULL);
1108 : 2531864 : bitmap_and_compl (current.ssa_names_to_pass,
1109 : 2531864 : entry->used_ssa_names, entry->set_ssa_names);
1110 : 2531864 : current.header_time = overall_time - entry->overall_time;
1111 : 2531864 : current.header_size = overall_size - entry->overall_size;
1112 : 2531864 : current.split_time = entry->overall_time;
1113 : 2531864 : current.split_size = entry->overall_size;
1114 : 2531864 : current.split_bbs = entry->bbs_visited;
1115 : 2531864 : consider_split (¤t, entry->non_ssa_vars, return_bb);
1116 : 2531864 : BITMAP_FREE (current.ssa_names_to_pass);
1117 : : }
1118 : : }
1119 : : /* Do actual DFS walk. */
1120 : 35782766 : if (entry->edge_num
1121 : 17891383 : < (EDGE_COUNT (entry->bb->succs)
1122 : 33193686 : + EDGE_COUNT (entry->bb->preds)))
1123 : : {
1124 : 12287242 : edge e;
1125 : 12287242 : basic_block dest;
1126 : 12287242 : if (entry->edge_num < EDGE_COUNT (entry->bb->succs))
1127 : : {
1128 : 7014033 : e = EDGE_SUCC (entry->bb, entry->edge_num);
1129 : 7014033 : dest = e->dest;
1130 : : }
1131 : : else
1132 : : {
1133 : 5273209 : e = EDGE_PRED (entry->bb, entry->edge_num
1134 : : - EDGE_COUNT (entry->bb->succs));
1135 : 5273209 : dest = e->src;
1136 : : }
1137 : :
1138 : 12287242 : entry->edge_num++;
1139 : :
1140 : : /* New BB to visit, push it to the stack. */
1141 : 12287242 : if (dest != return_bb && dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
1142 : 10546418 : && !dest->aux)
1143 : : {
1144 : 4309601 : stack_entry new_entry;
1145 : :
1146 : 4309601 : new_entry.bb = dest;
1147 : 4309601 : new_entry.edge_num = 0;
1148 : 4309601 : new_entry.overall_time
1149 : 4309601 : = bb_info_vec[dest->index].time;
1150 : 4309601 : new_entry.overall_size
1151 : 4309601 : = bb_info_vec[dest->index].size;
1152 : 4309601 : new_entry.earliest = INT_MAX;
1153 : 4309601 : new_entry.set_ssa_names = BITMAP_ALLOC (NULL);
1154 : 4309601 : new_entry.used_ssa_names = BITMAP_ALLOC (NULL);
1155 : 4309601 : new_entry.bbs_visited = BITMAP_ALLOC (NULL);
1156 : 4309601 : new_entry.non_ssa_vars = BITMAP_ALLOC (NULL);
1157 : 4309601 : new_entry.can_split = true;
1158 : 4309601 : bitmap_set_bit (new_entry.bbs_visited, dest->index);
1159 : 4309601 : stack.safe_push (new_entry);
1160 : 4309601 : dest->aux = (void *)(intptr_t)stack.length ();
1161 : 4309601 : }
1162 : : /* Back edge found, record the earliest point. */
1163 : 7977641 : else if ((intptr_t)dest->aux > 0
1164 : 4118281 : && (intptr_t)dest->aux < entry->earliest)
1165 : 2679103 : entry->earliest = (intptr_t)dest->aux;
1166 : : }
1167 : : /* We are done with examining the edges. Pop off the value from stack
1168 : : and merge stuff we accumulate during the walk. */
1169 : 5604141 : else if (entry->bb != ENTRY_BLOCK_PTR_FOR_FN (cfun))
1170 : : {
1171 : 4309601 : stack_entry *prev = &stack[stack.length () - 2];
1172 : :
1173 : 4309601 : entry->bb->aux = (void *)(intptr_t)-1;
1174 : 4309601 : prev->can_split &= entry->can_split;
1175 : 4309601 : if (prev->set_ssa_names)
1176 : : {
1177 : 3154645 : bitmap_ior_into (prev->set_ssa_names, entry->set_ssa_names);
1178 : 3154645 : bitmap_ior_into (prev->used_ssa_names, entry->used_ssa_names);
1179 : 3154645 : bitmap_ior_into (prev->bbs_visited, entry->bbs_visited);
1180 : 3154645 : bitmap_ior_into (prev->non_ssa_vars, entry->non_ssa_vars);
1181 : : }
1182 : 4309601 : if (prev->earliest > entry->earliest)
1183 : 2674115 : prev->earliest = entry->earliest;
1184 : 4309601 : prev->overall_time += entry->overall_time;
1185 : 4309601 : prev->overall_size += entry->overall_size;
1186 : 4309601 : BITMAP_FREE (entry->set_ssa_names);
1187 : 4309601 : BITMAP_FREE (entry->used_ssa_names);
1188 : 4309601 : BITMAP_FREE (entry->bbs_visited);
1189 : 4309601 : BITMAP_FREE (entry->non_ssa_vars);
1190 : 4309601 : stack.pop ();
1191 : : }
1192 : : else
1193 : 1294540 : stack.pop ();
1194 : : }
1195 : 1294540 : ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux = NULL;
1196 : 6016961 : FOR_EACH_BB_FN (bb, cfun)
1197 : 4722421 : bb->aux = NULL;
1198 : 1294540 : stack.release ();
1199 : 1294540 : BITMAP_FREE (current.ssa_names_to_pass);
1200 : 1294540 : }
1201 : :
1202 : : /* Split function at SPLIT_POINT. */
1203 : :
1204 : : static void
1205 : 49062 : split_function (basic_block return_bb, class split_point *split_point,
1206 : : bool add_tsan_func_exit)
1207 : : {
1208 : 49062 : vec<tree> args_to_pass = vNULL;
1209 : 49062 : bitmap args_to_skip;
1210 : 49062 : tree parm;
1211 : 49062 : int num = 0;
1212 : 49062 : cgraph_node *node, *cur_node = cgraph_node::get (current_function_decl);
1213 : 49062 : basic_block call_bb;
1214 : 49062 : gcall *call, *tsan_func_exit_call = NULL;
1215 : 49062 : edge e;
1216 : 49062 : edge_iterator ei;
1217 : 49062 : tree retval = NULL, real_retval = NULL;
1218 : 49062 : gimple *last_stmt = NULL;
1219 : 49062 : unsigned int i;
1220 : 49062 : tree arg, ddef;
1221 : :
1222 : 49062 : if (dump_file)
1223 : : {
1224 : 7 : fprintf (dump_file, "\n\nSplitting function at:\n");
1225 : 7 : dump_split_point (dump_file, split_point);
1226 : : }
1227 : :
1228 : 49062 : if (cur_node->can_change_signature)
1229 : 48309 : args_to_skip = BITMAP_ALLOC (NULL);
1230 : : else
1231 : : args_to_skip = NULL;
1232 : :
1233 : : /* Collect the parameters of new function and args_to_skip bitmap. */
1234 : 49062 : for (parm = DECL_ARGUMENTS (current_function_decl);
1235 : 154057 : parm; parm = DECL_CHAIN (parm), num++)
1236 : 104995 : if (args_to_skip
1237 : 104995 : && (!is_gimple_reg (parm)
1238 : 100162 : || (ddef = ssa_default_def (cfun, parm)) == NULL_TREE
1239 : 97939 : || !bitmap_bit_p (split_point->ssa_names_to_pass,
1240 : 97939 : SSA_NAME_VERSION (ddef))))
1241 : 38561 : bitmap_set_bit (args_to_skip, num);
1242 : : else
1243 : : {
1244 : : /* This parm might not have been used up to now, but is going to be
1245 : : used, hence register it. */
1246 : 66434 : if (is_gimple_reg (parm))
1247 : 66419 : arg = get_or_create_ssa_default_def (cfun, parm);
1248 : : else
1249 : 15 : arg = parm;
1250 : :
1251 : 66434 : if (!useless_type_conversion_p (DECL_ARG_TYPE (parm), TREE_TYPE (arg)))
1252 : 0 : arg = fold_convert (DECL_ARG_TYPE (parm), arg);
1253 : 66434 : args_to_pass.safe_push (arg);
1254 : : }
1255 : :
1256 : : /* See if the split function will return. */
1257 : 49062 : bool split_part_return_p = false;
1258 : 245899 : FOR_EACH_EDGE (e, ei, return_bb->preds)
1259 : : {
1260 : 196837 : if (bitmap_bit_p (split_point->split_bbs, e->src->index))
1261 : 129093 : split_part_return_p = true;
1262 : : }
1263 : :
1264 : : /* Add return block to what will become the split function.
1265 : : We do not return; no return block is needed. */
1266 : 49062 : if (!split_part_return_p)
1267 : : ;
1268 : : /* We have no return block, so nothing is needed. */
1269 : 35846 : else if (return_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
1270 : : ;
1271 : : /* When we do not want to return value, we need to construct
1272 : : new return block with empty return statement.
1273 : : FIXME: Once we are able to change return type, we should change function
1274 : : to return void instead of just outputting function with undefined return
1275 : : value. For structures this affects quality of codegen. */
1276 : 35729 : else if ((retval = find_retval (return_bb))
1277 : 35729 : && !split_point->split_part_set_retval)
1278 : : {
1279 : 4157 : bool redirected = true;
1280 : 4157 : basic_block new_return_bb = create_basic_block (NULL, 0, return_bb);
1281 : 4157 : gimple_stmt_iterator gsi = gsi_start_bb (new_return_bb);
1282 : 4157 : gsi_insert_after (&gsi, gimple_build_return (NULL), GSI_NEW_STMT);
1283 : 4157 : new_return_bb->count = profile_count::zero ();
1284 : 17796 : while (redirected)
1285 : : {
1286 : 9482 : redirected = false;
1287 : 23909 : FOR_EACH_EDGE (e, ei, return_bb->preds)
1288 : 19752 : if (bitmap_bit_p (split_point->split_bbs, e->src->index))
1289 : : {
1290 : 5325 : new_return_bb->count += e->count ();
1291 : 5325 : redirect_edge_and_branch (e, new_return_bb);
1292 : 5325 : redirected = true;
1293 : 5325 : break;
1294 : : }
1295 : : }
1296 : 4157 : e = make_single_succ_edge (new_return_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
1297 : 4157 : add_bb_to_loop (new_return_bb, current_loops->tree_root);
1298 : 4157 : bitmap_set_bit (split_point->split_bbs, new_return_bb->index);
1299 : : }
1300 : : /* When we pass around the value, use existing return block. */
1301 : : else
1302 : 31572 : bitmap_set_bit (split_point->split_bbs, return_bb->index);
1303 : :
1304 : : /* If RETURN_BB has virtual operand PHIs, they must be removed and the
1305 : : virtual operand marked for renaming as we change the CFG in a way that
1306 : : tree-inline is not able to compensate for.
1307 : :
1308 : : Note this can happen whether or not we have a return value. If we have
1309 : : a return value, then RETURN_BB may have PHIs for real operands too. */
1310 : 49062 : if (return_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
1311 : : {
1312 : 42468 : bool phi_p = false;
1313 : 42468 : for (gphi_iterator gsi = gsi_start_phis (return_bb);
1314 : 86210 : !gsi_end_p (gsi);)
1315 : : {
1316 : 43742 : gphi *stmt = gsi.phi ();
1317 : 87484 : if (!virtual_operand_p (gimple_phi_result (stmt)))
1318 : : {
1319 : 9937 : gsi_next (&gsi);
1320 : 9937 : continue;
1321 : : }
1322 : 33805 : mark_virtual_phi_result_for_renaming (stmt);
1323 : 33805 : remove_phi_node (&gsi, true);
1324 : 33805 : phi_p = true;
1325 : : }
1326 : : /* In reality we have to rename the reaching definition of the
1327 : : virtual operand at return_bb as we will eventually release it
1328 : : when we remove the code region we outlined.
1329 : : So we have to rename all immediate virtual uses of that region
1330 : : if we didn't see a PHI definition yet. */
1331 : : /* ??? In real reality we want to set the reaching vdef of the
1332 : : entry of the SESE region as the vuse of the call and the reaching
1333 : : vdef of the exit of the SESE region as the vdef of the call. */
1334 : 42468 : if (!phi_p)
1335 : 17326 : for (gimple_stmt_iterator gsi = gsi_start_bb (return_bb);
1336 : 30614 : !gsi_end_p (gsi);
1337 : 21951 : gsi_next (&gsi))
1338 : : {
1339 : 22727 : gimple *stmt = gsi_stmt (gsi);
1340 : 31390 : if (gimple_vuse (stmt))
1341 : : {
1342 : 8663 : gimple_set_vuse (stmt, NULL_TREE);
1343 : 8663 : update_stmt (stmt);
1344 : : }
1345 : 31390 : if (gimple_vdef (stmt))
1346 : : break;
1347 : : }
1348 : : }
1349 : :
1350 : 49062 : ipa_param_adjustments *adjustments;
1351 : 98124 : bool skip_return = (!split_part_return_p
1352 : 49062 : || !split_point->split_part_set_retval);
1353 : : /* TODO: Perhaps get rid of args_to_skip entirely, after we make sure the
1354 : : debug info generation and discrepancy avoiding works well too. */
1355 : 48309 : if ((args_to_skip && !bitmap_empty_p (args_to_skip))
1356 : 73994 : || skip_return)
1357 : : {
1358 : 27541 : vec<ipa_adjusted_param, va_gc> *new_params = NULL;
1359 : 27541 : unsigned j;
1360 : 27541 : for (parm = DECL_ARGUMENTS (current_function_decl), j = 0;
1361 : 91348 : parm; parm = DECL_CHAIN (parm), j++)
1362 : 63807 : if (!args_to_skip || !bitmap_bit_p (args_to_skip, j))
1363 : : {
1364 : 25246 : ipa_adjusted_param adj;
1365 : 25246 : memset (&adj, 0, sizeof (adj));
1366 : 25246 : adj.op = IPA_PARAM_OP_COPY;
1367 : 25246 : adj.base_index = j;
1368 : 25246 : adj.prev_clone_index = j;
1369 : 25246 : vec_safe_push (new_params, adj);
1370 : : }
1371 : 27541 : adjustments = new ipa_param_adjustments (new_params, j, skip_return);
1372 : : }
1373 : : else
1374 : : adjustments = NULL;
1375 : :
1376 : : /* Now create the actual clone. */
1377 : 49062 : cgraph_edge::rebuild_edges ();
1378 : 49062 : node = cur_node->create_version_clone_with_body
1379 : 49062 : (vNULL, NULL, adjustments,
1380 : : split_point->split_bbs, split_point->entry_bb, "part");
1381 : 49062 : delete adjustments;
1382 : 49062 : node->split_part = true;
1383 : :
1384 : 49062 : if (cur_node->same_comdat_group)
1385 : : {
1386 : : /* TODO: call is versionable if we make sure that all
1387 : : callers are inside of a comdat group. */
1388 : 2152 : cur_node->calls_comdat_local = true;
1389 : 2152 : node->add_to_same_comdat_group (cur_node);
1390 : : }
1391 : :
1392 : :
1393 : : /* Let's take a time profile for splitted function. */
1394 : 49062 : if (cur_node->tp_first_run)
1395 : 2 : node->tp_first_run = cur_node->tp_first_run + 1;
1396 : :
1397 : : /* For usual cloning it is enough to clear builtin only when signature
1398 : : changes. For partial inlining we however cannot expect the part
1399 : : of builtin implementation to have same semantic as the whole. */
1400 : 49062 : if (fndecl_built_in_p (node->decl))
1401 : 32 : set_decl_built_in_function (node->decl, NOT_BUILT_IN, 0);
1402 : :
1403 : : /* Drop "clobber *this" attribute from first argument of the split
1404 : : function if any. Code before that might be initializing the
1405 : : members. */
1406 : 49062 : if (tree arg = DECL_ARGUMENTS (node->decl))
1407 : 35403 : if (lookup_attribute ("clobber *this", DECL_ATTRIBUTES (arg)))
1408 : 791 : DECL_ATTRIBUTES (arg)
1409 : 1582 : = remove_attribute ("clobber *this",
1410 : 791 : copy_list (DECL_ATTRIBUTES (arg)));
1411 : :
1412 : : /* If return_bb contains any clobbers that refer to SSA_NAMEs
1413 : : set in the split part, remove them. Also reset debug stmts that
1414 : : refer to SSA_NAMEs set in the split part. */
1415 : 49062 : if (return_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
1416 : : {
1417 : 42468 : gimple_stmt_iterator gsi = gsi_start_bb (return_bb);
1418 : 127347 : while (!gsi_end_p (gsi))
1419 : : {
1420 : 84879 : tree op;
1421 : 84879 : ssa_op_iter iter;
1422 : 84879 : gimple *stmt = gsi_stmt (gsi);
1423 : 84879 : bool remove = false;
1424 : 84879 : if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
1425 : 42721 : FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
1426 : : {
1427 : 2427 : basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1428 : 2427 : if (op != retval
1429 : 2427 : && bb
1430 : 174 : && bb != return_bb
1431 : 2592 : && bitmap_bit_p (split_point->split_bbs, bb->index))
1432 : : {
1433 : 0 : if (is_gimple_debug (stmt))
1434 : : {
1435 : 0 : gimple_debug_bind_reset_value (stmt);
1436 : 0 : update_stmt (stmt);
1437 : : }
1438 : : else
1439 : : remove = true;
1440 : : break;
1441 : : }
1442 : : }
1443 : 0 : if (remove)
1444 : 0 : gsi_remove (&gsi, true);
1445 : : else
1446 : 84879 : gsi_next (&gsi);
1447 : : }
1448 : : }
1449 : :
1450 : : /* If the original function is declared inline, there is no point in issuing
1451 : : a warning for the non-inlinable part. */
1452 : 49062 : DECL_NO_INLINE_WARNING_P (node->decl) = 1;
1453 : 49062 : cur_node->remove_callees ();
1454 : 49062 : cur_node->remove_all_references ();
1455 : 49062 : if (!split_part_return_p)
1456 : 13216 : TREE_THIS_VOLATILE (node->decl) = 1;
1457 : 49062 : if (dump_file)
1458 : 7 : dump_function_to_file (node->decl, dump_file, dump_flags);
1459 : :
1460 : : /* Create the basic block we place call into. It is the entry basic block
1461 : : split after last label. */
1462 : 49062 : call_bb = split_point->entry_bb;
1463 : 99613 : for (gimple_stmt_iterator gsi = gsi_start_bb (call_bb); !gsi_end_p (gsi);)
1464 : 50526 : if (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL)
1465 : : {
1466 : 1489 : last_stmt = gsi_stmt (gsi);
1467 : 1489 : gsi_next (&gsi);
1468 : : }
1469 : : else
1470 : : break;
1471 : 49062 : call_bb->count = split_point->count;
1472 : 49062 : e = split_block (split_point->entry_bb, last_stmt);
1473 : 49062 : remove_edge (e);
1474 : :
1475 : : /* Produce the call statement. */
1476 : 49062 : gimple_stmt_iterator gsi = gsi_last_bb (call_bb);
1477 : 115496 : FOR_EACH_VEC_ELT (args_to_pass, i, arg)
1478 : 66434 : if (!is_gimple_val (arg))
1479 : : {
1480 : 7 : arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE,
1481 : : false, GSI_CONTINUE_LINKING);
1482 : 7 : args_to_pass[i] = arg;
1483 : : }
1484 : 49062 : call = gimple_build_call_vec (node->decl, args_to_pass);
1485 : 49062 : if (cur_node->get_fun ()->has_musttail && !add_tsan_func_exit)
1486 : 4 : gimple_call_set_must_tail (call, true);
1487 : 49062 : gimple_set_block (call, DECL_INITIAL (current_function_decl));
1488 : 49062 : args_to_pass.release ();
1489 : :
1490 : : /* For optimized away parameters, add on the caller side
1491 : : before the call
1492 : : DEBUG D#X => parm_Y(D)
1493 : : stmts and associate D#X with parm in decl_debug_args_lookup
1494 : : vector to say for debug info that if parameter parm had been passed,
1495 : : it would have value parm_Y(D). */
1496 : 49062 : if (args_to_skip)
1497 : : {
1498 : 48309 : vec<tree, va_gc> **debug_args = NULL;
1499 : 48309 : unsigned i = 0, len = 0;
1500 : 48309 : if (MAY_HAVE_DEBUG_BIND_STMTS)
1501 : : {
1502 : 42320 : debug_args = decl_debug_args_lookup (node->decl);
1503 : 42320 : if (debug_args)
1504 : 18469 : len = vec_safe_length (*debug_args);
1505 : : }
1506 : 48309 : for (parm = DECL_ARGUMENTS (current_function_decl), num = 0;
1507 : 150865 : parm; parm = DECL_CHAIN (parm), num++)
1508 : 102556 : if (bitmap_bit_p (args_to_skip, num) && is_gimple_reg (parm))
1509 : : {
1510 : 36167 : tree ddecl;
1511 : 36167 : gimple *def_temp;
1512 : :
1513 : : /* This needs to be done even without
1514 : : MAY_HAVE_DEBUG_BIND_STMTS, otherwise if it didn't exist
1515 : : before, we'd end up with different SSA_NAME_VERSIONs
1516 : : between -g and -g0. */
1517 : 36167 : arg = get_or_create_ssa_default_def (cfun, parm);
1518 : 36167 : if (!MAY_HAVE_DEBUG_BIND_STMTS || debug_args == NULL)
1519 : 6071 : continue;
1520 : :
1521 : 83446 : while (i < len && (**debug_args)[i] != DECL_ORIGIN (parm))
1522 : 11627 : i += 2;
1523 : 30096 : if (i >= len)
1524 : 0 : continue;
1525 : 30096 : ddecl = (**debug_args)[i + 1];
1526 : 30096 : def_temp
1527 : 30096 : = gimple_build_debug_bind (ddecl, unshare_expr (arg), call);
1528 : 30096 : gsi_insert_after (&gsi, def_temp, GSI_NEW_STMT);
1529 : : }
1530 : 48309 : BITMAP_FREE (args_to_skip);
1531 : : }
1532 : :
1533 : : /* We avoid address being taken on any variable used by split part,
1534 : : so return slot optimization is always possible. Moreover this is
1535 : : required to make DECL_BY_REFERENCE work. */
1536 : 49062 : if (aggregate_value_p (DECL_RESULT (current_function_decl),
1537 : 49062 : TREE_TYPE (current_function_decl))
1538 : 49062 : && (!is_gimple_reg_type (TREE_TYPE (DECL_RESULT (current_function_decl)))
1539 : 87 : || DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))))
1540 : 352 : gimple_call_set_return_slot_opt (call, true);
1541 : :
1542 : 49062 : if (add_tsan_func_exit)
1543 : 2 : tsan_func_exit_call = gimple_build_call_internal (IFN_TSAN_FUNC_EXIT, 0);
1544 : :
1545 : : /* Update return value. This is bit tricky. When we do not return,
1546 : : do nothing. When we return we might need to update return_bb
1547 : : or produce a new return statement. */
1548 : 49062 : if (!split_part_return_p)
1549 : : {
1550 : 13216 : gsi_insert_after (&gsi, call, GSI_NEW_STMT);
1551 : 13216 : if (tsan_func_exit_call)
1552 : 0 : gsi_insert_after (&gsi, tsan_func_exit_call, GSI_NEW_STMT);
1553 : : }
1554 : : else
1555 : : {
1556 : 71692 : e = make_single_succ_edge (call_bb, return_bb,
1557 : 35846 : return_bb == EXIT_BLOCK_PTR_FOR_FN (cfun)
1558 : 35846 : ? 0 : EDGE_FALLTHRU);
1559 : :
1560 : : /* If there is return basic block, see what value we need to store
1561 : : return value into and put call just before it. */
1562 : 35846 : if (return_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
1563 : : {
1564 : 35729 : real_retval = retval;
1565 : 35729 : if (real_retval && split_point->split_part_set_retval)
1566 : : {
1567 : 12527 : gphi_iterator psi;
1568 : :
1569 : : /* See if we need new SSA_NAME for the result.
1570 : : When DECL_BY_REFERENCE is true, retval is actually pointer to
1571 : : return value and it is constant in whole function. */
1572 : 12527 : if (TREE_CODE (retval) == SSA_NAME
1573 : 12527 : && !DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
1574 : : {
1575 : 9668 : retval = copy_ssa_name (retval, call);
1576 : :
1577 : : /* See if there is PHI defining return value. */
1578 : 9668 : for (psi = gsi_start_phis (return_bb);
1579 : 9668 : !gsi_end_p (psi); gsi_next (&psi))
1580 : 19336 : if (!virtual_operand_p (gimple_phi_result (psi.phi ())))
1581 : : break;
1582 : :
1583 : : /* When there is PHI, just update its value. */
1584 : 9668 : if (TREE_CODE (retval) == SSA_NAME
1585 : 9668 : && !gsi_end_p (psi))
1586 : 9668 : add_phi_arg (psi.phi (), retval, e, UNKNOWN_LOCATION);
1587 : : /* Otherwise update the return BB itself.
1588 : : find_return_bb allows at most one assignment to return value,
1589 : : so update first statement. */
1590 : : else
1591 : : {
1592 : 0 : gimple_stmt_iterator bsi;
1593 : 0 : for (bsi = gsi_start_bb (return_bb); !gsi_end_p (bsi);
1594 : 0 : gsi_next (&bsi))
1595 : 0 : if (greturn *return_stmt
1596 : 0 : = dyn_cast <greturn *> (gsi_stmt (bsi)))
1597 : : {
1598 : 0 : gimple_return_set_retval (return_stmt, retval);
1599 : 0 : break;
1600 : : }
1601 : 0 : else if (gimple_code (gsi_stmt (bsi)) == GIMPLE_ASSIGN
1602 : 0 : && !gimple_clobber_p (gsi_stmt (bsi)))
1603 : : {
1604 : 0 : gimple_assign_set_rhs1 (gsi_stmt (bsi), retval);
1605 : 0 : break;
1606 : : }
1607 : 0 : update_stmt (gsi_stmt (bsi));
1608 : : /* Also adjust clobbers and debug stmts in return_bb. */
1609 : 0 : for (bsi = gsi_start_bb (return_bb); !gsi_end_p (bsi);
1610 : 0 : gsi_next (&bsi))
1611 : : {
1612 : 0 : gimple *stmt = gsi_stmt (bsi);
1613 : 0 : if (gimple_clobber_p (stmt)
1614 : 0 : || is_gimple_debug (stmt))
1615 : : {
1616 : 0 : ssa_op_iter iter;
1617 : 0 : use_operand_p use_p;
1618 : 0 : bool update = false;
1619 : 0 : FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter,
1620 : : SSA_OP_USE)
1621 : 0 : if (USE_FROM_PTR (use_p) == real_retval)
1622 : : {
1623 : 0 : SET_USE (use_p, retval);
1624 : 0 : update = true;
1625 : : }
1626 : 0 : if (update)
1627 : 0 : update_stmt (stmt);
1628 : : }
1629 : : }
1630 : : }
1631 : : }
1632 : 12527 : if (DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
1633 : : {
1634 : 0 : gimple_call_set_lhs (call, build_simple_mem_ref (retval));
1635 : 0 : gsi_insert_after (&gsi, call, GSI_NEW_STMT);
1636 : : }
1637 : : else
1638 : : {
1639 : 12527 : tree restype;
1640 : 12527 : restype = TREE_TYPE (DECL_RESULT (current_function_decl));
1641 : 12527 : gsi_insert_after (&gsi, call, GSI_NEW_STMT);
1642 : 12527 : if (!useless_type_conversion_p (TREE_TYPE (retval), restype))
1643 : : {
1644 : 0 : gimple *cpy;
1645 : 0 : tree tem = create_tmp_reg (restype);
1646 : 0 : tem = make_ssa_name (tem, call);
1647 : 0 : cpy = gimple_build_assign (retval, NOP_EXPR, tem);
1648 : 0 : gsi_insert_after (&gsi, cpy, GSI_NEW_STMT);
1649 : 0 : retval = tem;
1650 : : }
1651 : 12527 : gimple_call_set_lhs (call, retval);
1652 : 12527 : update_stmt (call);
1653 : : }
1654 : 12527 : }
1655 : : else
1656 : 23202 : gsi_insert_after (&gsi, call, GSI_NEW_STMT);
1657 : 35729 : if (tsan_func_exit_call)
1658 : 2 : gsi_insert_after (&gsi, tsan_func_exit_call, GSI_NEW_STMT);
1659 : : }
1660 : : /* We don't use return block (there is either no return in function or
1661 : : multiple of them). So create new basic block with return statement.
1662 : : */
1663 : : else
1664 : : {
1665 : 117 : greturn *ret;
1666 : 117 : if (split_point->split_part_set_retval
1667 : 117 : && !VOID_TYPE_P (TREE_TYPE (TREE_TYPE (current_function_decl))))
1668 : : {
1669 : 76 : retval = DECL_RESULT (current_function_decl);
1670 : :
1671 : : /* We use temporary register to hold value when aggregate_value_p
1672 : : is false. Similarly for DECL_BY_REFERENCE we must avoid extra
1673 : : copy. */
1674 : 76 : if (!aggregate_value_p (retval, TREE_TYPE (current_function_decl))
1675 : 76 : && !DECL_BY_REFERENCE (retval))
1676 : 76 : retval = create_tmp_reg (TREE_TYPE (retval));
1677 : 76 : if (is_gimple_reg (retval))
1678 : : {
1679 : : /* When returning by reference, there is only one SSA name
1680 : : assigned to RESULT_DECL (that is pointer to return value).
1681 : : Look it up or create new one if it is missing. */
1682 : 64 : if (DECL_BY_REFERENCE (retval))
1683 : 0 : retval = get_or_create_ssa_default_def (cfun, retval);
1684 : : /* Otherwise produce new SSA name for return value. */
1685 : : else
1686 : 64 : retval = make_ssa_name (retval, call);
1687 : : }
1688 : 76 : if (DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
1689 : 0 : gimple_call_set_lhs (call, build_simple_mem_ref (retval));
1690 : : else
1691 : 76 : gimple_call_set_lhs (call, retval);
1692 : 76 : gsi_insert_after (&gsi, call, GSI_NEW_STMT);
1693 : : }
1694 : : else
1695 : : {
1696 : 41 : gsi_insert_after (&gsi, call, GSI_NEW_STMT);
1697 : 41 : if (retval
1698 : 0 : && is_gimple_reg_type (TREE_TYPE (retval))
1699 : 41 : && !is_gimple_val (retval))
1700 : : {
1701 : 0 : gassign *g
1702 : 0 : = gimple_build_assign (make_ssa_name (TREE_TYPE (retval)),
1703 : : retval);
1704 : 0 : retval = gimple_assign_lhs (g);
1705 : 0 : gsi_insert_after (&gsi, g, GSI_NEW_STMT);
1706 : : }
1707 : : }
1708 : 117 : if (tsan_func_exit_call)
1709 : 0 : gsi_insert_after (&gsi, tsan_func_exit_call, GSI_NEW_STMT);
1710 : 117 : ret = gimple_build_return (retval);
1711 : 117 : gsi_insert_after (&gsi, ret, GSI_NEW_STMT);
1712 : : }
1713 : : }
1714 : 49062 : free_dominance_info (CDI_DOMINATORS);
1715 : 49062 : free_dominance_info (CDI_POST_DOMINATORS);
1716 : 49062 : compute_fn_summary (node, true);
1717 : 49062 : }
1718 : :
1719 : : /* Execute function splitting pass. */
1720 : :
1721 : : static unsigned int
1722 : 2099933 : execute_split_functions (void)
1723 : : {
1724 : 2099933 : gimple_stmt_iterator bsi;
1725 : 2099933 : basic_block bb;
1726 : 2099933 : sreal overall_time = 0;
1727 : 2099933 : int overall_size = 0;
1728 : 2099933 : int todo = 0;
1729 : 2099933 : struct cgraph_node *node = cgraph_node::get (current_function_decl);
1730 : :
1731 : 2099933 : if (flags_from_decl_or_type (current_function_decl)
1732 : 2099933 : & (ECF_NORETURN|ECF_MALLOC|ECF_RETURNS_TWICE))
1733 : : {
1734 : 46614 : if (dump_file)
1735 : 0 : fprintf (dump_file, "Not splitting: noreturn/malloc/returns_twice "
1736 : : "function.\n");
1737 : 46614 : return 0;
1738 : : }
1739 : 2053319 : if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
1740 : : {
1741 : 57418 : if (dump_file)
1742 : 3 : fprintf (dump_file, "Not splitting: main function.\n");
1743 : 57418 : return 0;
1744 : : }
1745 : 1995901 : if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
1746 : : {
1747 : 640 : if (dump_file)
1748 : 0 : fprintf (dump_file, "Not splitting: function is unlikely executed.\n");
1749 : 640 : return 0;
1750 : : }
1751 : : /* This can be relaxed; function might become inlinable after splitting
1752 : : away the uninlinable part. */
1753 : 1995261 : if (ipa_fn_summaries
1754 : 1995226 : && ipa_fn_summaries->get (node)
1755 : 1995223 : && !ipa_fn_summaries->get (node)->inlinable)
1756 : : {
1757 : 181917 : if (dump_file)
1758 : 0 : fprintf (dump_file, "Not splitting: not inlinable.\n");
1759 : 181917 : return 0;
1760 : : }
1761 : 1813344 : if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
1762 : : {
1763 : 294296 : if (dump_file)
1764 : 0 : fprintf (dump_file, "Not splitting: disregarding inline limits.\n");
1765 : 294296 : return 0;
1766 : : }
1767 : : /* This can be relaxed; most of versioning tests actually prevents
1768 : : a duplication. */
1769 : 1519048 : if (!tree_versionable_function_p (current_function_decl))
1770 : : {
1771 : 33 : if (dump_file)
1772 : 0 : fprintf (dump_file, "Not splitting: not versionable.\n");
1773 : 33 : return 0;
1774 : : }
1775 : : /* FIXME: we could support this. */
1776 : 1519015 : if (DECL_STRUCT_FUNCTION (current_function_decl)->static_chain_decl)
1777 : : {
1778 : 1762 : if (dump_file)
1779 : 0 : fprintf (dump_file, "Not splitting: nested function.\n");
1780 : 1762 : return 0;
1781 : : }
1782 : :
1783 : : /* See if it makes sense to try to split.
1784 : : It makes sense to split if we inline, that is if we have direct calls to
1785 : : handle or direct calls are possibly going to appear as result of indirect
1786 : : inlining or LTO. Also handle -fprofile-generate as LTO to allow non-LTO
1787 : : training for LTO -fprofile-use build.
1788 : :
1789 : : Note that we are not completely conservative about disqualifying functions
1790 : : called once. It is possible that the caller is called more then once and
1791 : : then inlining would still benefit. */
1792 : 1517253 : if ((!node->callers
1793 : : /* Local functions called once will be completely inlined most of time. */
1794 : 1066104 : || (!node->callers->next_caller && node->local))
1795 : 526103 : && !node->address_taken
1796 : 435263 : && !node->has_aliases_p ()
1797 : 1751850 : && (!flag_lto || !node->externally_visible))
1798 : : {
1799 : 222526 : if (dump_file)
1800 : 21 : fprintf (dump_file, "Not splitting: not called directly "
1801 : : "or called once.\n");
1802 : 222526 : return 0;
1803 : : }
1804 : :
1805 : : /* FIXME: We can actually split if splitting reduces call overhead. */
1806 : 1294727 : if (!flag_inline_small_functions
1807 : 1294727 : && !DECL_DECLARED_INLINE_P (current_function_decl))
1808 : : {
1809 : 22 : if (dump_file)
1810 : 0 : fprintf (dump_file, "Not splitting: not autoinlining and function"
1811 : : " is not inline.\n");
1812 : 22 : return 0;
1813 : : }
1814 : :
1815 : 1294705 : if (lookup_attribute ("noinline", DECL_ATTRIBUTES (current_function_decl)))
1816 : : {
1817 : 0 : if (dump_file)
1818 : 0 : fprintf (dump_file, "Not splitting: function is noinline.\n");
1819 : 0 : return 0;
1820 : : }
1821 : 1294705 : if (lookup_attribute ("section", DECL_ATTRIBUTES (current_function_decl)))
1822 : : {
1823 : 15 : if (dump_file)
1824 : 0 : fprintf (dump_file, "Not splitting: function is in user defined "
1825 : : "section.\n");
1826 : 15 : return 0;
1827 : : }
1828 : 1294690 : if (!strub_splittable_p (node))
1829 : : {
1830 : 134 : if (dump_file)
1831 : 0 : fprintf (dump_file, "Not splitting: function is a strub context.\n");
1832 : 134 : return 0;
1833 : : }
1834 : :
1835 : : /* We enforce splitting after loop headers when profile info is not
1836 : : available. */
1837 : 1294556 : if (profile_status_for_fn (cfun) != PROFILE_READ)
1838 : 1294484 : mark_dfs_back_edges ();
1839 : :
1840 : : /* Initialize bitmap to track forbidden calls. */
1841 : 1294556 : forbidden_dominators = BITMAP_ALLOC (NULL);
1842 : 1294556 : calculate_dominance_info (CDI_DOMINATORS);
1843 : :
1844 : : /* Compute local info about basic blocks and determine function size/time. */
1845 : 1294556 : bb_info_vec.safe_grow_cleared (last_basic_block_for_fn (cfun) + 1, true);
1846 : 1294556 : best_split_point.split_bbs = NULL;
1847 : 1294556 : basic_block return_bb = find_return_bb ();
1848 : 1294556 : int tsan_exit_found = -1;
1849 : 6017036 : FOR_EACH_BB_FN (bb, cfun)
1850 : : {
1851 : 4722496 : sreal time = 0;
1852 : 4722496 : int size = 0;
1853 : 4722496 : sreal freq = bb->count.to_sreal_scale
1854 : 4722496 : (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count);
1855 : :
1856 : 4722496 : if (dump_file && (dump_flags & TDF_DETAILS))
1857 : 9 : fprintf (dump_file, "Basic block %i\n", bb->index);
1858 : :
1859 : 48487745 : for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1860 : : {
1861 : 39042769 : sreal this_time;
1862 : 39042769 : int this_size;
1863 : 39042769 : gimple *stmt = gsi_stmt (bsi);
1864 : :
1865 : 39042769 : this_size = estimate_num_insns (stmt, &eni_size_weights);
1866 : 39042769 : this_time = (sreal)estimate_num_insns (stmt, &eni_time_weights)
1867 : 39042769 : * freq;
1868 : 39042769 : size += this_size;
1869 : 39042769 : time += this_time;
1870 : 39042769 : check_forbidden_calls (stmt);
1871 : :
1872 : 39042769 : if (dump_file && (dump_flags & TDF_DETAILS))
1873 : : {
1874 : 17 : fprintf (dump_file, " freq:%4.2f size:%3i time:%4.2f ",
1875 : : freq.to_double (), this_size, this_time.to_double ());
1876 : 17 : print_gimple_stmt (dump_file, stmt, 0);
1877 : : }
1878 : :
1879 : 39042769 : if ((flag_sanitize & SANITIZE_THREAD)
1880 : 39042769 : && gimple_call_internal_p (stmt, IFN_TSAN_FUNC_EXIT))
1881 : : {
1882 : : /* We handle TSAN_FUNC_EXIT for splitting either in the
1883 : : return_bb, or in its immediate predecessors. */
1884 : 110 : if ((bb != return_bb && !find_edge (bb, return_bb))
1885 : 233 : || (tsan_exit_found != -1
1886 : 2 : && tsan_exit_found != (bb != return_bb)))
1887 : : {
1888 : 16 : if (dump_file)
1889 : 0 : fprintf (dump_file, "Not splitting: TSAN_FUNC_EXIT"
1890 : : " in unexpected basic block.\n");
1891 : 16 : BITMAP_FREE (forbidden_dominators);
1892 : 16 : bb_info_vec.release ();
1893 : 16 : return 0;
1894 : : }
1895 : 123 : tsan_exit_found = bb != return_bb;
1896 : : }
1897 : : }
1898 : 4722480 : overall_time += time;
1899 : 4722480 : overall_size += size;
1900 : 4722480 : bb_info_vec[bb->index].time = time;
1901 : 4722480 : bb_info_vec[bb->index].size = size;
1902 : : }
1903 : 1294540 : find_split_points (return_bb, overall_time, overall_size);
1904 : 1294540 : if (best_split_point.split_bbs)
1905 : : {
1906 : 49062 : split_function (return_bb, &best_split_point, tsan_exit_found == 1);
1907 : 49062 : BITMAP_FREE (best_split_point.ssa_names_to_pass);
1908 : 49062 : BITMAP_FREE (best_split_point.split_bbs);
1909 : 49062 : todo = TODO_update_ssa | TODO_cleanup_cfg;
1910 : : }
1911 : 1294540 : BITMAP_FREE (forbidden_dominators);
1912 : 1294540 : bb_info_vec.release ();
1913 : 1294540 : return todo;
1914 : : }
1915 : :
1916 : : namespace {
1917 : :
1918 : : const pass_data pass_data_split_functions =
1919 : : {
1920 : : GIMPLE_PASS, /* type */
1921 : : "fnsplit", /* name */
1922 : : OPTGROUP_NONE, /* optinfo_flags */
1923 : : TV_IPA_FNSPLIT, /* tv_id */
1924 : : PROP_cfg, /* properties_required */
1925 : : 0, /* properties_provided */
1926 : : 0, /* properties_destroyed */
1927 : : 0, /* todo_flags_start */
1928 : : 0, /* todo_flags_finish */
1929 : : };
1930 : :
1931 : : class pass_split_functions : public gimple_opt_pass
1932 : : {
1933 : : public:
1934 : 289080 : pass_split_functions (gcc::context *ctxt)
1935 : 578160 : : gimple_opt_pass (pass_data_split_functions, ctxt)
1936 : : {}
1937 : :
1938 : : /* opt_pass methods: */
1939 : : bool gate (function *) final override;
1940 : 2099495 : unsigned int execute (function *) final override
1941 : : {
1942 : 2099495 : return execute_split_functions ();
1943 : : }
1944 : :
1945 : : }; // class pass_split_functions
1946 : :
1947 : : bool
1948 : 2466470 : pass_split_functions::gate (function *)
1949 : : {
1950 : : /* When doing profile feedback, we want to execute the pass after profiling
1951 : : is read. So disable one in early optimization. */
1952 : 2466470 : return (flag_partial_inlining
1953 : 2466470 : && !profile_arc_flag && !flag_branch_probabilities);
1954 : : }
1955 : :
1956 : : } // anon namespace
1957 : :
1958 : : gimple_opt_pass *
1959 : 289080 : make_pass_split_functions (gcc::context *ctxt)
1960 : : {
1961 : 289080 : return new pass_split_functions (ctxt);
1962 : : }
1963 : :
1964 : : /* Execute function splitting pass. */
1965 : :
1966 : : static unsigned int
1967 : 438 : execute_feedback_split_functions (void)
1968 : : {
1969 : 0 : unsigned int retval = execute_split_functions ();
1970 : 438 : if (retval)
1971 : 2 : retval |= TODO_rebuild_cgraph_edges;
1972 : 438 : return retval;
1973 : : }
1974 : :
1975 : : namespace {
1976 : :
1977 : : const pass_data pass_data_feedback_split_functions =
1978 : : {
1979 : : GIMPLE_PASS, /* type */
1980 : : "feedback_fnsplit", /* name */
1981 : : OPTGROUP_NONE, /* optinfo_flags */
1982 : : TV_IPA_FNSPLIT, /* tv_id */
1983 : : PROP_cfg, /* properties_required */
1984 : : 0, /* properties_provided */
1985 : : 0, /* properties_destroyed */
1986 : : 0, /* todo_flags_start */
1987 : : 0, /* todo_flags_finish */
1988 : : };
1989 : :
1990 : : class pass_feedback_split_functions : public gimple_opt_pass
1991 : : {
1992 : : public:
1993 : 578160 : pass_feedback_split_functions (gcc::context *ctxt)
1994 : 1156320 : : gimple_opt_pass (pass_data_feedback_split_functions, ctxt)
1995 : : {}
1996 : :
1997 : : /* opt_pass methods: */
1998 : : bool gate (function *) final override;
1999 : 438 : unsigned int execute (function *) final override
2000 : : {
2001 : 438 : return execute_feedback_split_functions ();
2002 : : }
2003 : :
2004 : 289080 : opt_pass * clone () final override
2005 : : {
2006 : 289080 : return new pass_feedback_split_functions (m_ctxt);
2007 : : }
2008 : : }; // class pass_feedback_split_functions
2009 : :
2010 : : bool
2011 : 2562 : pass_feedback_split_functions::gate (function *)
2012 : : {
2013 : : /* We don't need to split when profiling at all, we are producing
2014 : : lousy code anyway. */
2015 : 2562 : return (flag_partial_inlining
2016 : 2562 : && flag_branch_probabilities);
2017 : : }
2018 : :
2019 : : } // anon namespace
2020 : :
2021 : : gimple_opt_pass *
2022 : 289080 : make_pass_feedback_split_functions (gcc::context *ctxt)
2023 : : {
2024 : 289080 : return new pass_feedback_split_functions (ctxt);
2025 : : }
|