Branch data Line data Source code
1 : : /* Shrink-wrapping related optimizations.
2 : : Copyright (C) 1987-2025 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify it under
7 : : the terms of the GNU General Public License as published by the Free
8 : : Software Foundation; either version 3, or (at your option) any later
9 : : version.
10 : :
11 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : /* This file handles shrink-wrapping related optimizations. */
21 : :
22 : : #include "config.h"
23 : : #include "system.h"
24 : : #include "coretypes.h"
25 : : #include "backend.h"
26 : : #include "target.h"
27 : : #include "rtl.h"
28 : : #include "tree.h"
29 : : #include "cfghooks.h"
30 : : #include "df.h"
31 : : #include "memmodel.h"
32 : : #include "tm_p.h"
33 : : #include "regs.h"
34 : : #include "insn-config.h"
35 : : #include "emit-rtl.h"
36 : : #include "output.h"
37 : : #include "tree-pass.h"
38 : : #include "cfgrtl.h"
39 : : #include "cfgbuild.h"
40 : : #include "bb-reorder.h"
41 : : #include "shrink-wrap.h"
42 : : #include "regcprop.h"
43 : : #include "rtl-iter.h"
44 : : #include "valtrack.h"
45 : : #include "function-abi.h"
46 : : #include "print-rtl.h"
47 : :
48 : : /* Return true if INSN requires the stack frame to be set up.
49 : : PROLOGUE_USED contains the hard registers used in the function
50 : : prologue. SET_UP_BY_PROLOGUE is the set of registers we expect the
51 : : prologue to set up for the function. */
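: : /* For instance, a (call_insn ...) that is not a sibling call always
: : needs the frame, and so does any insn whose pattern reads a register
: : the prologue sets up, e.g. a hypothetical
: : (set (reg x) (mem (plus (reg sp) (const_int 8))))
: : when the stack pointer is in SET_UP_BY_PROLOGUE, as it is for the
: : caller in try_shrink_wrapping below. */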
52 : : bool
53 : 100022460 : requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
54 : : HARD_REG_SET set_up_by_prologue)
55 : : {
56 : 100022460 : df_ref def, use;
57 : 100022460 : HARD_REG_SET hardregs;
58 : 100022460 : unsigned regno;
59 : :
60 : 100022460 : if (CALL_P (insn) && !FAKE_CALL_P (insn))
61 : 7291807 : return !SIBLING_CALL_P (insn);
62 : :
63 : : /* We need a frame to get the unique CFA expected by the unwinder. */
64 : 92730653 : if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
65 : : return true;
66 : :
67 : 92562869 : CLEAR_HARD_REG_SET (hardregs);
68 : 169485448 : FOR_EACH_INSN_DEF (def, insn)
69 : : {
70 : 76922579 : rtx dreg = DF_REF_REG (def);
71 : :
72 : 76922579 : if (!REG_P (dreg))
73 : 0 : continue;
74 : :
75 : 76922579 : add_to_hard_reg_set (&hardregs, GET_MODE (dreg), REGNO (dreg));
76 : : }
77 : 92562869 : if (hard_reg_set_intersect_p (hardregs, prologue_used))
78 : : return true;
79 : 90893067 : hardregs &= ~crtl->abi->full_reg_clobbers ();
80 : 7795189416 : for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
81 : 7712468458 : if (TEST_HARD_REG_BIT (hardregs, regno)
82 : 7712468458 : && df_regs_ever_live_p (regno))
83 : : return true;
84 : :
85 : 167972162 : FOR_EACH_INSN_USE (use, insn)
86 : : {
87 : 85251204 : rtx reg = DF_REF_REG (use);
88 : :
89 : 85251204 : if (!REG_P (reg))
90 : 0 : continue;
91 : :
92 : 85251204 : add_to_hard_reg_set (&hardregs, GET_MODE (reg),
93 : : REGNO (reg));
94 : : }
95 : 82720958 : if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
96 : : return true;
97 : :
98 : : return false;
99 : : }
100 : :
101 : : /* See whether there is a single live edge from BB whose dest uses
102 : : [REGNO, END_REGNO). Return that live edge if its dest bb has
103 : : one or two predecessors. Otherwise return NULL. */
104 : :
105 : : static edge
106 : 302110 : live_edge_for_reg (basic_block bb, int regno, int end_regno)
107 : : {
108 : 302110 : edge e, live_edge;
109 : 302110 : edge_iterator ei;
110 : 302110 : bitmap live;
111 : 302110 : int i;
112 : :
113 : 302110 : live_edge = NULL;
114 : 715462 : FOR_EACH_EDGE (e, ei, bb->succs)
115 : : {
116 : 513825 : live = df_get_live_in (e->dest);
117 : 1441002 : for (i = regno; i < end_regno; i++)
118 : 513825 : if (REGNO_REG_SET_P (live, i))
119 : : {
120 : 400599 : if (live_edge && live_edge != e)
121 : : return NULL;
122 : : live_edge = e;
123 : : }
124 : : }
125 : :
126 : : /* We can sometimes encounter dead code. Don't try to move it
127 : : into the exit block. */
128 : 201637 : if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
129 : : return NULL;
130 : :
131 : : /* Reject targets of abnormal edges. This is needed for correctness
132 : : on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
133 : : exception edges even though it is generally treated as call-saved
134 : : for the majority of the compilation. Moving across abnormal edges
135 : : isn't going to be interesting for shrink-wrap usage anyway. */
136 : 199653 : if (live_edge->flags & EDGE_ABNORMAL)
137 : : return NULL;
138 : :
139 : : /* When live_edge->dest->preds == 2, we can create a new block on
140 : : the edge to make it meet the requirement. */
141 : 198501 : if (EDGE_COUNT (live_edge->dest->preds) > 2)
142 : : return NULL;
143 : :
144 : : return live_edge;
145 : : }
146 : :
147 : : /* Try to move INSN from BB to a successor. Return true on success.
148 : : USES and DEFS are the sets of registers that are used and defined
149 : : after INSN in BB. SPLIT_P indicates whether a live edge from BB
150 : : has been split or not. */
151 : :
152 : : static bool
153 : 7578266 : move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
154 : : const_hard_reg_set uses,
155 : : const_hard_reg_set defs,
156 : : bool *split_p,
157 : : struct dead_debug_local *debug)
158 : : {
159 : 7578266 : rtx set, src, dest;
160 : 7578266 : bitmap live_out, live_in, bb_uses = NULL, bb_defs = NULL;
161 : 7578266 : unsigned int i, dregno, end_dregno;
162 : 7578266 : unsigned int sregno = FIRST_PSEUDO_REGISTER;
163 : 7578266 : unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
164 : 7578266 : basic_block next_block;
165 : 7578266 : edge live_edge;
166 : 7578266 : rtx_insn *dinsn;
167 : 7578266 : df_ref def;
168 : :
169 : : /* Look for a simple register assignment. We don't use single_set here
170 : : because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
171 : : destinations. */
172 : 7578266 : if (!INSN_P (insn))
173 : : return false;
174 : 7578266 : set = PATTERN (insn);
175 : 7578266 : if (GET_CODE (set) != SET)
176 : : return false;
177 : 6207229 : src = SET_SRC (set);
178 : 6207229 : dest = SET_DEST (set);
179 : :
180 : : /* For the destination, we want only a register. Also disallow STACK
181 : : or FRAME related adjustments. They are likely part of the prologue,
182 : : so keep them in the entry block. */
183 : 6207229 : if (!REG_P (dest)
184 : 4412324 : || dest == stack_pointer_rtx
185 : 4394419 : || dest == frame_pointer_rtx
186 : 4394419 : || dest == hard_frame_pointer_rtx)
187 : : return false;
188 : :
189 : : /* For the source, we want one of:
190 : : (1) A (non-overlapping) register
191 : : (2) A constant,
192 : : (3) An expression involving no more than one register.
193 : :
194 : : That last point comes from the code following, which was originally
195 : : written to handle only register move operations, and still only handles
196 : : a single source register when checking for overlaps. Happily, the
197 : : same checks can be applied to expressions like (plus reg const). */
198 : :
199 : 4394023 : if (CONSTANT_P (src))
200 : : ;
201 : 3368896 : else if (!REG_P (src))
202 : : {
203 : 2434180 : rtx src_inner = NULL_RTX;
204 : :
205 : 2434180 : if (can_throw_internal (insn))
206 : 1755628 : return false;
207 : :
208 : 2425054 : subrtx_var_iterator::array_type array;
209 : 5278238 : FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
210 : : {
211 : 4599686 : rtx x = *iter;
212 : 4599686 : switch (GET_RTX_CLASS (GET_CODE (x)))
213 : : {
214 : : case RTX_CONST_OBJ:
215 : : case RTX_COMPARE:
216 : : case RTX_COMM_COMPARE:
217 : : case RTX_BIN_ARITH:
218 : : case RTX_COMM_ARITH:
219 : : case RTX_UNARY:
220 : : case RTX_TERNARY:
221 : : /* Constant or expression. Continue. */
222 : : break;
223 : :
224 : 2747577 : case RTX_OBJ:
225 : 2747577 : case RTX_EXTRA:
226 : 2747577 : switch (GET_CODE (x))
227 : : {
228 : : case UNSPEC:
229 : : case SUBREG:
230 : : case STRICT_LOW_PART:
231 : : case PC:
232 : : case LO_SUM:
233 : : /* Ok. Continue. */
234 : : break;
235 : :
236 : 1242664 : case REG:
237 : : /* Fail if we see a second inner register. */
238 : 1242664 : if (src_inner != NULL)
239 : 1746502 : return false;
240 : : src_inner = x;
241 : : break;
242 : :
243 : : default:
244 : : return false;
245 : : }
246 : : break;
247 : :
248 : : default:
249 : : return false;
250 : : }
251 : : }
252 : :
253 : 678552 : if (src_inner != NULL)
254 : 678531 : src = src_inner;
255 : 2425054 : }
256 : :
257 : : /* Make sure that the source register isn't defined later in BB. */
258 : 2638395 : if (REG_P (src))
259 : : {
260 : 1613247 : sregno = REGNO (src);
261 : 1613247 : end_sregno = END_REGNO (src);
262 : 1613247 : if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
263 : : return false;
264 : : }
265 : :
266 : : /* Make sure that the destination register isn't referenced later in BB. */
267 : 1898779 : dregno = REGNO (dest);
268 : 1898779 : end_dregno = END_REGNO (dest);
269 : 1898779 : if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
270 : 1898779 : || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
271 : : return false;
272 : :
273 : : /* See whether there is a successor block to which we could move INSN. */
274 : 272274 : live_edge = live_edge_for_reg (bb, dregno, end_dregno);
275 : 272274 : if (!live_edge)
276 : : return false;
277 : :
278 : 168705 : next_block = live_edge->dest;
279 : : /* Create a new basic block on the edge. */
280 : 168705 : if (EDGE_COUNT (next_block->preds) == 2)
281 : : {
282 : : /* split_edge for a block with only one successor is meaningless. */
283 : 88273 : if (EDGE_COUNT (bb->succs) == 1)
284 : : return false;
285 : :
286 : : /* If DF_LIVE doesn't exist, i.e. at -O1, just give up. */
287 : 11665 : if (!df_live)
288 : : return false;
289 : :
290 : 11197 : basic_block old_dest = live_edge->dest;
291 : 11197 : next_block = split_edge (live_edge);
292 : :
293 : : /* We create a new basic block. Call df_grow_bb_info to make sure
294 : : all data structures are allocated. */
295 : 11197 : df_grow_bb_info (df_live);
296 : :
297 : 11197 : bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
298 : 11197 : df_get_live_in (old_dest));
299 : 11197 : df_set_bb_dirty (next_block);
300 : :
301 : : /* We should not split more than once for a function. */
302 : 11197 : if (*split_p)
303 : : return false;
304 : :
305 : 11197 : *split_p = true;
306 : : }
307 : :
308 : : /* At this point we are committed to moving INSN, but let's try to
309 : : move it as far as we can. */
310 : 106299 : do
311 : : {
312 : 106299 : if (MAY_HAVE_DEBUG_BIND_INSNS)
313 : : {
314 : 836957 : FOR_BB_INSNS_REVERSE (bb, dinsn)
315 : 825512 : if (DEBUG_BIND_INSN_P (dinsn))
316 : : {
317 : 329791 : df_ref use;
318 : 431986 : FOR_EACH_INSN_USE (use, dinsn)
319 : 102195 : if (refers_to_regno_p (dregno, end_dregno,
320 : 102195 : DF_REF_REG (use), (rtx *) NULL))
321 : 26668 : dead_debug_add (debug, use, DF_REF_REGNO (use));
322 : : }
323 : 495721 : else if (dinsn == insn)
324 : : break;
325 : : }
326 : 106299 : live_out = df_get_live_out (bb);
327 : 106299 : live_in = df_get_live_in (next_block);
328 : 106299 : bb = next_block;
329 : :
330 : : /* Check whether BB uses DEST or clobbers DEST. We need to add
331 : : INSN to BB if so. Either way, DEST is no longer live on entry,
332 : : except for any part that overlaps SRC (next loop). */
333 : 106299 : if (!*split_p)
334 : : {
335 : 87566 : bb_uses = &DF_LR_BB_INFO (bb)->use;
336 : 87566 : bb_defs = &DF_LR_BB_INFO (bb)->def;
337 : : }
338 : 106299 : if (df_live)
339 : : {
340 : 204828 : for (i = dregno; i < end_dregno; i++)
341 : : {
342 : 102414 : if (*split_p
343 : 83681 : || REGNO_REG_SET_P (bb_uses, i)
344 : 41221 : || REGNO_REG_SET_P (bb_defs, i)
345 : 184856 : || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
346 : : next_block = NULL;
347 : 102414 : CLEAR_REGNO_REG_SET (live_out, i);
348 : 102414 : CLEAR_REGNO_REG_SET (live_in, i);
349 : : }
350 : :
351 : : /* Check whether BB clobbers SRC. We need to add INSN to BB if so.
352 : : Either way, SRC is now live on entry. */
353 : 194300 : for (i = sregno; i < end_sregno; i++)
354 : : {
355 : 91886 : if (*split_p
356 : 81445 : || REGNO_REG_SET_P (bb_defs, i)
357 : 168596 : || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
358 : : next_block = NULL;
359 : 91886 : SET_REGNO_REG_SET (live_out, i);
360 : 91886 : SET_REGNO_REG_SET (live_in, i);
361 : : }
362 : : }
363 : : else
364 : : {
365 : : /* DF_LR_BB_INFO (bb)->def does not include the DF_REF_PARTIAL and
366 : : DF_REF_CONDITIONAL defs. So if DF_LIVE doesn't exist, i.e.
367 : : at -O1, just give up searching NEXT_BLOCK. */
368 : 7770 : next_block = NULL;
369 : 7770 : for (i = dregno; i < end_dregno; i++)
370 : : {
371 : 3885 : CLEAR_REGNO_REG_SET (live_out, i);
372 : 3885 : CLEAR_REGNO_REG_SET (live_in, i);
373 : : }
374 : :
375 : 7755 : for (i = sregno; i < end_sregno; i++)
376 : : {
377 : 3870 : SET_REGNO_REG_SET (live_out, i);
378 : 3870 : SET_REGNO_REG_SET (live_in, i);
379 : : }
380 : : }
381 : :
382 : : /* If we don't need to add the move to BB, look for a single
383 : : successor block. */
384 : 106299 : if (next_block)
385 : : {
386 : 29836 : live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
387 : 29836 : if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
388 : : break;
389 : : next_block = live_edge->dest;
390 : : }
391 : : }
392 : 91133 : while (next_block);
393 : :
394 : : /* For the newly created basic block, there is no dataflow info at all.
395 : : So skip the following dataflow update and check. */
396 : 91629 : if (!(*split_p))
397 : : {
398 : : /* BB now defines DEST. It only uses the parts of DEST that overlap SRC
399 : : (next loop). */
400 : 145792 : for (i = dregno; i < end_dregno; i++)
401 : : {
402 : 72896 : CLEAR_REGNO_REG_SET (bb_uses, i);
403 : 72896 : SET_REGNO_REG_SET (bb_defs, i);
404 : : }
405 : :
406 : : /* BB now uses SRC. */
407 : 143841 : for (i = sregno; i < end_sregno; i++)
408 : 70945 : SET_REGNO_REG_SET (bb_uses, i);
409 : : }
410 : :
411 : : /* Insert debug temps for dead REGs used in subsequent debug insns. */
412 : 91629 : if (debug->used && !bitmap_empty_p (debug->used))
413 : 29418 : FOR_EACH_INSN_DEF (def, insn)
414 : 14709 : dead_debug_insert_temp (debug, DF_REF_REGNO (def), insn,
415 : : DEBUG_TEMP_BEFORE_WITH_VALUE);
416 : :
417 : 91629 : rtx_insn *insn_copy = emit_insn_after (PATTERN (insn), bb_note (bb));
418 : : /* Update the LABEL_NUSES count on any referenced labels. The ideal
419 : : solution here would be to actually move the instruction instead
420 : : of copying/deleting it, as copying loses some annotations on the
421 : : insn. */
422 : 91629 : mark_jump_label (PATTERN (insn), insn_copy, 0);
423 : 91629 : delete_insn (insn);
424 : 91629 : return true;
425 : : }
426 : :
427 : : /* Look for register copies in the first block of the function, and move
428 : : them down into successor blocks if the register is used only on one
429 : : path. This exposes more opportunities for shrink-wrapping. These
430 : : kinds of sets often occur when incoming argument registers are moved
431 : : to call-saved registers because their values are live across one or
432 : : more calls during the function. */
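: : /* A typical candidate (a sketch, not actual compiler output) is a copy
: : such as (set (reg rbx) (reg rdi)) at the top of the function, where an
: : incoming argument is saved into a call-saved register only because one
: : path contains a call. Sinking that copy towards the block containing
: : the call lets the other paths run without a prologue. */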
433 : :
434 : : static void
435 : 633703 : prepare_shrink_wrap (basic_block entry_block)
436 : : {
437 : 633703 : rtx_insn *insn, *curr;
438 : 633703 : rtx x;
439 : 633703 : HARD_REG_SET uses, defs;
440 : 633703 : df_ref def, use;
441 : 633703 : bool split_p = false;
442 : 633703 : unsigned int i;
443 : 633703 : struct dead_debug_local debug;
444 : :
445 : 633703 : if (JUMP_P (BB_END (entry_block)))
446 : : {
447 : : /* To expose more shrink-wrapping opportunities, prepare_shrink_wrap tries
448 : : to sink copies from parameter registers to callee-saved registers out
449 : : of the entry block. copyprop_hardreg_forward_bb_without_debug_insn is
450 : : called first to break some of the dependences. */
451 : 348617 : copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
452 : : }
453 : :
454 : 633703 : dead_debug_local_init (&debug, NULL, NULL);
455 : 2534812 : CLEAR_HARD_REG_SET (uses);
456 : 633703 : CLEAR_HARD_REG_SET (defs);
457 : :
458 : 27439594 : FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
459 : 13086094 : if (NONDEBUG_INSN_P (insn)
460 : 7578266 : && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
461 : : &split_p, &debug))
462 : : {
463 : : /* Add all defined registers to DEFS. */
464 : 83852479 : FOR_EACH_INSN_DEF (def, insn)
465 : : {
466 : 76365842 : x = DF_REF_REG (def);
467 : 76365842 : if (REG_P (x) && HARD_REGISTER_P (x))
468 : 152731684 : for (i = REGNO (x); i < END_REGNO (x); i++)
469 : 76365842 : SET_HARD_REG_BIT (defs, i);
470 : : }
471 : :
472 : : /* Add all used registers to USES. */
473 : 17054817 : FOR_EACH_INSN_USE (use, insn)
474 : : {
475 : 9568180 : x = DF_REF_REG (use);
476 : 9568180 : if (REG_P (x) && HARD_REGISTER_P (x))
477 : 19136360 : for (i = REGNO (x); i < END_REGNO (x); i++)
478 : 9568180 : SET_HARD_REG_BIT (uses, i);
479 : : }
480 : : }
481 : :
482 : 633703 : dead_debug_local_finish (&debug, NULL);
483 : 633703 : }
484 : :
485 : : /* Return whether basic block PRO can get the prologue. It cannot if it
486 : : has incoming complex edges that need a prologue inserted (we make a new
487 : : block for the prologue, so those edges would need to be redirected, which
488 : : does not work). It also cannot if there exist registers live on entry
489 : : to PRO that are clobbered by the prologue. */
490 : :
491 : : static bool
492 : 73838 : can_get_prologue (basic_block pro, HARD_REG_SET prologue_clobbered)
493 : : {
494 : 73838 : edge e;
495 : 73838 : edge_iterator ei;
496 : 184218 : FOR_EACH_EDGE (e, ei, pro->preds)
497 : 110397 : if (e->flags & EDGE_COMPLEX
498 : 110397 : && !dominated_by_p (CDI_DOMINATORS, e->src, pro))
499 : : return false;
500 : :
501 : : HARD_REG_SET live;
502 : 73821 : REG_SET_TO_HARD_REG_SET (live, df_get_live_in (pro));
503 : 147642 : if (hard_reg_set_intersect_p (live, prologue_clobbered))
504 : : return false;
505 : :
506 : : return true;
507 : : }
508 : :
509 : : /* Return whether we can duplicate basic block BB for shrink wrapping. We
510 : : cannot if the block cannot be duplicated at all, or if any of its incoming
511 : : edges are complex and come from a block that does not require a prologue
512 : : (we cannot redirect such edges), or if the block is too big to copy.
513 : : PRO is the basic block before which we would put the prologue, MAX_SIZE is
514 : : the maximum size block we allow to be copied. */
515 : :
516 : : static bool
517 : 527337 : can_dup_for_shrink_wrapping (basic_block bb, basic_block pro, unsigned max_size)
518 : : {
519 : 527337 : if (!can_duplicate_block_p (bb))
520 : : return false;
521 : :
522 : 527101 : edge e;
523 : 527101 : edge_iterator ei;
524 : 1333341 : FOR_EACH_EDGE (e, ei, bb->preds)
525 : 815140 : if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
526 : 815140 : && !dominated_by_p (CDI_DOMINATORS, e->src, pro))
527 : : return false;
528 : :
529 : 518201 : unsigned size = 0;
530 : :
531 : 518201 : rtx_insn *insn;
532 : 4695501 : FOR_BB_INSNS (bb, insn)
533 : 4351078 : if (NONDEBUG_INSN_P (insn))
534 : : {
535 : 1486390 : size += get_attr_min_length (insn);
536 : 1486390 : if (size > max_size)
537 : : return false;
538 : : }
539 : :
540 : : return true;
541 : : }
542 : :
543 : : /* Do whatever needs to be done for exits that run without prologue.
544 : : Sibcalls need nothing done. Normal exits get a simple_return inserted. */
545 : :
546 : : static void
547 : 54533 : handle_simple_exit (edge e)
548 : : {
549 : :
550 : 54533 : if (e->flags & EDGE_SIBCALL)
551 : : {
552 : : /* Tell function.cc to take no further action on this edge. */
553 : 4206 : e->flags |= EDGE_IGNORE;
554 : :
555 : 4206 : e->flags &= ~EDGE_FALLTHRU;
556 : 4206 : emit_barrier_after_bb (e->src);
557 : 4206 : return;
558 : : }
559 : :
560 : : /* If the basic block the edge comes from has multiple successors,
561 : : split the edge. */
562 : 50327 : if (EDGE_COUNT (e->src->succs) > 1)
563 : : {
564 : 1499 : basic_block old_bb = e->src;
565 : 1499 : rtx_insn *end = BB_END (old_bb);
566 : 1499 : rtx_note *note = emit_note_after (NOTE_INSN_DELETED, end);
567 : 1499 : basic_block new_bb = create_basic_block (note, note, old_bb);
568 : 1499 : BB_COPY_PARTITION (new_bb, old_bb);
569 : 1499 : BB_END (old_bb) = end;
570 : :
571 : 1499 : redirect_edge_succ (e, new_bb);
572 : 1499 : new_bb->count = e->count ();
573 : 1499 : e->flags |= EDGE_FALLTHRU;
574 : :
575 : 1499 : e = make_single_succ_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
576 : : }
577 : :
578 : 50327 : e->flags &= ~EDGE_FALLTHRU;
579 : 50327 : rtx_jump_insn *ret = emit_jump_insn_after (targetm.gen_simple_return (),
580 : 50327 : BB_END (e->src));
581 : 50327 : JUMP_LABEL (ret) = simple_return_rtx;
582 : 50327 : emit_barrier_after_bb (e->src);
583 : :
584 : 50327 : if (dump_file)
585 : 11 : fprintf (dump_file, "Made simple_return with UID %d in bb %d\n",
586 : 11 : INSN_UID (ret), e->src->index);
587 : : }
588 : :
589 : : /* Try to perform a kind of shrink-wrapping, making sure the
590 : : prologue/epilogue is emitted only around those parts of the
591 : : function that require it.
592 : :
593 : : There will be exactly one prologue, and it will be executed either
594 : : zero or one time, on any path. Depending on where the prologue is
595 : : placed, some of the basic blocks can be reached via both paths with
596 : : and without a prologue. Such blocks will be duplicated here, and the
597 : : edges changed to match.
598 : :
599 : : Paths that go to the exit without going through the prologue will use
600 : : a simple_return instead of the epilogue. We maximize the number of
601 : : those, making sure to only duplicate blocks that can be duplicated.
602 : : If the prologue can then still be placed in multiple locations, we
603 : : place it as early as possible.
604 : :
605 : : An example, where we duplicate blocks with control flow (legend:
606 : : _B_egin, _R_eturn and _S_imple_return; edges without arrowhead should
607 : : be taken to point down or to the right, to simplify the diagram; here,
608 : : block 3 needs a prologue, the rest does not):
609 : :
610 : :
611 : : B B
612 : : | |
613 : : 2 2
614 : : |\ |\
615 : : | 3 becomes | 3
616 : : |/ | \
617 : : 4 7 4
618 : : |\ |\ |\
619 : : | 5 | 8 | 5
620 : : |/ |/ |/
621 : : 6 9 6
622 : : | | |
623 : : R S R
624 : :
625 : :
626 : : (bb 4 is duplicated to 7, and so on; the prologue is inserted on the
627 : : edge 2->3).
628 : :
629 : : Another example, where part of a loop is duplicated (again, bb 3 is
630 : : the only block that needs a prologue):
631 : :
632 : :
633 : : B 3<-- B ->3<--
634 : : | | | | | | |
635 : : | v | becomes | | v |
636 : : 2---4--- 2---5-- 4---
637 : : | | |
638 : : R S R
639 : :
640 : :
641 : : (bb 4 is duplicated to 5; the prologue is inserted on the edge 5->3).
642 : :
643 : : ENTRY_EDGE is the edge where the prologue will be placed, possibly
644 : : changed by this function. PROLOGUE_SEQ is the prologue we will insert. */
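: : /* Walking through the first example above: block 3 is the only block
: : that needs the prologue, so PRO starts out as block 3. Blocks 4, 5
: : and 6 are reachable from block 3 but not dominated by it, so they are
: : duplicated to 7, 8 and 9. The originals 4-6 run with the prologue and
: : keep the normal return (R); the copies 7-9 run without it and end in
: : a simple_return (S). The prologue itself ends up on the edge 2->3. */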
645 : :
646 : : void
647 : 1435842 : try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq)
648 : : {
649 : : /* If we cannot shrink-wrap, are told not to shrink-wrap, or it makes
650 : : no sense to shrink-wrap: then do not shrink-wrap! */
651 : :
652 : 1435842 : if (!SHRINK_WRAPPING_ENABLED)
653 : 1382473 : return;
654 : :
655 : 1008442 : if (crtl->profile && !targetm.profile_before_prologue ())
656 : : return;
657 : :
658 : 1008416 : if (crtl->calls_eh_return)
659 : : return;
660 : :
661 : 1383079 : bool empty_prologue = true;
662 : 1383079 : for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
663 : 1008391 : if (!(NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END))
664 : : {
665 : : empty_prologue = false;
666 : : break;
667 : : }
668 : 1008391 : if (empty_prologue)
669 : : return;
670 : :
671 : : /* Move some code down to expose more shrink-wrapping opportunities. */
672 : :
673 : 633703 : basic_block entry = (*entry_edge)->dest;
674 : 633703 : prepare_shrink_wrap (entry);
675 : :
676 : 633703 : if (dump_file)
677 : 39 : fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");
678 : :
679 : : /* Compute the registers set and used in the prologue. */
680 : :
681 : : HARD_REG_SET prologue_clobbered, prologue_used;
682 : 1901109 : CLEAR_HARD_REG_SET (prologue_clobbered);
683 : 3139895 : CLEAR_HARD_REG_SET (prologue_used);
684 : 3139895 : for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
685 : 2506192 : if (NONDEBUG_INSN_P (insn))
686 : : {
687 : : HARD_REG_SET this_used;
688 : 1872489 : CLEAR_HARD_REG_SET (this_used);
689 : 1872489 : note_uses (&PATTERN (insn), record_hard_reg_uses, &this_used);
690 : 1872489 : this_used &= ~prologue_clobbered;
691 : 1872489 : prologue_used |= this_used;
692 : 1872489 : note_stores (insn, record_hard_reg_sets, &prologue_clobbered);
693 : : }
694 : 633703 : CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
695 : 633703 : if (frame_pointer_needed)
696 : 41682 : CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
697 : :
698 : : /* Find out what registers are set up by the prologue; any use of these
699 : : cannot happen before the prologue. */
700 : :
701 : : struct hard_reg_set_container set_up_by_prologue;
702 : 633703 : CLEAR_HARD_REG_SET (set_up_by_prologue.set);
703 : 633703 : add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, STACK_POINTER_REGNUM);
704 : 633703 : add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
705 : 633703 : if (frame_pointer_needed)
706 : 41682 : add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
707 : : HARD_FRAME_POINTER_REGNUM);
708 : 633703 : if (pic_offset_table_rtx
709 : 633703 : && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
710 : 0 : add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
711 : 0 : PIC_OFFSET_TABLE_REGNUM);
712 : 633703 : if (crtl->drap_reg)
713 : 5199 : add_to_hard_reg_set (&set_up_by_prologue.set,
714 : 5199 : GET_MODE (crtl->drap_reg),
715 : 5199 : REGNO (crtl->drap_reg));
716 : 633703 : if (targetm.set_up_by_prologue)
717 : 0 : targetm.set_up_by_prologue (&set_up_by_prologue);
718 : :
719 : : /* We will insert the prologue before the basic block PRO. PRO should
720 : : dominate all basic blocks that need the prologue to be executed
721 : : before them. First, make PRO the "tightest wrap" possible. */
722 : :
723 : 633703 : calculate_dominance_info (CDI_DOMINATORS);
724 : :
725 : 633703 : basic_block pro = 0;
726 : :
727 : 633703 : basic_block bb;
728 : 633703 : edge e;
729 : 633703 : edge_iterator ei;
730 : 10124814 : FOR_EACH_BB_FN (bb, cfun)
731 : : {
732 : 9491111 : rtx_insn *insn;
733 : 63566248 : FOR_BB_INSNS (bb, insn)
734 : 59580856 : if (NONDEBUG_INSN_P (insn)
735 : 59580856 : && requires_stack_frame_p (insn, prologue_used,
736 : : set_up_by_prologue.set))
737 : : {
738 : 5505719 : if (dump_file)
739 : : {
740 : 79 : fprintf (dump_file, "Block %d needs prologue due to insn %d:\n",
741 : 79 : bb->index, INSN_UID (insn));
742 : 79 : print_rtl_single (dump_file, insn);
743 : : }
744 : 5505719 : pro = nearest_common_dominator (CDI_DOMINATORS, pro, bb);
745 : 5505719 : break;
746 : : }
747 : : }
748 : :
749 : : /* If nothing needs a prologue, just put it at the start. This really
750 : : shouldn't happen, but we cannot fix it here. */
751 : :
752 : 633703 : if (pro == 0)
753 : : {
754 : 60 : if (dump_file)
755 : 0 : fprintf(dump_file, "Nothing needs a prologue, but it isn't empty; "
756 : : "putting it at the start.\n");
757 : 60 : pro = entry;
758 : : }
759 : :
760 : 633703 : if (dump_file)
761 : 39 : fprintf (dump_file, "After wrapping required blocks, PRO is now %d\n",
762 : : pro->index);
763 : :
764 : : /* Now see if we can put the prologue at the start of PRO. Putting it
765 : : there might require duplicating a block that cannot be duplicated,
766 : : or in some cases we cannot insert the prologue there at all. If PRO
767 : : won't do, try again with the immediate dominator of PRO, and so on.
768 : :
769 : : The blocks that need duplicating are those reachable from PRO but
770 : : not dominated by it. We keep in BB_WITH a bitmap of the blocks
771 : : reachable from PRO that we already found, and in VEC a stack of
772 : : those we still need to consider (to find successors). */
773 : :
774 : 633703 : auto_bitmap bb_with;
775 : 633703 : bitmap_set_bit (bb_with, pro->index);
776 : :
777 : 633703 : vec<basic_block> vec;
778 : 633703 : vec.create (n_basic_blocks_for_fn (cfun));
779 : 633703 : vec.quick_push (pro);
780 : :
781 : 633703 : unsigned max_grow_size = get_uncond_jump_length ();
782 : 633703 : max_grow_size *= param_max_grow_copy_bb_insns;
783 : :
784 : 633703 : basic_block checked_pro = NULL;
785 : :
786 : 1161040 : while (pro != entry)
787 : : {
788 : 587896 : if (pro != checked_pro)
789 : : {
790 : 63191 : while (pro != entry && !can_get_prologue (pro, prologue_clobbered))
791 : : {
792 : 137 : pro = get_immediate_dominator (CDI_DOMINATORS, pro);
793 : :
794 : 137 : if (bitmap_set_bit (bb_with, pro->index))
795 : 136 : vec.quick_push (pro);
796 : : }
797 : : checked_pro = pro;
798 : : }
799 : :
800 : 587896 : if (vec.is_empty ())
801 : : break;
802 : :
803 : 527337 : basic_block bb = vec.pop ();
804 : 527337 : if (!can_dup_for_shrink_wrapping (bb, pro, max_grow_size))
805 : 186444 : while (!dominated_by_p (CDI_DOMINATORS, bb, pro))
806 : : {
807 : 3530 : gcc_assert (pro != entry);
808 : :
809 : 3530 : pro = get_immediate_dominator (CDI_DOMINATORS, pro);
810 : :
811 : 3530 : if (bitmap_set_bit (bb_with, pro->index))
812 : 3234 : vec.quick_push (pro);
813 : : }
814 : :
815 : 1266549 : FOR_EACH_EDGE (e, ei, bb->succs)
816 : 739212 : if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
817 : 739212 : && bitmap_set_bit (bb_with, e->dest->index))
818 : 466904 : vec.quick_push (e->dest);
819 : : }
820 : :
821 : 633703 : if (dump_file)
822 : 39 : fprintf (dump_file, "Avoiding non-duplicatable blocks, PRO is now %d\n",
823 : : pro->index);
824 : :
825 : : /* If we can move PRO back without having to duplicate more blocks, do so.
826 : : We do this because putting the prologue earlier is better for scheduling.
827 : :
828 : : We can move back to a block PRE if every path from PRE will eventually
829 : : need a prologue, that is, PRO is a post-dominator of PRE. PRE needs
830 : : to dominate every block reachable from itself. We keep in BB_TMP a
831 : : bitmap of the blocks reachable from PRE that we already found, and in
832 : : VEC a stack of those we still need to consider.
833 : :
834 : : Any block reachable from PRE is also reachable from all predecessors
835 : : of PRE, so if we find we need to move PRE back further we can leave
836 : : everything not considered so far on the stack. Any block dominated
837 : : by PRE is also dominated by all other dominators of PRE, so anything
838 : : found good for some PRE does not need to be reconsidered later.
839 : :
840 : : We don't need to update BB_WITH because none of the new blocks found
841 : : can jump to a block that does not need the prologue. */
842 : :
843 : 633703 : if (pro != entry)
844 : : {
845 : 60559 : calculate_dominance_info (CDI_POST_DOMINATORS);
846 : :
847 : 60559 : auto_bitmap bb_tmp;
848 : 60559 : bitmap_copy (bb_tmp, bb_with);
849 : 60559 : basic_block last_ok = pro;
850 : 60559 : vec.truncate (0);
851 : :
852 : 132220 : while (pro != entry)
853 : : {
854 : 64471 : basic_block pre = get_immediate_dominator (CDI_DOMINATORS, pro);
855 : 64471 : if (!dominated_by_p (CDI_POST_DOMINATORS, pre, pro))
856 : : break;
857 : :
858 : 11102 : if (bitmap_set_bit (bb_tmp, pre->index))
859 : 10714 : vec.quick_push (pre);
860 : :
861 : 26642 : bool ok = true;
862 : 26642 : while (!vec.is_empty ())
863 : : {
864 : 15887 : if (!dominated_by_p (CDI_DOMINATORS, vec.last (), pre))
865 : : {
866 : : ok = false;
867 : : break;
868 : : }
869 : :
870 : 15540 : basic_block bb = vec.pop ();
871 : 35700 : FOR_EACH_EDGE (e, ei, bb->succs)
872 : 20160 : if (bitmap_set_bit (bb_tmp, e->dest->index))
873 : 4826 : vec.quick_push (e->dest);
874 : : }
875 : :
876 : 11102 : if (ok && can_get_prologue (pre, prologue_clobbered))
877 : : last_ok = pre;
878 : :
879 : 11102 : pro = pre;
880 : : }
881 : :
882 : 60559 : pro = last_ok;
883 : :
884 : 60559 : free_dominance_info (CDI_POST_DOMINATORS);
885 : 60559 : }
886 : :
887 : 633703 : vec.release ();
888 : :
889 : 633703 : if (dump_file)
890 : 39 : fprintf (dump_file, "Bumping back to anticipatable blocks, PRO is now %d\n",
891 : : pro->index);
892 : :
893 : 633703 : if (pro == entry)
894 : : {
895 : 580334 : free_dominance_info (CDI_DOMINATORS);
896 : 580334 : return;
897 : : }
898 : :
899 : : /* Compute what fraction of the frequency and count of the blocks that run
900 : : both with and without a prologue belongs to the runs with a prologue. This gives
901 : : the correct answer for reducible flow graphs; for irreducible flow graphs
902 : : our profile is messed up beyond repair anyway. */
903 : :
904 : 53369 : profile_count num = profile_count::zero ();
905 : 53369 : profile_count den = profile_count::zero ();
906 : :
907 : 133291 : FOR_EACH_EDGE (e, ei, pro->preds)
908 : 79922 : if (!dominated_by_p (CDI_DOMINATORS, e->src, pro))
909 : : {
910 : 79685 : if (e->count ().initialized_p ())
911 : 79657 : num += e->count ();
912 : 79685 : if (e->src->count.initialized_p ())
913 : 79665 : den += e->src->count;
914 : : }
915 : :
916 : : /* All is okay, so do it. */
917 : :
918 : 53369 : crtl->shrink_wrapped = true;
919 : 53369 : if (dump_file)
920 : 11 : fprintf (dump_file, "Performing shrink-wrapping.\n");
921 : :
922 : : /* Copy the blocks that can run both with and without prologue. The
923 : : originals run with prologue, the copies without. Store a pointer to
924 : : the copy in the ->aux field of the original. */
925 : :
926 : 697368 : FOR_EACH_BB_FN (bb, cfun)
927 : 643999 : if (bitmap_bit_p (bb_with, bb->index)
928 : 643999 : && !dominated_by_p (CDI_DOMINATORS, bb, pro))
929 : : {
930 : 34111 : basic_block dup = duplicate_block (bb, 0, 0);
931 : :
932 : 34111 : bb->aux = dup;
933 : :
934 : 34111 : if (JUMP_P (BB_END (dup)) && !any_condjump_p (BB_END (dup)))
935 : 2284 : emit_barrier_after_bb (dup);
936 : :
937 : 34111 : if (EDGE_COUNT (dup->succs) == 0)
938 : 7 : emit_barrier_after_bb (dup);
939 : :
940 : 34111 : if (dump_file)
941 : 8 : fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index);
942 : :
943 : 34148 : if (num == profile_count::zero () || den.nonzero_p ())
944 : 34103 : bb->count = bb->count.apply_scale (num, den);
945 : 34111 : dup->count -= bb->count;
946 : : }
947 : :
948 : : /* Now change the edges to point to the copies, where appropriate. */
949 : :
950 : 700891 : FOR_EACH_BB_FN (bb, cfun)
951 : 647522 : if (!dominated_by_p (CDI_DOMINATORS, bb, pro))
952 : : {
953 : 289177 : basic_block src = bb;
954 : 289177 : if (bitmap_bit_p (bb_with, bb->index))
955 : 34111 : src = (basic_block) bb->aux;
956 : :
957 : 719279 : FOR_EACH_EDGE (e, ei, src->succs)
958 : : {
959 : 430102 : if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
960 : 82674 : continue;
961 : :
962 : 347428 : if (bitmap_bit_p (bb_with, e->dest->index)
963 : 347428 : && !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
964 : : {
965 : 49639 : if (dump_file)
966 : 9 : fprintf (dump_file, "Redirecting edge %d->%d to %d\n",
967 : 9 : e->src->index, e->dest->index,
968 : 9 : ((basic_block) e->dest->aux)->index);
969 : 49639 : redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
970 : : }
971 : 297789 : else if (e->flags & EDGE_FALLTHRU
972 : 297789 : && bitmap_bit_p (bb_with, bb->index))
973 : 93 : force_nonfallthru (e);
974 : : }
975 : : }
976 : :
977 : : /* Also redirect the function entry edge if necessary. */
978 : :
979 : 106738 : FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
980 : 53369 : if (bitmap_bit_p (bb_with, e->dest->index)
981 : 53369 : && !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
982 : : {
983 : 12 : basic_block split_bb = split_edge (e);
984 : 12 : e = single_succ_edge (split_bb);
985 : 12 : redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
986 : : }
987 : :
988 : : /* Make a simple_return for those exits that run without prologue. */
989 : :
990 : 700903 : FOR_EACH_BB_REVERSE_FN (bb, cfun)
991 : 647534 : if (!bitmap_bit_p (bb_with, bb->index))
992 : 654076 : FOR_EACH_EDGE (e, ei, bb->succs)
993 : 396796 : if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
994 : 54533 : handle_simple_exit (e);
995 : :
996 : : /* Finally, we want a single edge to put the prologue on. Make a new
997 : : block before the PRO block; the edge between them is the edge we want.
998 : : Then redirect those edges into PRO that come from blocks without the
999 : : prologue, to point to the new block instead. The new prologue block
1000 : : is put at the end of the insn chain. */
1001 : :
1002 : 53369 : basic_block new_bb = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
1003 : 53369 : BB_COPY_PARTITION (new_bb, pro);
1004 : 53369 : new_bb->count = profile_count::zero ();
1005 : 53369 : if (dump_file)
1006 : 11 : fprintf (dump_file, "Made prologue block %d\n", new_bb->index);
1007 : :
1008 : 134008 : for (ei = ei_start (pro->preds); (e = ei_safe_edge (ei)); )
1009 : : {
1010 : 80639 : if (bitmap_bit_p (bb_with, e->src->index)
1011 : 80639 : || dominated_by_p (CDI_DOMINATORS, e->src, pro))
1012 : : {
1013 : 954 : ei_next (&ei);
1014 : 954 : continue;
1015 : : }
1016 : :
1017 : 79685 : new_bb->count += e->count ();
1018 : :
1019 : 79685 : redirect_edge_and_branch_force (e, new_bb);
1020 : 79685 : if (dump_file)
1021 : 14 : fprintf (dump_file, "Redirected edge from %d\n", e->src->index);
1022 : : }
1023 : :
1024 : 53369 : *entry_edge = make_single_succ_edge (new_bb, pro, EDGE_FALLTHRU);
1025 : 53369 : force_nonfallthru (*entry_edge);
1026 : :
1027 : 53369 : free_dominance_info (CDI_DOMINATORS);
1028 : 633703 : }
1029 : :
1030 : : /* Separate shrink-wrapping
1031 : :
1032 : : Instead of putting all of the prologue and epilogue in one spot, we
1033 : : can put parts of it in places where those components are executed less
1034 : : frequently. The following code does this, for prologue and epilogue
1035 : : components that can be put in more than one location, and where those
1036 : : components can be executed more than once (the epilogue component will
1037 : : always be executed before the prologue component is executed a second
1038 : : time).
1039 : :
1040 : : What exactly is a component is target-dependent. The more usual
1041 : : components are simple saves/restores to/from the frame of callee-saved
1042 : : registers. This code treats components abstractly (as an sbitmap),
1043 : : letting the target handle all details.
1044 : :
1045 : : Prologue components are placed in such a way that for every component
1046 : : the prologue is executed as infrequently as possible. We do this by
1047 : : walking the dominator tree, comparing the cost of placing a prologue
1048 : : component before a block to the sum of costs determined for all subtrees
1049 : : of that block.
1050 : :
1051 : : From this placement, we then determine for each component all blocks
1052 : : where at least one of this block's dominators (including itself) will
1053 : : get a prologue inserted. That then is how the components are placed.
1054 : : We could place the epilogue components a bit smarter (we can save a
1055 : : bit of code size sometimes); this is a possible future improvement.
1056 : :
1057 : : A prologue resp. epilogue is preferably placed into a block, at its
1058 : : beginning resp. end, if it is needed on all of that block's predecessor
1059 : : resp. successor edges; otherwise it is placed on the edges themselves.
1060 : :
1061 : : If the placement of any prologue/epilogue leads to a situation we cannot
1062 : : handle (for example, an abnormal edge would need to be split, or some
1063 : : targets want to use some specific registers that may not be available
1064 : : where we want to put them), separate shrink-wrapping for the components
1065 : : in that prologue/epilogue is aborted. */
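: : /* As a hypothetical illustration: if a callee-saved register is only
: : written inside one rarely executed arm of a branch, its save and
: : restore form one component, and the code below can emit that save
: : just before the arm and the restore just after it, instead of in the
: : main prologue and epilogue that every call of the function executes. */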
1066 : :
1067 : :
1068 : : /* Print the sbitmap COMPONENTS to the DUMP_FILE if not empty, with the
1069 : : label LABEL. */
1070 : : static void
1071 : 0 : dump_components (const char *label, sbitmap components)
1072 : : {
1073 : 0 : if (bitmap_empty_p (components))
1074 : : return;
1075 : :
1076 : 0 : fprintf (dump_file, " [%s", label);
1077 : :
1078 : 0 : for (unsigned int j = 0; j < components->n_bits; j++)
1079 : 0 : if (bitmap_bit_p (components, j))
1080 : 0 : fprintf (dump_file, " %u", j);
1081 : :
1082 : 0 : fprintf (dump_file, "]");
1083 : : }
1084 : :
1085 : : /* The data we collect for each bb. */
1086 : : struct sw {
1087 : : /* What components does this BB need? */
1088 : : sbitmap needs_components;
1089 : :
1090 : : /* What components does this BB have? This is the main decision this
1091 : : pass makes. */
1092 : : sbitmap has_components;
1093 : :
1094 : : /* The components for which we placed code at the start of the BB (instead
1095 : : of on all incoming edges). */
1096 : : sbitmap head_components;
1097 : :
1098 : : /* The components for which we placed code at the end of the BB (instead
1099 : : of on all outgoing edges). */
1100 : : sbitmap tail_components;
1101 : :
1102 : : /* The frequency of executing the prologue for this BB, if a prologue is
1103 : : placed on this BB. This is a pessimistic estimate (no prologue is
1104 : : needed for edges from blocks that have the component under consideration
1105 : : active already). */
1106 : : gcov_type own_cost;
1107 : :
1108 : : /* The frequency of executing the prologue for this BB and all BBs
1109 : : dominated by it. */
1110 : : gcov_type total_cost;
1111 : : };
1112 : :
1113 : : /* A helper function for accessing the pass-specific info. */
1114 : : static inline struct sw *
1115 : 0 : SW (basic_block bb)
1116 : : {
1117 : 0 : gcc_assert (bb->aux);
1118 : 0 : return (struct sw *) bb->aux;
1119 : : }
1120 : :
1121 : : /* Create the pass-specific data structures for separately shrink-wrapping
1122 : : with components COMPONENTS. */
1123 : : static void
1124 : 0 : init_separate_shrink_wrap (sbitmap components)
1125 : : {
1126 : 0 : basic_block bb;
1127 : 0 : FOR_ALL_BB_FN (bb, cfun)
1128 : : {
1129 : 0 : bb->aux = xcalloc (1, sizeof (struct sw));
1130 : :
1131 : 0 : SW (bb)->needs_components = targetm.shrink_wrap.components_for_bb (bb);
1132 : :
1133 : : /* Mark all basic blocks without successor as needing all components.
1134 : : This avoids problems in at least cfgcleanup, sel-sched, and
1135 : : regrename (largely to do with all paths to such a block still
1136 : : needing the same dwarf CFI info). */
1137 : 0 : if (EDGE_COUNT (bb->succs) == 0)
1138 : 0 : bitmap_copy (SW (bb)->needs_components, components);
1139 : :
1140 : 0 : if (dump_file)
1141 : : {
1142 : 0 : fprintf (dump_file, "bb %d components:", bb->index);
1143 : 0 : dump_components ("has", SW (bb)->needs_components);
1144 : 0 : fprintf (dump_file, "\n");
1145 : : }
1146 : :
1147 : 0 : SW (bb)->has_components = sbitmap_alloc (SBITMAP_SIZE (components));
1148 : 0 : SW (bb)->head_components = sbitmap_alloc (SBITMAP_SIZE (components));
1149 : 0 : SW (bb)->tail_components = sbitmap_alloc (SBITMAP_SIZE (components));
1150 : 0 : bitmap_clear (SW (bb)->has_components);
1151 : : }
1152 : 0 : }
1153 : :
1154 : : /* Destroy the pass-specific data. */
1155 : : static void
1156 : 0 : fini_separate_shrink_wrap (void)
1157 : : {
1158 : 0 : basic_block bb;
1159 : 0 : FOR_ALL_BB_FN (bb, cfun)
1160 : 0 : if (bb->aux)
1161 : : {
1162 : 0 : sbitmap_free (SW (bb)->needs_components);
1163 : 0 : sbitmap_free (SW (bb)->has_components);
1164 : 0 : sbitmap_free (SW (bb)->head_components);
1165 : 0 : sbitmap_free (SW (bb)->tail_components);
1166 : 0 : free (bb->aux);
1167 : 0 : bb->aux = 0;
1168 : : }
1169 : 0 : }
1170 : :
1171 : : /* Place the prologue for component WHICH, in the basic blocks dominated
1172 : : by HEAD. Do a DFS over the dominator tree, and set bit WHICH in the
1173 : : HAS_COMPONENTS of a block if either the block has that bit set in
1174 : : NEEDS_COMPONENTS, or it is cheaper to place the prologue here than in all
1175 : : dominator subtrees separately. */
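: : /* A worked example with made-up frequencies: suppose a block BB that
: : does not itself need component WHICH has OWN_COST 100 (its execution
: : frequency not coming from backedges), while the subtrees of BB that do
: : need the component accumulate TOTAL_COST 30 + 90 = 120. Since 120 is
: : greater than 100, it is cheaper to run the prologue once per entry to
: : BB, so BB gets bit WHICH set in HAS_COMPONENTS and its TOTAL_COST is
: : reset to 100. */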
1176 : : static void
1177 : 0 : place_prologue_for_one_component (unsigned int which, basic_block head)
1178 : : {
1179 : : /* The block we are currently dealing with. */
1180 : 0 : basic_block bb = head;
1181 : : /* Is this the first time we visit this block, i.e. have we just gone
1182 : : down the tree. */
1183 : 0 : bool first_visit = true;
1184 : :
1185 : : /* Walk the dominator tree, visit one block per iteration of this loop.
1186 : : Each basic block is visited twice: once before visiting any children
1187 : : of the block, and once after visiting all of them (leaf nodes are
1188 : : visited only once). As an optimization, we do not visit subtrees
1189 : : that can no longer influence the prologue placement. */
1190 : 0 : for (;;)
1191 : : {
1192 : : /* First visit of a block: set the (children) cost accumulator to zero;
1193 : : if the block does not have the component itself, walk down. */
1194 : 0 : if (first_visit)
1195 : : {
1196 : : /* Initialize the cost. The cost is the block execution frequency
1197 : : that does not come from backedges. Calculating this by simply
1198 : : adding the cost of all edges that aren't backedges does not
1199 : : work: this does not always add up to the block frequency at
1200 : : all, and even if it does, rounding error makes for bad
1201 : : decisions. */
1202 : 0 : SW (bb)->own_cost = bb->count.to_frequency (cfun);
1203 : :
1204 : 0 : edge e;
1205 : 0 : edge_iterator ei;
1206 : 0 : FOR_EACH_EDGE (e, ei, bb->preds)
1207 : 0 : if (dominated_by_p (CDI_DOMINATORS, e->src, bb))
1208 : : {
1209 : 0 : if (SW (bb)->own_cost > EDGE_FREQUENCY (e))
1210 : 0 : SW (bb)->own_cost -= EDGE_FREQUENCY (e);
1211 : : else
1212 : 0 : SW (bb)->own_cost = 0;
1213 : : }
1214 : :
1215 : 0 : SW (bb)->total_cost = 0;
1216 : :
1217 : 0 : if (!bitmap_bit_p (SW (bb)->needs_components, which)
1218 : 0 : && first_dom_son (CDI_DOMINATORS, bb))
1219 : : {
1220 : 0 : bb = first_dom_son (CDI_DOMINATORS, bb);
1221 : 0 : continue;
1222 : : }
1223 : : }
1224 : :
1225 : : /* If this block does need the component itself, or it is cheaper to
1226 : : put the prologue here than in all the descendants that need it,
1227 : : mark it so. If this block's immediate post-dominator is dominated
1228 : : by this block, and that needs the prologue, we can put it on this
1229 : : block as well (earlier is better). */
1230 : 0 : if (bitmap_bit_p (SW (bb)->needs_components, which)
1231 : 0 : || SW (bb)->total_cost > SW (bb)->own_cost)
1232 : : {
1233 : 0 : SW (bb)->total_cost = SW (bb)->own_cost;
1234 : 0 : bitmap_set_bit (SW (bb)->has_components, which);
1235 : : }
1236 : : else
1237 : : {
1238 : 0 : basic_block kid = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
1239 : 0 : if (dominated_by_p (CDI_DOMINATORS, kid, bb)
1240 : 0 : && bitmap_bit_p (SW (kid)->has_components, which))
1241 : : {
1242 : 0 : SW (bb)->total_cost = SW (bb)->own_cost;
1243 : 0 : bitmap_set_bit (SW (bb)->has_components, which);
1244 : : }
1245 : : }
1246 : :
1247 : : /* We are back where we started, so we are done now. */
1248 : 0 : if (bb == head)
1249 : 0 : return;
1250 : :
1251 : : /* We now know the cost of the subtree rooted at the current block.
1252 : : Accumulate this cost in the parent. */
1253 : 0 : basic_block parent = get_immediate_dominator (CDI_DOMINATORS, bb);
1254 : 0 : SW (parent)->total_cost += SW (bb)->total_cost;
1255 : :
1256 : : /* Don't walk the tree down unless necessary. */
1257 : 0 : if (next_dom_son (CDI_DOMINATORS, bb)
1258 : 0 : && SW (parent)->total_cost <= SW (parent)->own_cost)
1259 : : {
1260 : 0 : bb = next_dom_son (CDI_DOMINATORS, bb);
1261 : 0 : first_visit = true;
1262 : : }
1263 : : else
1264 : : {
1265 : : bb = parent;
1266 : : first_visit = false;
1267 : : }
1268 : : }
1269 : : }
1270 : :
1271 : : /* Set HAS_COMPONENTS in every block to the maximum it can be set to without
1272 : : setting it on any path from entry to exit where it was not already set
1273 : : somewhere (or, for blocks that have no path to the exit, consider only
1274 : : paths from the entry to the block itself). Return whether any changes
1275 : : were made to some HAS_COMPONENTS. */
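: : /* For example, in a hypothetical diamond where both arms A and B
: : already have component C in HAS_COMPONENTS, every path from the entry
: : to the exit that passes through the join block also passes through A
: : or B, so the join block can be given C as well without introducing C
: : on any path where it was not already set. */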
1276 : : static bool
1277 : 0 : spread_components (sbitmap components)
1278 : : {
1279 : 0 : basic_block entry_block = ENTRY_BLOCK_PTR_FOR_FN (cfun);
1280 : 0 : basic_block exit_block = EXIT_BLOCK_PTR_FOR_FN (cfun);
1281 : :
1282 : : /* A stack of all blocks left to consider, and a bitmap of all blocks
1283 : : on that stack. */
1284 : 0 : vec<basic_block> todo;
1285 : 0 : todo.create (n_basic_blocks_for_fn (cfun));
1286 : 0 : auto_bitmap seen;
1287 : :
1288 : 0 : auto_sbitmap old (SBITMAP_SIZE (components));
1289 : :
1290 : : /* Find for every block the components that are *not* needed on some path
1291 : : from the entry to that block. Do this with a flood fill from the entry
1292 : : block. Every block can be visited at most as often as the number of
1293 : : components (plus one), and usually much less often. */
1294 : :
1295 : 0 : if (dump_file)
1296 : 0 : fprintf (dump_file, "Spreading down...\n");
1297 : :
1298 : 0 : basic_block bb;
1299 : 0 : FOR_ALL_BB_FN (bb, cfun)
1300 : 0 : bitmap_clear (SW (bb)->head_components);
1301 : :
1302 : 0 : bitmap_copy (SW (entry_block)->head_components, components);
1303 : :
1304 : 0 : edge e;
1305 : 0 : edge_iterator ei;
1306 : :
1307 : 0 : todo.quick_push (single_succ (entry_block));
1308 : 0 : bitmap_set_bit (seen, single_succ (entry_block)->index);
1309 : 0 : while (!todo.is_empty ())
1310 : : {
1311 : 0 : bb = todo.pop ();
1312 : :
1313 : 0 : bitmap_copy (old, SW (bb)->head_components);
1314 : :
1315 : 0 : FOR_EACH_EDGE (e, ei, bb->preds)
1316 : 0 : bitmap_ior (SW (bb)->head_components, SW (bb)->head_components,
1317 : 0 : SW (e->src)->head_components);
1318 : :
1319 : 0 : bitmap_and_compl (SW (bb)->head_components, SW (bb)->head_components,
1320 : 0 : SW (bb)->has_components);
1321 : :
1322 : 0 : if (!bitmap_equal_p (old, SW (bb)->head_components))
1323 : 0 : FOR_EACH_EDGE (e, ei, bb->succs)
1324 : 0 : if (bitmap_set_bit (seen, e->dest->index))
1325 : 0 : todo.quick_push (e->dest);
1326 : :
1327 : 0 : bitmap_clear_bit (seen, bb->index);
1328 : : }
1329 : :
1330 : : /* Find for every block the components that are *not* needed on some reverse
1331 : : path from the exit to that block. */
1332 : :
1333 : 0 : if (dump_file)
1334 : 0 : fprintf (dump_file, "Spreading up...\n");
1335 : :
1336 : : /* First, mark all blocks not reachable from the exit block as not needing
1337 : : any component on any path to the exit. Mark everything, and then clear
1338 : : again by a flood fill. */
1339 : :
1340 : 0 : FOR_ALL_BB_FN (bb, cfun)
1341 : 0 : bitmap_copy (SW (bb)->tail_components, components);
1342 : :
1343 : 0 : FOR_EACH_EDGE (e, ei, exit_block->preds)
1344 : : {
1345 : 0 : todo.quick_push (e->src);
1346 : 0 : bitmap_set_bit (seen, e->src->index);
1347 : : }
1348 : :
1349 : 0 : while (!todo.is_empty ())
1350 : : {
1351 : 0 : bb = todo.pop ();
1352 : :
1353 : 0 : if (!bitmap_empty_p (SW (bb)->tail_components))
1354 : 0 : FOR_EACH_EDGE (e, ei, bb->preds)
1355 : 0 : if (bitmap_set_bit (seen, e->src->index))
1356 : 0 : todo.quick_push (e->src);
1357 : :
1358 : 0 : bitmap_clear (SW (bb)->tail_components);
1359 : :
1360 : 0 : bitmap_clear_bit (seen, bb->index);
1361 : : }
1362 : :
1363 : : /* And then, flood fill backwards to find for every block the components
1364 : : not needed on some path to the exit. */
1365 : :
1366 : 0 : bitmap_copy (SW (exit_block)->tail_components, components);
1367 : :
1368 : 0 : FOR_EACH_EDGE (e, ei, exit_block->preds)
1369 : : {
1370 : 0 : todo.quick_push (e->src);
1371 : 0 : bitmap_set_bit (seen, e->src->index);
1372 : : }
1373 : :
1374 : 0 : while (!todo.is_empty ())
1375 : : {
1376 : 0 : bb = todo.pop ();
1377 : :
1378 : 0 : bitmap_copy (old, SW (bb)->tail_components);
1379 : :
1380 : 0 : FOR_EACH_EDGE (e, ei, bb->succs)
1381 : 0 : bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components,
1382 : 0 : SW (e->dest)->tail_components);
1383 : :
1384 : 0 : bitmap_and_compl (SW (bb)->tail_components, SW (bb)->tail_components,
1385 : 0 : SW (bb)->has_components);
1386 : :
1387 : 0 : if (!bitmap_equal_p (old, SW (bb)->tail_components))
1388 : 0 : FOR_EACH_EDGE (e, ei, bb->preds)
1389 : 0 : if (bitmap_set_bit (seen, e->src->index))
1390 : 0 : todo.quick_push (e->src);
1391 : :
1392 : 0 : bitmap_clear_bit (seen, bb->index);
1393 : : }
1394 : :
1395 : 0 : todo.release ();
1396 : :
1397 : : /* Finally, mark everything not needed both forwards and backwards. */
1398 : :
1399 : 0 : bool did_changes = false;
1400 : :
1401 : 0 : FOR_EACH_BB_FN (bb, cfun)
1402 : : {
1403 : 0 : bitmap_copy (old, SW (bb)->has_components);
1404 : :
1405 : 0 : bitmap_and (SW (bb)->head_components, SW (bb)->head_components,
1406 : 0 : SW (bb)->tail_components);
1407 : 0 : bitmap_and_compl (SW (bb)->has_components, components,
1408 : 0 : SW (bb)->head_components);
1409 : :
1410 : 0 : if (!did_changes && !bitmap_equal_p (old, SW (bb)->has_components))
1411 : : did_changes = true;
1412 : : }
1413 : :
1414 : 0 : FOR_ALL_BB_FN (bb, cfun)
1415 : : {
1416 : 0 : if (dump_file)
1417 : : {
1418 : 0 : fprintf (dump_file, "bb %d components:", bb->index);
1419 : 0 : dump_components ("has", SW (bb)->has_components);
1420 : 0 : fprintf (dump_file, "\n");
1421 : : }
1422 : : }
1423 : :
1424 : 0 : return did_changes;
1425 : 0 : }
1426 : :
1427 : : /* If we cannot handle placing some component's prologues or epilogues where
1428 : : we decided we should place them, unmark that component in COMPONENTS so
1429 : : that it is not wrapped separately. */
1430 : : static void
1431 : 0 : disqualify_problematic_components (sbitmap components)
1432 : : {
1433 : 0 : auto_sbitmap pro (SBITMAP_SIZE (components));
1434 : 0 : auto_sbitmap epi (SBITMAP_SIZE (components));
1435 : :
1436 : 0 : basic_block bb;
1437 : 0 : FOR_EACH_BB_FN (bb, cfun)
1438 : : {
1439 : 0 : edge e;
1440 : 0 : edge_iterator ei;
1441 : 0 : FOR_EACH_EDGE (e, ei, bb->succs)
1442 : : {
1443 : : /* Find which components we want pro/epilogues for here. */
1444 : 0 : bitmap_and_compl (epi, SW (e->src)->has_components,
1445 : 0 : SW (e->dest)->has_components);
1446 : 0 : bitmap_and_compl (pro, SW (e->dest)->has_components,
1447 : 0 : SW (e->src)->has_components);
1448 : :
1449 : : /* Ask the target what it thinks about things. */
1450 : 0 : if (!bitmap_empty_p (epi))
1451 : 0 : targetm.shrink_wrap.disqualify_components (components, e, epi,
1452 : : false);
1453 : 0 : if (!bitmap_empty_p (pro))
1454 : 0 : targetm.shrink_wrap.disqualify_components (components, e, pro,
1455 : : true);
1456 : :
1457 : : /* If this edge doesn't need splitting, we're fine. */
1458 : 0 : if (single_pred_p (e->dest)
1459 : 0 : && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
1460 : 0 : continue;
1461 : :
1462 : : /* If the edge can be split, that is fine too. */
1463 : 0 : if ((e->flags & EDGE_ABNORMAL) == 0)
1464 : 0 : continue;
1465 : :
1466 : : /* We also can handle sibcalls. */
1467 : 0 : if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
1468 : : {
1469 : 0 : gcc_assert (e->flags & EDGE_SIBCALL);
1470 : 0 : continue;
1471 : : }
1472 : :
1473 : : /* Remove from consideration those components we would need
1474 : : pro/epilogues for on edges where we cannot insert them. */
1475 : 0 : bitmap_and_compl (components, components, epi);
1476 : 0 : bitmap_and_compl (components, components, pro);
1477 : :
1478 : 0 : if (dump_file && !bitmap_subset_p (epi, components))
1479 : : {
1480 : 0 : fprintf (dump_file, " BAD epi %d->%d", e->src->index,
1481 : 0 : e->dest->index);
1482 : 0 : if (e->flags & EDGE_EH)
1483 : 0 : fprintf (dump_file, " for EH");
1484 : 0 : dump_components ("epi", epi);
1485 : 0 : fprintf (dump_file, "\n");
1486 : : }
1487 : :
1488 : 0 : if (dump_file && !bitmap_subset_p (pro, components))
1489 : : {
1490 : 0 : fprintf (dump_file, " BAD pro %d->%d", e->src->index,
1491 : 0 : e->dest->index);
1492 : 0 : if (e->flags & EDGE_EH)
1493 : 0 : fprintf (dump_file, " for EH");
1494 : 0 : dump_components ("pro", pro);
1495 : 0 : fprintf (dump_file, "\n");
1496 : : }
1497 : : }
1498 : : }
1499 : 0 : }
1500 : :
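/* An illustrative aside, not part of this pass: the disqualify_components
   hook called above has the shape below.  This is a hypothetical backend
   "foo" (invented name, not any real target) that refuses to separately
   wrap any component that would need prologue or epilogue code on a
   hot/cold partition-crossing edge.  */

static void
foo_disqualify_components (sbitmap components, edge e,
			   sbitmap edge_components,
			   bool is_prologue ATTRIBUTE_UNUSED)
{
  /* EDGE_COMPONENTS are the components that would need code on E; drop
     them from COMPONENTS when we do not want to emit anything there.  */
  if (e->flags & EDGE_CROSSING)
    bitmap_and_compl (components, components, edge_components);
}
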
1501 : : /* Place code for prologues and epilogues for COMPONENTS where we can put
1502 : : that code at the start of basic blocks. */
1503 : : static void
1504 : 0 : emit_common_heads_for_components (sbitmap components)
1505 : : {
1506 : 0 : auto_sbitmap pro (SBITMAP_SIZE (components));
1507 : 0 : auto_sbitmap epi (SBITMAP_SIZE (components));
1508 : 0 : auto_sbitmap tmp (SBITMAP_SIZE (components));
1509 : :
1510 : 0 : basic_block bb;
1511 : 0 : FOR_ALL_BB_FN (bb, cfun)
1512 : 0 : bitmap_clear (SW (bb)->head_components);
1513 : :
1514 : 0 : FOR_EACH_BB_FN (bb, cfun)
1515 : : {
1516 : :       /* Find which prologue resp. epilogue components are needed on every
1517 : :          predecessor edge of this block.  */
1518 : :
1519 : : /* First, select all possible components. */
1520 : 0 : bitmap_copy (epi, components);
1521 : 0 : bitmap_copy (pro, components);
1522 : :
1523 : 0 : edge e;
1524 : 0 : edge_iterator ei;
1525 : 0 : FOR_EACH_EDGE (e, ei, bb->preds)
1526 : : {
1527 : 0 : if (e->flags & EDGE_ABNORMAL)
1528 : : {
1529 : 0 : bitmap_clear (epi);
1530 : 0 : bitmap_clear (pro);
1531 : 0 : break;
1532 : : }
1533 : :
1534 : : /* Deselect those epilogue components that should not be inserted
1535 : : for this edge. */
1536 : 0 : bitmap_and_compl (tmp, SW (e->src)->has_components,
1537 : 0 : SW (e->dest)->has_components);
1538 : 0 : bitmap_and (epi, epi, tmp);
1539 : :
1540 : :           /* Similarly, for the prologue.  */
1541 : 0 : bitmap_and_compl (tmp, SW (e->dest)->has_components,
1542 : 0 : SW (e->src)->has_components);
1543 : 0 : bitmap_and (pro, pro, tmp);
1544 : : }
1545 : :
1546 : 0 : if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
1547 : 0 : fprintf (dump_file, " bb %d", bb->index);
1548 : :
1549 : 0 : if (dump_file && !bitmap_empty_p (epi))
1550 : 0 : dump_components ("epi", epi);
1551 : 0 : if (dump_file && !bitmap_empty_p (pro))
1552 : 0 : dump_components ("pro", pro);
1553 : :
1554 : 0 : if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
1555 : 0 : fprintf (dump_file, "\n");
1556 : :
1557 : : /* Place code after the BB note. */
1558 : 0 : if (!bitmap_empty_p (pro))
1559 : : {
1560 : 0 : start_sequence ();
1561 : 0 : targetm.shrink_wrap.emit_prologue_components (pro);
1562 : 0 : rtx_insn *seq = get_insns ();
1563 : 0 : end_sequence ();
1564 : 0 : record_prologue_seq (seq);
1565 : :
1566 : 0 : emit_insn_after (seq, bb_note (bb));
1567 : :
1568 : 0 : bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, pro);
1569 : : }
1570 : :
1571 : 0 : if (!bitmap_empty_p (epi))
1572 : : {
1573 : 0 : start_sequence ();
1574 : 0 : targetm.shrink_wrap.emit_epilogue_components (epi);
1575 : 0 : rtx_insn *seq = get_insns ();
1576 : 0 : end_sequence ();
1577 : 0 : record_epilogue_seq (seq);
1578 : :
1579 : 0 : emit_insn_after (seq, bb_note (bb));
1580 : :
1581 : 0 : bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, epi);
1582 : : }
1583 : : }
1584 : 0 : }
1585 : :
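/* An illustrative aside, not part of this pass: for one block, the loop
   above starts from all candidate components and intersects with what each
   incoming edge wants (an abnormal incoming edge clears the whole set), so
   only components wanted on *every* predecessor edge are emitted at the
   block's head.  The same computation on plain masks (per-edge values
   invented):  */

#include <stdio.h>

int
main (void)
{
  unsigned components = 0x0f;                   /* candidate components 0..3 */
  unsigned edge_need[] = { 0x03, 0x0b, 0x0f };  /* per-edge prologue need */

  unsigned pro = components;                    /* first, select everything */
  for (unsigned i = 0; i < 3; i++)
    pro &= edge_need[i];                        /* keep what every edge wants */

  printf ("pro at head = 0x%x\n", pro);         /* prints 0x3 */
  return 0;
}
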
1586 : : /* Place code for prologues and epilogues for COMPONENTS where we can put
1587 : : that code at the end of basic blocks. */
1588 : : static void
1589 : 0 : emit_common_tails_for_components (sbitmap components)
1590 : : {
1591 : 0 : auto_sbitmap pro (SBITMAP_SIZE (components));
1592 : 0 : auto_sbitmap epi (SBITMAP_SIZE (components));
1593 : 0 : auto_sbitmap tmp (SBITMAP_SIZE (components));
1594 : :
1595 : 0 : basic_block bb;
1596 : 0 : FOR_ALL_BB_FN (bb, cfun)
1597 : 0 : bitmap_clear (SW (bb)->tail_components);
1598 : :
1599 : 0 : FOR_EACH_BB_FN (bb, cfun)
1600 : : {
1601 : :       /* Find which prologue resp. epilogue components are needed on every
1602 : :          successor edge of this block.  */
1603 : 0 : if (EDGE_COUNT (bb->succs) == 0)
1604 : 0 : continue;
1605 : :
1606 : : /* First, select all possible components. */
1607 : 0 : bitmap_copy (epi, components);
1608 : 0 : bitmap_copy (pro, components);
1609 : :
1610 : 0 : edge e;
1611 : 0 : edge_iterator ei;
1612 : 0 : FOR_EACH_EDGE (e, ei, bb->succs)
1613 : : {
1614 : 0 : if (e->flags & EDGE_ABNORMAL)
1615 : : {
1616 : 0 : bitmap_clear (epi);
1617 : 0 : bitmap_clear (pro);
1618 : 0 : break;
1619 : : }
1620 : :
1621 : : /* Deselect those epilogue components that should not be inserted
1622 : : for this edge, and also those that are already put at the head
1623 : : of the successor block. */
1624 : 0 : bitmap_and_compl (tmp, SW (e->src)->has_components,
1625 : 0 : SW (e->dest)->has_components);
1626 : 0 : bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
1627 : 0 : bitmap_and (epi, epi, tmp);
1628 : :
1629 : : /* Similarly, for the prologue. */
1630 : 0 : bitmap_and_compl (tmp, SW (e->dest)->has_components,
1631 : 0 : SW (e->src)->has_components);
1632 : 0 : bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
1633 : 0 : bitmap_and (pro, pro, tmp);
1634 : : }
1635 : :
1636 : :       /* If the last insn of this block is a control flow insn, we cannot
1637 : : put anything after it. We can put our code before it instead,
1638 : : but only if that jump insn is a simple jump. */
1639 : 0 : rtx_insn *last_insn = BB_END (bb);
1640 : 0 : if (control_flow_insn_p (last_insn) && !simplejump_p (last_insn))
1641 : : {
1642 : 0 : bitmap_clear (epi);
1643 : 0 : bitmap_clear (pro);
1644 : : }
1645 : :
1646 : 0 : if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
1647 : 0 : fprintf (dump_file, " bb %d", bb->index);
1648 : :
1649 : 0 : if (dump_file && !bitmap_empty_p (epi))
1650 : 0 : dump_components ("epi", epi);
1651 : 0 : if (dump_file && !bitmap_empty_p (pro))
1652 : 0 : dump_components ("pro", pro);
1653 : :
1654 : 0 : if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
1655 : 0 : fprintf (dump_file, "\n");
1656 : :
1657 : : /* Put the code at the end of the BB, but before any final jump. */
1658 : 0 : if (!bitmap_empty_p (epi))
1659 : : {
1660 : 0 : start_sequence ();
1661 : 0 : targetm.shrink_wrap.emit_epilogue_components (epi);
1662 : 0 : rtx_insn *seq = get_insns ();
1663 : 0 : end_sequence ();
1664 : 0 : record_epilogue_seq (seq);
1665 : :
1666 : 0 : if (control_flow_insn_p (last_insn))
1667 : 0 : emit_insn_before (seq, last_insn);
1668 : : else
1669 : 0 : emit_insn_after (seq, last_insn);
1670 : :
1671 : 0 : bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, epi);
1672 : : }
1673 : :
1674 : 0 : if (!bitmap_empty_p (pro))
1675 : : {
1676 : 0 : start_sequence ();
1677 : 0 : targetm.shrink_wrap.emit_prologue_components (pro);
1678 : 0 : rtx_insn *seq = get_insns ();
1679 : 0 : end_sequence ();
1680 : 0 : record_prologue_seq (seq);
1681 : :
1682 : 0 : if (control_flow_insn_p (last_insn))
1683 : 0 : emit_insn_before (seq, last_insn);
1684 : : else
1685 : 0 : emit_insn_after (seq, last_insn);
1686 : :
1687 : 0 : bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, pro);
1688 : : }
1689 : : }
1690 : 0 : }
1691 : :
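/* An illustrative aside, not part of this pass: the placement rule used
   above, factored into a hypothetical helper for clarity.  A tail sequence
   goes after the last insn of the block, unless that insn is a jump, in
   which case it goes right before it (blocks ending in anything but a
   simple jump were already given up on above).  */

static void
emit_seq_at_tail (basic_block bb, rtx_insn *seq)
{
  rtx_insn *last_insn = BB_END (bb);
  if (control_flow_insn_p (last_insn))
    emit_insn_before (seq, last_insn);	/* keep the jump last */
  else
    emit_insn_after (seq, last_insn);	/* ordinary fallthrough block */
}
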
1692 : : /* Place prologues and epilogues for COMPONENTS on edges, if we haven't already
1693 : : placed them inside blocks directly. */
1694 : : static void
1695 : 0 : insert_prologue_epilogue_for_components (sbitmap components)
1696 : : {
1697 : 0 : auto_sbitmap pro (SBITMAP_SIZE (components));
1698 : 0 : auto_sbitmap epi (SBITMAP_SIZE (components));
1699 : :
1700 : 0 : basic_block bb;
1701 : 0 : FOR_EACH_BB_FN (bb, cfun)
1702 : : {
1703 : 0 : if (!bb->aux)
1704 : 0 : continue;
1705 : :
1706 : 0 : edge e;
1707 : 0 : edge_iterator ei;
1708 : 0 : FOR_EACH_EDGE (e, ei, bb->succs)
1709 : : {
1710 : : /* Find which pro/epilogue components are needed on this edge. */
1711 : 0 : bitmap_and_compl (epi, SW (e->src)->has_components,
1712 : 0 : SW (e->dest)->has_components);
1713 : 0 : bitmap_and_compl (pro, SW (e->dest)->has_components,
1714 : 0 : SW (e->src)->has_components);
1715 : 0 : bitmap_and (epi, epi, components);
1716 : 0 : bitmap_and (pro, pro, components);
1717 : :
1718 : :           /* Deselect those we have already put at the head or tail of the
1719 : :              edge's dest resp. src.  */
1720 : 0 : bitmap_and_compl (epi, epi, SW (e->dest)->head_components);
1721 : 0 : bitmap_and_compl (pro, pro, SW (e->dest)->head_components);
1722 : 0 : bitmap_and_compl (epi, epi, SW (e->src)->tail_components);
1723 : 0 : bitmap_and_compl (pro, pro, SW (e->src)->tail_components);
1724 : :
1725 : 0 : if (!bitmap_empty_p (epi) || !bitmap_empty_p (pro))
1726 : : {
1727 : 0 : if (dump_file)
1728 : : {
1729 : 0 : fprintf (dump_file, " %d->%d", e->src->index,
1730 : 0 : e->dest->index);
1731 : 0 : dump_components ("epi", epi);
1732 : 0 : dump_components ("pro", pro);
1733 : 0 : if (e->flags & EDGE_SIBCALL)
1734 : 0 : fprintf (dump_file, " (SIBCALL)");
1735 : 0 : else if (e->flags & EDGE_ABNORMAL)
1736 : 0 : fprintf (dump_file, " (ABNORMAL)");
1737 : 0 : fprintf (dump_file, "\n");
1738 : : }
1739 : :
1740 : : /* Put the epilogue components in place. */
1741 : 0 : start_sequence ();
1742 : 0 : targetm.shrink_wrap.emit_epilogue_components (epi);
1743 : 0 : rtx_insn *seq = get_insns ();
1744 : 0 : end_sequence ();
1745 : 0 : record_epilogue_seq (seq);
1746 : :
1747 : 0 : if (e->flags & EDGE_SIBCALL)
1748 : : {
1749 : 0 : gcc_assert (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun));
1750 : :
1751 : 0 : rtx_insn *insn = BB_END (e->src);
1752 : 0 : gcc_assert (CALL_P (insn) && SIBLING_CALL_P (insn));
1753 : 0 : emit_insn_before (seq, insn);
1754 : : }
1755 : 0 : else if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
1756 : : {
1757 : 0 : gcc_assert (e->flags & EDGE_FALLTHRU);
1758 : 0 : basic_block new_bb = split_edge (e);
1759 : 0 : emit_insn_after (seq, BB_END (new_bb));
1760 : : }
1761 : : else
1762 : 0 : insert_insn_on_edge (seq, e);
1763 : :
1764 : : /* Put the prologue components in place. */
1765 : 0 : start_sequence ();
1766 : 0 : targetm.shrink_wrap.emit_prologue_components (pro);
1767 : 0 : seq = get_insns ();
1768 : 0 : end_sequence ();
1769 : 0 : record_prologue_seq (seq);
1770 : :
1771 : 0 : insert_insn_on_edge (seq, e);
1772 : : }
1773 : : }
1774 : : }
1775 : :
1776 : 0 : commit_edge_insertions ();
1777 : 0 : }
1778 : :
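/* An illustrative aside, not part of this pass: the epilogue placement
   above distinguishes three kinds of edges.  The same decision, factored
   into a hypothetical helper; the caller remains responsible for calling
   commit_edge_insertions () afterwards.  */

static void
emit_epilogue_seq_on_edge (edge e, rtx_insn *seq)
{
  if (e->flags & EDGE_SIBCALL)
    /* A sibcall does not return here, so restore before the call itself.  */
    emit_insn_before (seq, BB_END (e->src));
  else if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
    /* A fallthrough into the exit block: give the code its own block.  */
    emit_insn_after (seq, BB_END (split_edge (e)));
  else
    /* Anything else: queue it; commit_edge_insertions splits if needed.  */
    insert_insn_on_edge (seq, e);
}
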
1779 : : bool
1780 : 1435842 : use_shrink_wrapping_separate (void)
1781 : : {
1782 : 1435842 : if (!(SHRINK_WRAPPING_ENABLED && flag_shrink_wrap_separate
1783 : 1008442 : && optimize_function_for_speed_p (cfun)
1784 : 946856 : && targetm.shrink_wrap.get_separate_components))
1785 : 1435842 : return false;
1786 : :
1787 : : /* We don't handle "strange" functions. */
1788 : 0 : if (cfun->calls_alloca
1789 : : || cfun->calls_setjmp
1790 : 0 : || cfun->can_throw_non_call_exceptions
1791 : 0 : || crtl->calls_eh_return
1792 : 0 : || crtl->has_nonlocal_goto
1793 : 0 : || crtl->saves_all_registers)
1794 : : return false;
1795 : :
1796 : : return true;
1797 : : }
1798 : :
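/* An illustrative aside, not part of this pass: a hypothetical translation
   unit (function and helper names invented) showing the filter above.  The
   first function can be considered for separate shrink-wrapping when
   compiled with optimization and -fshrink-wrap-separate (the flag behind
   flag_shrink_wrap_separate) on a target that provides the hooks; the
   second is rejected because it sets cfun->calls_alloca.  */

extern int helper (int);
extern int consume (char *);

int
maybe_candidate (int x)
{
  if (x > 0)
    return helper (x);		/* the call happens only on this path */
  return 0;
}

int
not_a_candidate (int n)
{
  char *buf = (char *) __builtin_alloca (n);	/* cfun->calls_alloca */
  return consume (buf);
}
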
1799 : : /* The main entry point to this subpass. FIRST_BB is where the prologue
1800 : :    would normally be put.  */
1801 : : void
1802 : 1435842 : try_shrink_wrapping_separate (basic_block first_bb)
1803 : : {
1804 : 1435842 : if (!use_shrink_wrapping_separate ())
1805 : 1435842 : return;
1806 : :
1807 : : /* Ask the target what components there are. If it returns NULL, don't
1808 : : do anything. */
1809 : 0 : sbitmap components = targetm.shrink_wrap.get_separate_components ();
1810 : 0 : if (!components)
1811 : : return;
1812 : :
1813 : :   /* We need LIVE info, computed as if nothing is defined in the entry
1814 : :      block and nothing is used in the exit block.  A block then needs a
1815 : :      component if the register for that component is in the IN, GEN or
1816 : :      KILL set for that block.  */
1817 : 0 : df_scan->local_flags |= DF_SCAN_EMPTY_ENTRY_EXIT;
1818 : 0 : df_update_entry_exit_and_calls ();
1819 : 0 : df_live_add_problem ();
1820 : 0 : df_live_set_all_dirty ();
1821 : 0 : df_analyze ();
1822 : :
1823 : 0 : calculate_dominance_info (CDI_DOMINATORS);
1824 : 0 : calculate_dominance_info (CDI_POST_DOMINATORS);
1825 : :
1826 : 0 : init_separate_shrink_wrap (components);
1827 : :
1828 : 0 : sbitmap_iterator sbi;
1829 : 0 : unsigned int j;
1830 : 0 : EXECUTE_IF_SET_IN_BITMAP (components, 0, j, sbi)
1831 : 0 : place_prologue_for_one_component (j, first_bb);
1832 : :
1833 : : /* Try to minimize the number of saves and restores. Do this as long as
1834 : : it changes anything. This does not iterate more than a few times. */
1835 : : int spread_times = 0;
1836 : 0 : while (spread_components (components))
1837 : : {
1838 : 0 : spread_times++;
1839 : :
1840 : 0 : if (dump_file)
1841 : 0 : fprintf (dump_file, "Now spread %d times.\n", spread_times);
1842 : : }
1843 : :
1844 : 0 : disqualify_problematic_components (components);
1845 : :
1846 : : /* Don't separately shrink-wrap anything where the "main" prologue will
1847 : : go; the target code can often optimize things if it is presented with
1848 : : all components together (say, if it generates store-multiple insns). */
1849 : 0 : bitmap_and_compl (components, components, SW (first_bb)->has_components);
1850 : :
1851 : 0 : if (bitmap_empty_p (components))
1852 : : {
1853 : 0 : if (dump_file)
1854 : 0 : fprintf (dump_file, "Not wrapping anything separately.\n");
1855 : : }
1856 : : else
1857 : : {
1858 : 0 : if (dump_file)
1859 : : {
1860 : 0 : fprintf (dump_file, "The components we wrap separately are");
1861 : 0 : dump_components ("sep", components);
1862 : 0 : fprintf (dump_file, "\n");
1863 : :
1864 : 0 : fprintf (dump_file, "... Inserting common heads...\n");
1865 : : }
1866 : :
1867 : 0 : emit_common_heads_for_components (components);
1868 : :
1869 : 0 : if (dump_file)
1870 : 0 : fprintf (dump_file, "... Inserting common tails...\n");
1871 : :
1872 : 0 : emit_common_tails_for_components (components);
1873 : :
1874 : 0 : if (dump_file)
1875 : 0 : fprintf (dump_file, "... Inserting the more difficult ones...\n");
1876 : :
1877 : 0 : insert_prologue_epilogue_for_components (components);
1878 : :
1879 : 0 : if (dump_file)
1880 : 0 : fprintf (dump_file, "... Done.\n");
1881 : :
1882 : 0 : targetm.shrink_wrap.set_handled_components (components);
1883 : :
1884 : 0 : crtl->shrink_wrapped_separate = true;
1885 : : }
1886 : :
1887 : 0 : fini_separate_shrink_wrap ();
1888 : :
1889 : 0 : sbitmap_free (components);
1890 : 0 : free_dominance_info (CDI_DOMINATORS);
1891 : 0 : free_dominance_info (CDI_POST_DOMINATORS);
1892 : :
1893 : : /* All done. */
1894 : 0 : df_scan->local_flags &= ~DF_SCAN_EMPTY_ENTRY_EXIT;
1895 : 0 : df_update_entry_exit_and_calls ();
1896 : 0 : df_live_set_all_dirty ();
1897 : 0 : df_analyze ();
1898 : : }
|