Line data Source code
1 : /* Early (pre-RA) rematerialization
2 : Copyright (C) 2017-2026 Free Software Foundation, Inc.
3 :
4 : This file is part of GCC.
5 :
6 : GCC is free software; you can redistribute it and/or modify it under
7 : the terms of the GNU General Public License as published by the Free
8 : Software Foundation; either version 3, or (at your option) any later
9 : version.
10 :
11 : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : for more details.
15 :
16 : You should have received a copy of the GNU General Public License
17 : along with GCC; see the file COPYING3. If not see
18 : <http://www.gnu.org/licenses/>. */
19 :
20 : #include "config.h"
21 : #include "system.h"
22 : #include "coretypes.h"
23 : #include "backend.h"
24 : #include "rtl.h"
25 : #include "df.h"
26 : #include "tree-pass.h"
27 : #include "memmodel.h"
28 : #include "emit-rtl.h"
29 : #include "insn-config.h"
30 : #include "recog.h"
31 : /* FIXME: The next two are only needed for gen_move_insn. */
32 : #include "tree.h"
33 : #include "expr.h"
34 : #include "target.h"
35 : #include "inchash.h"
36 : #include "rtlhash.h"
37 : #include "print-rtl.h"
38 : #include "rtl-iter.h"
39 : #include "regs.h"
40 : #include "function-abi.h"
41 :
42 : /* This pass runs before register allocation and implements an aggressive
43 : form of rematerialization. It looks for pseudo registers R of mode M
44 : for which:
45 :
46 : (a) there are no call-preserved registers of mode M; and
47 : (b) spilling R to the stack is expensive.
48 :
49 : The assumption is that it's better to recompute R after each call instead
50 : of spilling it, even if this extends the live ranges of other registers.
51 :
52 : The motivating example for which these conditions hold are AArch64 SVE
53 : vectors and predicates. Spilling them to the stack makes the frame
54 : variable-sized, which we'd like to avoid if possible. It's also very
55 : rare for SVE values to be "naturally" live across a call: usually this
56 : happens as a result of CSE or other code motion.
57 :
58 : The pass is split into the following phases:
59 :
60 : Collection phase
61 : ================
62 :
63 : First we go through all pseudo registers looking for any that meet
64 : the conditions above. For each such register R, we go through each
65 : instruction that defines R to see whether any of them are suitable
66 : rematerialization candidates. If at least one is, we treat all the
67 : instructions that define R as candidates, but record which ones are
68 : not in fact suitable. These unsuitable candidates exist only for the
69 : sake of calculating reaching definitions (see below).
70 :
71 : A "candidate" is a single instruction that we want to rematerialize
72 : and a "candidate register" is a register that is set by at least one
73 : candidate.
74 :
75 : Candidate sorting
76 : =================
77 :
78 : Next we sort the candidates based on the cfg postorder, so that if
79 : candidate C1 uses candidate C2, C1 has a lower index than C2.
80 : This is useful when iterating through candidate bitmaps.
81 :
82 : Reaching definition calculation
83 : ===============================
84 :
85 : We then compute standard reaching-definition sets for each candidate.
86 : Each set specifies which candidates might provide the current definition
87 : of a live candidate register.
88 :
89 : From here on, a candidate C is "live" at a point P if the candidate
90 : register defined by C is live at P and if C's definition reaches P.
91 : An instruction I "uses" a candidate C if I takes the register defined by
92 : C as input and if C is one of the reaching definitions of that register.
93 :
94 : Candidate validation and value numbering
95 : ========================================
96 :
97 : Next we simultaneously decide which candidates are valid and look
98 : for candidates that are equivalent to each other, assigning numbers
99 : to each unique candidate value. A candidate C is invalid if:
100 :
101 : (a) C uses an invalid candidate;
102 :
103 : (b) there is a cycle of candidate uses involving C; or
104 :
105 : (c) C takes a candidate register R as input and the reaching
106 : definitions of R do not have the same value number.
107 :
108 : We assign a "representative" candidate C to each value number and from
109 : here on replace references to other candidates with that value number
110 : with references to C. It is then only possible to rematerialize a
111 : register R at point P if (after this replacement) there is a single
112 : reaching definition of R at P.
113 :
114 : Local phase
115 : ===========
116 :
117 : During this phase we go through each block and look for cases in which:
118 :
119 : (a) an instruction I comes between two call instructions CI1 and CI2;
120 :
121 : (b) I uses a candidate register R;
122 :
123 : (c) a candidate C provides the only reaching definition of R; and
124 :
125 : (d) C does not come between CI1 and I.
126 :
127 : We then emit a copy of C after CI1, as well as the transitive closure
128 : TC of the candidates used by C. The copies of TC might use the original
129 : candidate registers or new temporary registers, depending on circumstances.
130 :
131 : For example, if elsewhere we have:
132 :
133 : C3: R3 <- f3 (...)
134 : ...
135 : C2: R2 <- f2 (...)
136 : ...
137 : C1: R1 <- f1 (R2, R3, ...) // uses C2 and C3
138 :
139 : then for a block containing:
140 :
141 : CI1: call
142 : ...
143 : I: use R1 // uses C1
144 : ...
145 : CI2: call
146 :
147 : we would emit:
148 :
149 : CI1: call
150 : C3': R3' <- f3 (...)
151 : C2': R2' <- f2 (...)
152 : C1': R1 <- f1 (R2', R3', ...)
153 : ...
154 : I: use R1
155 : ...
156 : CI2: call
157 :
158 : where R2' and R3' might be fresh registers. If instead we had:
159 :
160 : CI1: call
161 : ...
162 : I1: use R1 // uses C1
163 : ...
164 : I2: use R3 // uses C3
165 : ...
166 : CI2: call
167 :
168 : we would keep the original R3:
169 :
170 : CI1: call
171 : C3': R3 <- f3 (...)
172 : C2': R2' <- f2 (...)
173 : C1': R1 <- f1 (R2', R3, ...)
174 : ...
175 : I1: use R1 // uses C1
176 : ...
177 : I2: use R3 // uses C3
178 : ...
179 : CI2: call
180 :
181 : We also record the last call in each block (if any) and compute:
182 :
183 : rd_after_call:
184 : The set of candidates that either (a) are defined outside the block
185 : and are live after the last call or (b) are defined within the block
186 : and reach the end of the last call. (We don't track whether the
187 : latter values are live or not.)
188 :
189 : required_after_call:
190 : The set of candidates that need to be rematerialized after the
191 : last call in order to satisfy uses in the block itself.
192 :
193 : required_in:
194 : The set of candidates that are live on entry to the block and are
195 : used without an intervening call.
196 :
197 : In addition, we compute the initial values of the sets required by
198 : the global phase below.
199 :
200 : Global phase
201 : ============
202 :
203 : We next compute a maximal solution to the following availability
204 : problem:
205 :
206 : available_in:
207 : The set of candidates that are live on entry to a block and can
208 : be used at that point without rematerialization.
209 :
210 : available_out:
211 : The set of candidates that are live on exit from a block and can
212 : be used at that point without rematerialization.
213 :
214 : available_locally:
215 : The subset of available_out that is due to code in the block itself.
216 : It contains candidates that are defined or used in the block and
217 : not invalidated by a later call.
218 :
219 : We then go through each block B and look for an appropriate place
220 : to insert copies of required_in - available_in. Conceptually we
221 : start by placing the copies at the head of B, but then move the
222 : copy of a candidate C to predecessors if:
223 :
224 : (a) that seems cheaper;
225 :
226 : (b) there is more than one reaching definition of C's register at
227 : the head of B; or
228 :
229 : (c) copying C would clobber a hard register that is live on entry to B.
230 :
231 : Moving a copy of C to a predecessor block PB involves:
232 :
233 : (1) adding C to PB's required_after_call, if PB contains a call; or
234 :
235 : (2) adding C PB's required_in otherwise.
236 :
237 : C is then available on output from each PB and on input to B.
238 :
239 : Once all this is done, we emit instructions for the final required_in
240 : and required_after_call sets. */
241 :
242 : namespace {
243 :
244 : /* An invalid candidate index, used to indicate that there is more than
245 : one reaching definition. */
246 : const unsigned int MULTIPLE_CANDIDATES = -1U;
247 :
248 : /* Pass-specific information about one basic block. */
249 : struct remat_block_info {
250 : /* The last call instruction in the block. */
251 : rtx_insn *last_call;
252 :
253 : /* The set of candidates that are live on entry to the block. NULL is
254 : equivalent to an empty set. */
255 : bitmap rd_in;
256 :
257 : /* The set of candidates that are live on exit from the block. This might
258 : reuse rd_in. NULL is equivalent to an empty set. */
259 : bitmap rd_out;
260 :
261 : /* The subset of RD_OUT that comes from local definitions. NULL is
262 : equivalent to an empty set. */
263 : bitmap rd_gen;
264 :
265 : /* The set of candidates that the block invalidates (because it defines
266 : the register to something else, or because the register's value is
267 : no longer important). NULL is equivalent to an empty set. */
268 : bitmap rd_kill;
269 :
270 : /* The set of candidates that either (a) are defined outside the block
271 : and are live after LAST_CALL or (b) are defined within the block
272 : and reach the instruction after LAST_CALL. (We don't track whether
273 : the latter values are live or not.)
274 :
275 : Only used if LAST_CALL is nonnull. NULL is equivalent to an
276 : empty set. */
277 : bitmap rd_after_call;
278 :
279 : /* Candidates that are live and available without rematerialization
280 : on entry to the block. NULL is equivalent to an empty set. */
281 : bitmap available_in;
282 :
283 : /* Candidates that become available without rematerialization within the
284 : block, and remain so on exit. NULL is equivalent to an empty set. */
285 : bitmap available_locally;
286 :
287 : /* Candidates that are available without rematerialization on exit from
288 : the block. This might reuse available_in or available_locally. */
289 : bitmap available_out;
290 :
291 : /* Candidates that need to be rematerialized either at the start of the
292 : block or before entering the block. */
293 : bitmap required_in;
294 :
295 : /* Candidates that need to be rematerialized after LAST_CALL.
296 : Only used if LAST_CALL is nonnull. */
297 : bitmap required_after_call;
298 :
299 : /* The number of candidates in the block. */
300 : unsigned int num_candidates;
301 :
302 : /* The earliest candidate in the block (i.e. the one with the
303 : highest index). Only valid if NUM_CANDIDATES is nonzero. */
304 : unsigned int first_candidate;
305 :
306 : /* The best (lowest) execution frequency for rematerializing REQUIRED_IN.
307 : This is the execution frequency of the block if LOCAL_REMAT_CHEAPER_P,
308 : otherwise it is the sum of the execution frequencies of whichever
309 : predecessor blocks would do the rematerialization. */
310 : int remat_frequency;
311 :
312 : /* True if the block ends with an abnormal call. */
313 : unsigned int abnormal_call_p : 1;
314 :
315 : /* Used to record whether a graph traversal has visited this block. */
316 : unsigned int visited_p : 1;
317 :
318 : /* True if we have calculated REMAT_FREQUENCY. */
319 : unsigned int remat_frequency_valid_p : 1;
320 :
321 : /* True if it is cheaper to rematerialize candidates at the start of
322 : the block, rather than moving them to predecessor blocks. */
323 : unsigned int local_remat_cheaper_p : 1;
324 : };
325 :
326 : /* Information about a group of candidates with the same value number. */
327 : struct remat_equiv_class {
328 : /* The candidates that have the same value number. */
329 : bitmap members;
330 :
331 : /* The candidate that was first added to MEMBERS. */
332 : unsigned int earliest;
333 :
334 : /* The candidate that represents the others. This is always the one
335 : with the highest index. */
336 : unsigned int representative;
337 : };
338 :
339 : /* Information about an instruction that we might want to rematerialize. */
340 : struct remat_candidate {
341 : /* The pseudo register that the instruction sets. */
342 : unsigned int regno;
343 :
344 : /* A temporary register used when rematerializing uses of this candidate,
345 : if REGNO doesn't have the right value or isn't worth using. */
346 : unsigned int copy_regno;
347 :
348 : /* True if we intend to rematerialize this instruction by emitting
349 : a move of a constant into REGNO, false if we intend to emit a
350 : copy of the original instruction. */
351 : unsigned int constant_p : 1;
352 :
353 : /* True if we still think it's possible to rematerialize INSN. */
354 : unsigned int can_copy_p : 1;
355 :
356 : /* Used to record whether a graph traversal has visited this candidate. */
357 : unsigned int visited_p : 1;
358 :
359 : /* True if we have verified that it's possible to rematerialize INSN.
360 : Once this is true, both it and CAN_COPY_P remain true. */
361 : unsigned int validated_p : 1;
362 :
363 : /* True if we have "stabilized" INSN, i.e. ensured that all non-candidate
364 : registers read by INSN will have the same value when rematerializing INSN.
365 : Only ever true if CAN_COPY_P. */
366 : unsigned int stabilized_p : 1;
367 :
368 : /* Hash value used for value numbering. */
369 : hashval_t hash;
370 :
371 : /* The instruction that sets REGNO. */
372 : rtx_insn *insn;
373 :
374 : /* If CONSTANT_P, the value that should be moved into REGNO when
375 : rematerializing, otherwise the pattern of the instruction that
376 : should be used. */
377 : rtx remat_rtx;
378 :
379 : /* The set of candidates that INSN takes as input. NULL is equivalent
380 : to the empty set. All candidates in this set have a higher index
381 : than the current candidate. */
382 : bitmap uses;
383 :
384 : /* The set of hard registers that would be clobbered by rematerializing
385 : the candidate, including (transitively) all those that would be
386 : clobbered by rematerializing USES. */
387 : bitmap clobbers;
388 :
389 : /* The equivalence class to which the candidate belongs, or null if none. */
390 : remat_equiv_class *equiv_class;
391 : };
392 :
393 : /* Hash functions used for value numbering. */
394 : struct remat_candidate_hasher : nofree_ptr_hash <remat_candidate>
395 : {
396 : typedef value_type compare_type;
397 : static hashval_t hash (const remat_candidate *);
398 : static bool equal (const remat_candidate *, const remat_candidate *);
399 : };
400 :
401 : /* Main class for this pass. */
402 : class early_remat {
403 : public:
404 : early_remat (function *, sbitmap);
405 : ~early_remat ();
406 :
407 : void run (void);
408 :
409 : private:
410 : bitmap alloc_bitmap (void);
411 : bitmap get_bitmap (bitmap *);
412 : void init_temp_bitmap (bitmap *);
413 : void copy_temp_bitmap (bitmap *, bitmap *);
414 :
415 : void dump_insn_id (rtx_insn *);
416 : void dump_candidate_bitmap (bitmap);
417 : void dump_all_candidates (void);
418 : void dump_edge_list (basic_block, bool);
419 : void dump_block_info (basic_block);
420 : void dump_all_blocks (void);
421 :
422 : bool interesting_regno_p (unsigned int);
423 : remat_candidate *add_candidate (rtx_insn *, unsigned int, bool);
424 : bool maybe_add_candidate (rtx_insn *, unsigned int);
425 : bool collect_candidates (void);
426 : void init_block_info (void);
427 : void sort_candidates (void);
428 : void finalize_candidate_indices (void);
429 : void record_equiv_candidates (unsigned int, unsigned int);
430 : static bool rd_confluence_n (edge);
431 : static bool rd_transfer (int);
432 : void compute_rd (void);
433 : unsigned int canon_candidate (unsigned int);
434 : void canon_bitmap (bitmap *);
435 : unsigned int resolve_reaching_def (bitmap);
436 : bool check_candidate_uses (unsigned int);
437 : void compute_clobbers (unsigned int);
438 : void assign_value_number (unsigned int);
439 : void decide_candidate_validity (void);
440 : void restrict_remat_for_unavail_regs (bitmap, const_bitmap);
441 : void restrict_remat_for_call (bitmap, rtx_insn *);
442 : bool stable_use_p (unsigned int);
443 : void emit_copy_before (unsigned int, rtx, rtx);
444 : void stabilize_pattern (unsigned int);
445 : void replace_dest_with_copy (unsigned int);
446 : void stabilize_candidate_uses (unsigned int, bitmap, bitmap, bitmap,
447 : bitmap);
448 : void emit_remat_insns (bitmap, bitmap, bitmap, rtx_insn *);
449 : bool set_available_out (remat_block_info *);
450 : void process_block (basic_block);
451 : void local_phase (void);
452 : static bool avail_confluence_n (edge);
453 : static bool avail_transfer (int);
454 : void compute_availability (void);
455 : void unshare_available_sets (remat_block_info *);
456 : bool can_move_across_edge_p (edge);
457 : bool local_remat_cheaper_p (unsigned int);
458 : bool need_to_move_candidate_p (unsigned int, unsigned int);
459 : void compute_minimum_move_set (unsigned int, bitmap);
460 : void move_to_predecessors (unsigned int, bitmap, bitmap);
461 : void choose_rematerialization_points (void);
462 : void emit_remat_insns_for_block (basic_block);
463 : void global_phase (void);
464 :
465 : /* The function that we're optimizing. */
466 : function *m_fn;
467 :
468 : /* The modes that we want to rematerialize. */
469 : sbitmap m_selected_modes;
470 :
471 : /* All rematerialization candidates, identified by their index into the
472 : vector. */
473 : auto_vec<remat_candidate> m_candidates;
474 :
475 : /* The set of candidate registers. */
476 : bitmap_head m_candidate_regnos;
477 :
478 : /* Temporary sets. */
479 : bitmap_head m_tmp_bitmap;
480 : bitmap m_available;
481 : bitmap m_required;
482 :
483 : /* Information about each basic block. */
484 : auto_vec<remat_block_info> m_block_info;
485 :
486 : /* A mapping from register numbers to the set of associated candidates.
487 : Only valid for registers in M_CANDIDATE_REGNOS. */
488 : auto_vec<bitmap> m_regno_to_candidates;
489 :
490 : /* An obstack used for allocating bitmaps, so that we can free them all
491 : in one go. */
492 : bitmap_obstack m_obstack;
493 :
494 : /* A hash table of candidates used for value numbering. If a candidate
495 : in the table is in an equivalence class, the candidate is marked as
496 : the earliest member of the class. */
497 : hash_table<remat_candidate_hasher> m_value_table;
498 :
499 : /* Used temporarily by callback functions. */
500 : static early_remat *er;
501 : };
502 :
503 : }
504 :
505 : early_remat *early_remat::er;
506 :
507 : /* rtx_equal_p callback that treats any two SCRATCHes as equal.
508 : This allows us to compare two copies of a pattern, even though their
509 : SCRATCHes are always distinct. */
510 :
511 : static bool
512 0 : scratch_equal (const_rtx *x, const_rtx *y, rtx *nx, rtx *ny)
513 : {
514 0 : if (GET_CODE (*x) == SCRATCH && GET_CODE (*y) == SCRATCH)
515 : {
516 0 : *nx = const0_rtx;
517 0 : *ny = const0_rtx;
518 0 : return true;
519 : }
520 : return false;
521 : }
522 :
523 : /* Hash callback functions for remat_candidate. */
524 :
525 : hashval_t
526 0 : remat_candidate_hasher::hash (const remat_candidate *cand)
527 : {
528 0 : return cand->hash;
529 : }
530 :
531 : bool
532 0 : remat_candidate_hasher::equal (const remat_candidate *cand1,
533 : const remat_candidate *cand2)
534 : {
535 0 : return (cand1->regno == cand2->regno
536 0 : && cand1->constant_p == cand2->constant_p
537 0 : && rtx_equal_p (cand1->remat_rtx, cand2->remat_rtx,
538 0 : cand1->constant_p ? NULL : scratch_equal)
539 0 : && (!cand1->uses || bitmap_equal_p (cand1->uses, cand2->uses)));
540 : }
541 :
542 : /* Return true if B is null or empty. */
543 :
544 : inline bool
545 0 : empty_p (bitmap b)
546 : {
547 0 : return !b || bitmap_empty_p (b);
548 : }
549 :
550 : /* Allocate a new bitmap. It will be automatically freed at the end of
551 : the pass. */
552 :
553 : inline bitmap
554 0 : early_remat::alloc_bitmap (void)
555 : {
556 0 : return bitmap_alloc (&m_obstack);
557 : }
558 :
559 : /* Initialize *PTR to an empty bitmap if it is currently null. */
560 :
561 : inline bitmap
562 0 : early_remat::get_bitmap (bitmap *ptr)
563 : {
564 0 : if (!*ptr)
565 0 : *ptr = alloc_bitmap ();
566 0 : return *ptr;
567 : }
568 :
569 : /* *PTR is either null or empty. If it is null, initialize it to an
570 : empty bitmap. */
571 :
572 : inline void
573 0 : early_remat::init_temp_bitmap (bitmap *ptr)
574 : {
575 0 : if (!*ptr)
576 0 : *ptr = alloc_bitmap ();
577 : else
578 0 : gcc_checking_assert (bitmap_empty_p (*ptr));
579 0 : }
580 :
581 : /* Move *SRC to *DEST and leave *SRC empty. */
582 :
583 : inline void
584 0 : early_remat::copy_temp_bitmap (bitmap *dest, bitmap *src)
585 : {
586 0 : if (!empty_p (*src))
587 : {
588 0 : *dest = *src;
589 0 : *src = NULL;
590 : }
591 : else
592 0 : *dest = NULL;
593 : }
594 :
595 : /* Print INSN's identifier to the dump file. */
596 :
597 : void
598 0 : early_remat::dump_insn_id (rtx_insn *insn)
599 : {
600 0 : fprintf (dump_file, "%d[bb:%d]", INSN_UID (insn),
601 0 : BLOCK_FOR_INSN (insn)->index);
602 0 : }
603 :
604 : /* Print candidate set CANDIDATES to the dump file, with a leading space. */
605 :
606 : void
607 0 : early_remat::dump_candidate_bitmap (bitmap candidates)
608 : {
609 0 : if (empty_p (candidates))
610 : {
611 0 : fprintf (dump_file, " none");
612 0 : return;
613 : }
614 :
615 0 : unsigned int cand_index;
616 0 : bitmap_iterator bi;
617 0 : EXECUTE_IF_SET_IN_BITMAP (candidates, 0, cand_index, bi)
618 0 : fprintf (dump_file, " %d", cand_index);
619 : }
620 :
621 : /* Print information about all candidates to the dump file. */
622 :
623 : void
624 0 : early_remat::dump_all_candidates (void)
625 : {
626 0 : fprintf (dump_file, "\n;; Candidates:\n;;\n");
627 0 : fprintf (dump_file, ";; %5s %5s %8s %s\n", "#", "reg", "mode", "insn");
628 0 : fprintf (dump_file, ";; %5s %5s %8s %s\n", "=", "===", "====", "====");
629 0 : unsigned int cand_index;
630 0 : remat_candidate *cand;
631 0 : FOR_EACH_VEC_ELT (m_candidates, cand_index, cand)
632 : {
633 0 : fprintf (dump_file, ";; %5d %5d %8s ", cand_index, cand->regno,
634 0 : GET_MODE_NAME (GET_MODE (regno_reg_rtx[cand->regno])));
635 0 : dump_insn_id (cand->insn);
636 0 : if (!cand->can_copy_p)
637 0 : fprintf (dump_file, " -- can't copy");
638 0 : fprintf (dump_file, "\n");
639 : }
640 :
641 0 : fprintf (dump_file, "\n;; Register-to-candidate mapping:\n;;\n");
642 0 : unsigned int regno;
643 0 : bitmap_iterator bi;
644 0 : EXECUTE_IF_SET_IN_BITMAP (&m_candidate_regnos, 0, regno, bi)
645 : {
646 0 : fprintf (dump_file, ";; %5d:", regno);
647 0 : dump_candidate_bitmap (m_regno_to_candidates[regno]);
648 0 : fprintf (dump_file, "\n");
649 : }
650 0 : }
651 :
652 : /* Print the predecessors or successors of BB to the dump file, with a
653 : leading space. DO_SUCC is true to print successors and false to print
654 : predecessors. */
655 :
656 : void
657 0 : early_remat::dump_edge_list (basic_block bb, bool do_succ)
658 : {
659 0 : edge e;
660 0 : edge_iterator ei;
661 0 : FOR_EACH_EDGE (e, ei, do_succ ? bb->succs : bb->preds)
662 0 : dump_edge_info (dump_file, e, TDF_NONE, do_succ);
663 0 : }
664 :
665 : /* Print information about basic block BB to the dump file. */
666 :
667 : void
668 0 : early_remat::dump_block_info (basic_block bb)
669 : {
670 0 : remat_block_info *info = &m_block_info[bb->index];
671 0 : fprintf (dump_file, ";;\n;; Block %d:", bb->index);
672 0 : int width = 25;
673 :
674 0 : fprintf (dump_file, "\n;;%*s:", width, "predecessors");
675 0 : dump_edge_list (bb, false);
676 :
677 0 : fprintf (dump_file, "\n;;%*s:", width, "successors");
678 0 : dump_edge_list (bb, true);
679 :
680 0 : fprintf (dump_file, "\n;;%*s: %d", width, "frequency",
681 : bb->count.to_frequency (m_fn));
682 :
683 0 : if (info->last_call)
684 0 : fprintf (dump_file, "\n;;%*s: %d", width, "last call",
685 0 : INSN_UID (info->last_call));
686 :
687 0 : if (!empty_p (info->rd_in))
688 : {
689 0 : fprintf (dump_file, "\n;;%*s:", width, "RD in");
690 0 : dump_candidate_bitmap (info->rd_in);
691 : }
692 0 : if (!empty_p (info->rd_kill))
693 : {
694 0 : fprintf (dump_file, "\n;;%*s:", width, "RD kill");
695 0 : dump_candidate_bitmap (info->rd_kill);
696 : }
697 0 : if (!empty_p (info->rd_gen))
698 : {
699 0 : fprintf (dump_file, "\n;;%*s:", width, "RD gen");
700 0 : dump_candidate_bitmap (info->rd_gen);
701 : }
702 0 : if (!empty_p (info->rd_after_call))
703 : {
704 0 : fprintf (dump_file, "\n;;%*s:", width, "RD after call");
705 0 : dump_candidate_bitmap (info->rd_after_call);
706 : }
707 0 : if (!empty_p (info->rd_out))
708 : {
709 0 : fprintf (dump_file, "\n;;%*s:", width, "RD out");
710 0 : if (info->rd_in == info->rd_out)
711 0 : fprintf (dump_file, " RD in");
712 : else
713 0 : dump_candidate_bitmap (info->rd_out);
714 : }
715 0 : if (!empty_p (info->available_in))
716 : {
717 0 : fprintf (dump_file, "\n;;%*s:", width, "available in");
718 0 : dump_candidate_bitmap (info->available_in);
719 : }
720 0 : if (!empty_p (info->available_locally))
721 : {
722 0 : fprintf (dump_file, "\n;;%*s:", width, "available locally");
723 0 : dump_candidate_bitmap (info->available_locally);
724 : }
725 0 : if (!empty_p (info->available_out))
726 : {
727 0 : fprintf (dump_file, "\n;;%*s:", width, "available out");
728 0 : if (info->available_in == info->available_out)
729 0 : fprintf (dump_file, " available in");
730 0 : else if (info->available_locally == info->available_out)
731 0 : fprintf (dump_file, " available locally");
732 : else
733 0 : dump_candidate_bitmap (info->available_out);
734 : }
735 0 : if (!empty_p (info->required_in))
736 : {
737 0 : fprintf (dump_file, "\n;;%*s:", width, "required in");
738 0 : dump_candidate_bitmap (info->required_in);
739 : }
740 0 : if (!empty_p (info->required_after_call))
741 : {
742 0 : fprintf (dump_file, "\n;;%*s:", width, "required after call");
743 0 : dump_candidate_bitmap (info->required_after_call);
744 : }
745 0 : fprintf (dump_file, "\n");
746 0 : }
747 :
748 : /* Print information about all basic blocks to the dump file. */
749 :
750 : void
751 0 : early_remat::dump_all_blocks (void)
752 : {
753 0 : basic_block bb;
754 0 : FOR_EACH_BB_FN (bb, m_fn)
755 0 : dump_block_info (bb);
756 0 : }
757 :
758 : /* Return true if REGNO is worth rematerializing. */
759 :
760 : bool
761 0 : early_remat::interesting_regno_p (unsigned int regno)
762 : {
763 : /* Ignore unused registers. */
764 0 : rtx reg = regno_reg_rtx[regno];
765 0 : if (!reg || DF_REG_DEF_COUNT (regno) == 0)
766 : return false;
767 :
768 : /* Make sure the register has a mode that we want to rematerialize. */
769 0 : if (!bitmap_bit_p (m_selected_modes, GET_MODE (reg)))
770 : return false;
771 :
772 : /* Ignore values that might sometimes be used uninitialized. We could
773 : instead add dummy candidates for the entry block definition, and so
774 : handle uses that are definitely not uninitialized, but the combination
775 : of the two should be rare in practice. */
776 0 : if (bitmap_bit_p (DF_LR_OUT (ENTRY_BLOCK_PTR_FOR_FN (m_fn)), regno))
777 : return false;
778 :
779 : return true;
780 : }
781 :
782 : /* Record the set of register REGNO in instruction INSN as a
783 : rematerialization candidate. CAN_COPY_P is true unless we already
784 : know that rematerialization is impossible (in which case the candidate
785 : only exists for the reaching definition calculation).
786 :
787 : The candidate's index is not fixed at this stage. */
788 :
789 : remat_candidate *
790 0 : early_remat::add_candidate (rtx_insn *insn, unsigned int regno,
791 : bool can_copy_p)
792 : {
793 0 : remat_candidate cand;
794 0 : memset (&cand, 0, sizeof (cand));
795 0 : cand.regno = regno;
796 0 : cand.insn = insn;
797 0 : cand.remat_rtx = PATTERN (insn);
798 0 : cand.can_copy_p = can_copy_p;
799 0 : m_candidates.safe_push (cand);
800 :
801 0 : bitmap_set_bit (&m_candidate_regnos, regno);
802 :
803 0 : return &m_candidates.last ();
804 : }
805 :
806 : /* Return true if we can rematerialize the set of register REGNO in
807 : instruction INSN, and add it as a candidate if so. When returning
808 : false, print the reason to the dump file. */
809 :
810 : bool
811 0 : early_remat::maybe_add_candidate (rtx_insn *insn, unsigned int regno)
812 : {
813 : #define FAILURE_FORMAT ";; Can't rematerialize set of reg %d in %d[bb:%d]: "
814 : #define FAILURE_ARGS regno, INSN_UID (insn), BLOCK_FOR_INSN (insn)->index
815 :
816 : /* The definition must come from an ordinary instruction. */
817 0 : basic_block bb = BLOCK_FOR_INSN (insn);
818 0 : if (!NONJUMP_INSN_P (insn)
819 0 : || (insn == BB_END (bb)
820 0 : && has_abnormal_or_eh_outgoing_edge_p (bb)))
821 : {
822 0 : if (dump_file)
823 0 : fprintf (dump_file, FAILURE_FORMAT "insn alters control flow\n",
824 0 : FAILURE_ARGS);
825 0 : return false;
826 : }
827 :
828 : /* Prefer to rematerialize constants directly -- it's much easier. */
829 0 : machine_mode mode = GET_MODE (regno_reg_rtx[regno]);
830 0 : if (rtx note = find_reg_equal_equiv_note (insn))
831 : {
832 0 : rtx val = XEXP (note, 0);
833 0 : if (CONSTANT_P (val)
834 0 : && targetm.legitimate_constant_p (mode, val))
835 : {
836 0 : remat_candidate *cand = add_candidate (insn, regno, true);
837 0 : cand->constant_p = true;
838 0 : cand->remat_rtx = val;
839 0 : return true;
840 : }
841 : }
842 :
843 : /* See whether the target has reasons to prevent a copy. */
844 0 : if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (insn))
845 : {
846 0 : if (dump_file)
847 0 : fprintf (dump_file, FAILURE_FORMAT "target forbids copying\n",
848 0 : FAILURE_ARGS);
849 0 : return false;
850 : }
851 :
852 : /* We can't copy trapping instructions. */
853 0 : rtx pat = PATTERN (insn);
854 0 : if (may_trap_p (pat))
855 : {
856 0 : if (dump_file)
857 0 : fprintf (dump_file, FAILURE_FORMAT "insn might trap\n", FAILURE_ARGS);
858 0 : return false;
859 : }
860 :
861 : /* We can't copy instructions that read memory, unless we know that
862 : the contents never change. */
863 0 : subrtx_iterator::array_type array;
864 0 : FOR_EACH_SUBRTX (iter, array, pat, ALL)
865 0 : if (MEM_P (*iter) && !MEM_READONLY_P (*iter))
866 : {
867 0 : if (dump_file)
868 0 : fprintf (dump_file, FAILURE_FORMAT "insn references non-constant"
869 0 : " memory\n", FAILURE_ARGS);
870 0 : return false;
871 : }
872 :
873 : /* Check each defined register. */
874 0 : df_ref ref;
875 0 : FOR_EACH_INSN_DEF (ref, insn)
876 : {
877 0 : unsigned int def_regno = DF_REF_REGNO (ref);
878 0 : if (def_regno == regno)
879 : {
880 : /* Make sure the definition is write-only. (Partial definitions,
881 : such as setting the low part and clobbering the high part,
882 : are otherwise OK.) */
883 0 : if (DF_REF_FLAGS_IS_SET (ref, DF_REF_READ_WRITE))
884 : {
885 0 : if (dump_file)
886 0 : fprintf (dump_file, FAILURE_FORMAT "destination is"
887 0 : " read-modify-write\n", FAILURE_ARGS);
888 0 : return false;
889 : }
890 : }
891 : else
892 : {
893 : /* The instruction can set additional registers, provided that
894 : they're hard registers. This is useful for instructions
895 : that alter the condition codes. */
896 0 : if (!HARD_REGISTER_NUM_P (def_regno))
897 : {
898 0 : if (dump_file)
899 0 : fprintf (dump_file, FAILURE_FORMAT "insn also sets"
900 0 : " pseudo reg %d\n", FAILURE_ARGS, def_regno);
901 0 : return false;
902 : }
903 : }
904 : }
905 :
906 : /* If the instruction uses fixed hard registers, check that those
907 : registers have the same value throughout the function. If the
908 : instruction uses non-fixed hard registers, check that we can
909 : replace them with pseudos. */
910 0 : FOR_EACH_INSN_USE (ref, insn)
911 : {
912 0 : unsigned int use_regno = DF_REF_REGNO (ref);
913 0 : if (HARD_REGISTER_NUM_P (use_regno) && fixed_regs[use_regno])
914 : {
915 0 : if (rtx_unstable_p (DF_REF_REAL_REG (ref)))
916 : {
917 0 : if (dump_file)
918 0 : fprintf (dump_file, FAILURE_FORMAT "insn uses fixed hard reg"
919 0 : " %d\n", FAILURE_ARGS, use_regno);
920 0 : return false;
921 : }
922 : }
923 0 : else if (HARD_REGISTER_NUM_P (use_regno))
924 : {
925 : /* Allocate a dummy pseudo register and temporarily install it.
926 : Make the register number depend on the mode, which should
927 : provide enough sharing for match_dup while also weeding
928 : out cases in which operands with different modes are
929 : explicitly tied. */
930 0 : rtx *loc = DF_REF_REAL_LOC (ref);
931 0 : unsigned int size = RTX_CODE_SIZE (REG);
932 0 : rtx new_reg = (rtx) alloca (size);
933 0 : memset (new_reg, 0, size);
934 0 : PUT_CODE (new_reg, REG);
935 0 : set_mode_and_regno (new_reg, GET_MODE (*loc),
936 0 : LAST_VIRTUAL_REGISTER + 1 + GET_MODE (*loc));
937 0 : validate_change (insn, loc, new_reg, 1);
938 : }
939 : }
940 0 : bool ok_p = verify_changes (0);
941 0 : cancel_changes (0);
942 0 : if (!ok_p)
943 : {
944 0 : if (dump_file)
945 0 : fprintf (dump_file, FAILURE_FORMAT "insn does not allow hard"
946 0 : " register inputs to be replaced\n", FAILURE_ARGS);
947 0 : return false;
948 : }
949 :
950 : #undef FAILURE_ARGS
951 : #undef FAILURE_FORMAT
952 :
953 0 : add_candidate (insn, regno, true);
954 0 : return true;
955 0 : }
956 :
957 : /* Calculate the set of rematerialization candidates. Return true if
958 : we find at least one. */
959 :
960 : bool
961 0 : early_remat::collect_candidates (void)
962 : {
963 0 : unsigned int nregs = DF_REG_SIZE (df);
964 0 : for (unsigned int regno = FIRST_PSEUDO_REGISTER; regno < nregs; ++regno)
965 0 : if (interesting_regno_p (regno))
966 : {
967 : /* Create candidates for all suitable definitions. */
968 0 : bitmap_clear (&m_tmp_bitmap);
969 0 : unsigned int bad = 0;
970 0 : unsigned int id = 0;
971 0 : for (df_ref ref = DF_REG_DEF_CHAIN (regno); ref;
972 0 : ref = DF_REF_NEXT_REG (ref))
973 : {
974 0 : rtx_insn *insn = DF_REF_INSN (ref);
975 0 : if (maybe_add_candidate (insn, regno))
976 0 : bitmap_set_bit (&m_tmp_bitmap, id);
977 : else
978 0 : bad += 1;
979 0 : id += 1;
980 : }
981 :
982 : /* If we found at least one suitable definition, add dummy
983 : candidates for the rest, so that we can see which definitions
984 : are live where. */
985 0 : if (!bitmap_empty_p (&m_tmp_bitmap) && bad)
986 : {
987 0 : id = 0;
988 0 : for (df_ref ref = DF_REG_DEF_CHAIN (regno); ref;
989 0 : ref = DF_REF_NEXT_REG (ref))
990 : {
991 0 : if (!bitmap_bit_p (&m_tmp_bitmap, id))
992 0 : add_candidate (DF_REF_INSN (ref), regno, false);
993 0 : id += 1;
994 : }
995 : }
996 : }
997 :
998 :
999 0 : return !m_candidates.is_empty ();
1000 : }
1001 :
1002 : /* Initialize the m_block_info array. */
1003 :
1004 : void
1005 0 : early_remat::init_block_info (void)
1006 : {
1007 0 : unsigned int n_blocks = last_basic_block_for_fn (m_fn);
1008 0 : m_block_info.safe_grow_cleared (n_blocks, true);
1009 0 : }
1010 :
1011 : /* Maps basic block indices to their position in the forward RPO. */
1012 : static unsigned int *rpo_index;
1013 :
1014 : /* Order remat_candidates X_IN and Y_IN according to the cfg postorder. */
1015 :
1016 : static int
1017 0 : compare_candidates (const void *x_in, const void *y_in)
1018 : {
1019 0 : const remat_candidate *x = (const remat_candidate *) x_in;
1020 0 : const remat_candidate *y = (const remat_candidate *) y_in;
1021 0 : basic_block x_bb = BLOCK_FOR_INSN (x->insn);
1022 0 : basic_block y_bb = BLOCK_FOR_INSN (y->insn);
1023 0 : if (x_bb != y_bb)
1024 : /* Make X and Y follow block postorder. */
1025 0 : return rpo_index[y_bb->index] - rpo_index[x_bb->index];
1026 :
1027 : /* Make X and Y follow a backward traversal of the containing block. */
1028 0 : return DF_INSN_LUID (y->insn) - DF_INSN_LUID (x->insn);
1029 : }
1030 :
1031 : /* Sort the collected rematerialization candidates so that they follow
1032 : cfg postorder. */
1033 :
1034 : void
1035 0 : early_remat::sort_candidates (void)
1036 : {
1037 : /* Make sure the DF LUIDs are up-to-date for all the blocks we
1038 : care about. */
1039 0 : bitmap_clear (&m_tmp_bitmap);
1040 0 : unsigned int cand_index;
1041 0 : remat_candidate *cand;
1042 0 : FOR_EACH_VEC_ELT (m_candidates, cand_index, cand)
1043 : {
1044 0 : basic_block bb = BLOCK_FOR_INSN (cand->insn);
1045 0 : if (bitmap_set_bit (&m_tmp_bitmap, bb->index))
1046 0 : df_recompute_luids (bb);
1047 : }
1048 :
1049 : /* Create a mapping from block numbers to their position in the
1050 : postorder. */
1051 0 : unsigned int n_blocks = last_basic_block_for_fn (m_fn);
1052 0 : int *rpo = df_get_postorder (DF_FORWARD);
1053 0 : unsigned int rpo_len = df_get_n_blocks (DF_FORWARD);
1054 0 : rpo_index = new unsigned int[n_blocks];
1055 0 : for (unsigned int i = 0; i < rpo_len; ++i)
1056 0 : rpo_index[rpo[i]] = i;
1057 :
1058 0 : m_candidates.qsort (compare_candidates);
1059 :
1060 0 : delete[] rpo_index;
1061 0 : }
1062 :
1063 : /* Commit to the current candidate indices and initialize cross-references. */
1064 :
1065 : void
1066 0 : early_remat::finalize_candidate_indices (void)
1067 : {
1068 : /* Create a bitmap for each candidate register. */
1069 0 : m_regno_to_candidates.safe_grow (max_reg_num (), true);
1070 0 : unsigned int regno;
1071 0 : bitmap_iterator bi;
1072 0 : EXECUTE_IF_SET_IN_BITMAP (&m_candidate_regnos, 0, regno, bi)
1073 0 : m_regno_to_candidates[regno] = alloc_bitmap ();
1074 :
1075 : /* Go through each candidate and record its index. */
1076 : unsigned int cand_index;
1077 : remat_candidate *cand;
1078 0 : FOR_EACH_VEC_ELT (m_candidates, cand_index, cand)
1079 : {
1080 0 : basic_block bb = BLOCK_FOR_INSN (cand->insn);
1081 0 : remat_block_info *info = &m_block_info[bb->index];
1082 0 : info->num_candidates += 1;
1083 0 : info->first_candidate = cand_index;
1084 0 : bitmap_set_bit (m_regno_to_candidates[cand->regno], cand_index);
1085 : }
1086 0 : }
1087 :
1088 : /* Record that candidates CAND1_INDEX and CAND2_INDEX are equivalent.
1089 : CAND1_INDEX might already have an equivalence class, but CAND2_INDEX
1090 : doesn't. */
1091 :
1092 : void
1093 0 : early_remat::record_equiv_candidates (unsigned int cand1_index,
1094 : unsigned int cand2_index)
1095 : {
1096 0 : if (dump_file)
1097 0 : fprintf (dump_file, ";; Candidate %d is equivalent to candidate %d\n",
1098 : cand2_index, cand1_index);
1099 :
1100 0 : remat_candidate *cand1 = &m_candidates[cand1_index];
1101 0 : remat_candidate *cand2 = &m_candidates[cand2_index];
1102 0 : gcc_checking_assert (!cand2->equiv_class);
1103 :
1104 0 : remat_equiv_class *ec = cand1->equiv_class;
1105 0 : if (!ec)
1106 : {
1107 0 : ec = XOBNEW (&m_obstack.obstack, remat_equiv_class);
1108 0 : ec->members = alloc_bitmap ();
1109 0 : bitmap_set_bit (ec->members, cand1_index);
1110 0 : ec->earliest = cand1_index;
1111 0 : ec->representative = cand1_index;
1112 0 : cand1->equiv_class = ec;
1113 : }
1114 0 : cand2->equiv_class = ec;
1115 0 : bitmap_set_bit (ec->members, cand2_index);
1116 0 : if (cand2_index > ec->representative)
1117 0 : ec->representative = cand2_index;
1118 0 : }
1119 :
1120 : /* Propagate information from the rd_out set of E->src to the rd_in set
1121 : of E->dest, when computing global reaching definitions. Return true
1122 : if something changed. */
1123 :
1124 : bool
1125 0 : early_remat::rd_confluence_n (edge e)
1126 : {
1127 0 : remat_block_info *src = &er->m_block_info[e->src->index];
1128 0 : remat_block_info *dest = &er->m_block_info[e->dest->index];
1129 :
1130 : /* available_in temporarily contains the set of candidates whose
1131 : registers are live on entry. */
1132 0 : if (empty_p (src->rd_out) || empty_p (dest->available_in))
1133 : return false;
1134 :
1135 0 : return bitmap_ior_and_into (er->get_bitmap (&dest->rd_in),
1136 0 : src->rd_out, dest->available_in);
1137 : }
1138 :
1139 : /* Propagate information from the rd_in set of block BB_INDEX to rd_out.
1140 : Return true if something changed. */
1141 :
1142 : bool
1143 0 : early_remat::rd_transfer (int bb_index)
1144 : {
1145 0 : remat_block_info *info = &er->m_block_info[bb_index];
1146 :
1147 0 : if (empty_p (info->rd_in))
1148 : return false;
1149 :
1150 0 : if (empty_p (info->rd_kill))
1151 : {
1152 0 : gcc_checking_assert (empty_p (info->rd_gen));
1153 0 : if (!info->rd_out)
1154 0 : info->rd_out = info->rd_in;
1155 : else
1156 0 : gcc_checking_assert (info->rd_out == info->rd_in);
1157 : /* Assume that we only get called if something changed. */
1158 0 : return true;
1159 : }
1160 :
1161 0 : if (empty_p (info->rd_gen))
1162 0 : return bitmap_and_compl (er->get_bitmap (&info->rd_out),
1163 0 : info->rd_in, info->rd_kill);
1164 :
1165 0 : return bitmap_ior_and_compl (er->get_bitmap (&info->rd_out), info->rd_gen,
1166 0 : info->rd_in, info->rd_kill);
1167 : }
1168 :
1169 : /* Calculate the rd_* sets for each block. */
1170 :
1171 : void
1172 0 : early_remat::compute_rd (void)
1173 : {
1174 : /* First calculate the rd_kill and rd_gen sets, using the fact
1175 : that m_candidates is sorted in order of decreasing LUID. */
1176 0 : unsigned int cand_index;
1177 0 : remat_candidate *cand;
1178 0 : FOR_EACH_VEC_ELT_REVERSE (m_candidates, cand_index, cand)
1179 : {
1180 0 : rtx_insn *insn = cand->insn;
1181 0 : basic_block bb = BLOCK_FOR_INSN (insn);
1182 0 : remat_block_info *info = &m_block_info[bb->index];
1183 0 : bitmap kill = m_regno_to_candidates[cand->regno];
1184 0 : bitmap_ior_into (get_bitmap (&info->rd_kill), kill);
1185 0 : if (bitmap_bit_p (DF_LR_OUT (bb), cand->regno))
1186 : {
1187 0 : bitmap_and_compl_into (get_bitmap (&info->rd_gen), kill);
1188 0 : bitmap_set_bit (info->rd_gen, cand_index);
1189 : }
1190 : }
1191 :
1192 : /* Set up the initial values of the other sets. */
1193 0 : basic_block bb;
1194 0 : FOR_EACH_BB_FN (bb, m_fn)
1195 : {
1196 0 : remat_block_info *info = &m_block_info[bb->index];
1197 0 : unsigned int regno;
1198 0 : bitmap_iterator bi;
1199 0 : EXECUTE_IF_AND_IN_BITMAP (DF_LR_IN (bb), &m_candidate_regnos,
1200 : 0, regno, bi)
1201 : {
1202 : /* Use available_in to record the set of candidates whose
1203 : registers are live on entry (i.e. a maximum bound on rd_in). */
1204 0 : bitmap_ior_into (get_bitmap (&info->available_in),
1205 0 : m_regno_to_candidates[regno]);
1206 :
1207 : /* Add registers that die in a block to the block's kill set,
1208 : so that we don't needlessly propagate them through the rest
1209 : of the function. */
1210 0 : if (!bitmap_bit_p (DF_LR_OUT (bb), regno))
1211 0 : bitmap_ior_into (get_bitmap (&info->rd_kill),
1212 0 : m_regno_to_candidates[regno]);
1213 : }
1214 :
1215 : /* Initialize each block's rd_out to the minimal set (the set of
1216 : local definitions). */
1217 0 : if (!empty_p (info->rd_gen))
1218 0 : bitmap_copy (get_bitmap (&info->rd_out), info->rd_gen);
1219 : }
1220 :
1221 : /* Iterate until we reach a fixed point. */
1222 0 : er = this;
1223 0 : bitmap_clear (&m_tmp_bitmap);
1224 0 : bitmap_set_range (&m_tmp_bitmap, 0, last_basic_block_for_fn (m_fn));
1225 0 : df_simple_dataflow (DF_FORWARD, NULL, NULL, rd_confluence_n, rd_transfer,
1226 : &m_tmp_bitmap, df_get_postorder (DF_FORWARD),
1227 : df_get_n_blocks (DF_FORWARD));
1228 0 : er = 0;
1229 :
1230 : /* Work out which definitions reach which candidates, again taking
1231 : advantage of the candidate order. */
1232 0 : bitmap_head reaching;
1233 0 : bitmap_initialize (&reaching, &m_obstack);
1234 0 : basic_block old_bb = NULL;
1235 0 : FOR_EACH_VEC_ELT_REVERSE (m_candidates, cand_index, cand)
1236 : {
1237 0 : bb = BLOCK_FOR_INSN (cand->insn);
1238 0 : if (bb != old_bb)
1239 : {
1240 : /* Get the definitions that reach the start of the new block. */
1241 0 : remat_block_info *info = &m_block_info[bb->index];
1242 0 : if (info->rd_in)
1243 0 : bitmap_copy (&reaching, info->rd_in);
1244 : else
1245 0 : bitmap_clear (&reaching);
1246 : old_bb = bb;
1247 : }
1248 : else
1249 : {
1250 : /* Process the definitions of the previous instruction. */
1251 0 : bitmap kill = m_regno_to_candidates[cand[1].regno];
1252 0 : bitmap_and_compl_into (&reaching, kill);
1253 0 : bitmap_set_bit (&reaching, cand_index + 1);
1254 : }
1255 :
1256 0 : if (cand->can_copy_p && !cand->constant_p)
1257 : {
1258 0 : df_ref ref;
1259 0 : FOR_EACH_INSN_USE (ref, cand->insn)
1260 : {
1261 0 : unsigned int regno = DF_REF_REGNO (ref);
1262 0 : if (bitmap_bit_p (&m_candidate_regnos, regno))
1263 : {
1264 0 : bitmap defs = m_regno_to_candidates[regno];
1265 0 : bitmap_and (&m_tmp_bitmap, defs, &reaching);
1266 0 : bitmap_ior_into (get_bitmap (&cand->uses), &m_tmp_bitmap);
1267 : }
1268 : }
1269 : }
1270 : }
1271 0 : bitmap_clear (&reaching);
1272 0 : }
1273 :
1274 : /* If CAND_INDEX is in an equivalence class, return the representative
1275 : of the class, otherwise return CAND_INDEX. */
1276 :
1277 : inline unsigned int
1278 0 : early_remat::canon_candidate (unsigned int cand_index)
1279 : {
1280 0 : if (remat_equiv_class *ec = m_candidates[cand_index].equiv_class)
1281 0 : return ec->representative;
1282 : return cand_index;
1283 : }
1284 :
1285 : /* Make candidate set *PTR refer to candidates using the representative
1286 : of each equivalence class. */
1287 :
1288 : void
1289 0 : early_remat::canon_bitmap (bitmap *ptr)
1290 : {
1291 0 : bitmap old_set = *ptr;
1292 0 : if (empty_p (old_set))
1293 0 : return;
1294 :
1295 0 : bitmap new_set = NULL;
1296 0 : unsigned int old_index;
1297 0 : bitmap_iterator bi;
1298 0 : EXECUTE_IF_SET_IN_BITMAP (old_set, 0, old_index, bi)
1299 : {
1300 0 : unsigned int new_index = canon_candidate (old_index);
1301 0 : if (old_index != new_index)
1302 : {
1303 0 : if (!new_set)
1304 : {
1305 0 : new_set = alloc_bitmap ();
1306 0 : bitmap_copy (new_set, old_set);
1307 : }
1308 0 : bitmap_clear_bit (new_set, old_index);
1309 0 : bitmap_set_bit (new_set, new_index);
1310 : }
1311 : }
1312 0 : if (new_set)
1313 : {
1314 0 : BITMAP_FREE (*ptr);
1315 0 : *ptr = new_set;
1316 : }
1317 : }
1318 :
1319 : /* If the candidates in REACHING all have the same value, return the
1320 : earliest instance of that value (i.e. the first one to be added
1321 : to m_value_table), otherwise return MULTIPLE_CANDIDATES. */
1322 :
1323 : unsigned int
1324 0 : early_remat::resolve_reaching_def (bitmap reaching)
1325 : {
1326 0 : unsigned int cand_index = bitmap_first_set_bit (reaching);
1327 0 : if (remat_equiv_class *ec = m_candidates[cand_index].equiv_class)
1328 : {
1329 0 : if (!bitmap_intersect_compl_p (reaching, ec->members))
1330 0 : return ec->earliest;
1331 : }
1332 0 : else if (bitmap_single_bit_set_p (reaching))
1333 : return cand_index;
1334 :
1335 : return MULTIPLE_CANDIDATES;
1336 : }
1337 :
1338 : /* Check whether all candidate registers used by candidate CAND_INDEX have
1339 : unique definitions. Return true if so, replacing the candidate's uses
1340 : set with the appropriate form for value numbering. */
1341 :
1342 : bool
1343 0 : early_remat::check_candidate_uses (unsigned int cand_index)
1344 : {
1345 0 : remat_candidate *cand = &m_candidates[cand_index];
1346 :
1347 : /* Process the uses for each register in turn. */
1348 0 : bitmap_head uses;
1349 0 : bitmap_initialize (&uses, &m_obstack);
1350 0 : bitmap_copy (&uses, cand->uses);
1351 0 : bitmap uses_ec = alloc_bitmap ();
1352 0 : while (!bitmap_empty_p (&uses))
1353 : {
1354 : /* Get the register for the lowest-indexed candidate remaining,
1355 : and the reaching definitions of that register. */
1356 0 : unsigned int first = bitmap_first_set_bit (&uses);
1357 0 : unsigned int regno = m_candidates[first].regno;
1358 0 : bitmap_and (&m_tmp_bitmap, &uses, m_regno_to_candidates[regno]);
1359 :
1360 : /* See whether all reaching definitions have the same value and if
1361 : so get the index of the first candidate we saw with that value. */
1362 0 : unsigned int def = resolve_reaching_def (&m_tmp_bitmap);
1363 0 : if (def == MULTIPLE_CANDIDATES)
1364 : {
1365 0 : if (dump_file)
1366 0 : fprintf (dump_file, ";; Removing candidate %d because there is"
1367 : " more than one reaching definition of reg %d\n",
1368 : cand_index, regno);
1369 0 : cand->can_copy_p = false;
1370 0 : break;
1371 : }
1372 0 : bitmap_set_bit (uses_ec, def);
1373 0 : bitmap_and_compl_into (&uses, &m_tmp_bitmap);
1374 : }
1375 0 : BITMAP_FREE (cand->uses);
1376 0 : cand->uses = uses_ec;
1377 0 : return cand->can_copy_p;
1378 : }
1379 :
1380 : /* Calculate the set of hard registers that would be clobbered by
1381 : rematerializing candidate CAND_INDEX. At this point the candidate's
1382 : set of uses is final. */
1383 :
1384 : void
1385 0 : early_remat::compute_clobbers (unsigned int cand_index)
1386 : {
1387 0 : remat_candidate *cand = &m_candidates[cand_index];
1388 0 : if (cand->uses)
1389 : {
1390 0 : unsigned int use_index;
1391 0 : bitmap_iterator bi;
1392 0 : EXECUTE_IF_SET_IN_BITMAP (cand->uses, 0, use_index, bi)
1393 0 : if (bitmap clobbers = m_candidates[use_index].clobbers)
1394 0 : bitmap_ior_into (get_bitmap (&cand->clobbers), clobbers);
1395 : }
1396 :
1397 0 : df_ref ref;
1398 0 : FOR_EACH_INSN_DEF (ref, cand->insn)
1399 : {
1400 0 : unsigned int def_regno = DF_REF_REGNO (ref);
1401 0 : if (def_regno != cand->regno)
1402 0 : bitmap_set_bit (get_bitmap (&cand->clobbers), def_regno);
1403 : }
1404 0 : }
1405 :
1406 : /* Mark candidate CAND_INDEX as validated and add it to the value table. */
1407 :
1408 : void
1409 0 : early_remat::assign_value_number (unsigned int cand_index)
1410 : {
1411 0 : remat_candidate *cand = &m_candidates[cand_index];
1412 0 : gcc_checking_assert (cand->can_copy_p && !cand->validated_p);
1413 :
1414 0 : compute_clobbers (cand_index);
1415 0 : cand->validated_p = true;
1416 :
1417 0 : inchash::hash h;
1418 0 : h.add_int (cand->regno);
1419 0 : inchash::add_rtx (cand->remat_rtx, h);
1420 0 : cand->hash = h.end ();
1421 :
1422 0 : remat_candidate **slot
1423 0 : = m_value_table.find_slot_with_hash (cand, cand->hash, INSERT);
1424 0 : if (!*slot)
1425 : {
1426 0 : *slot = cand;
1427 0 : if (dump_file)
1428 0 : fprintf (dump_file, ";; Candidate %d is not equivalent to"
1429 : " others seen so far\n", cand_index);
1430 : }
1431 : else
1432 0 : record_equiv_candidates (*slot - m_candidates.address (), cand_index);
1433 0 : }
1434 :
1435 : /* Make a final decision about which candidates are valid and assign
1436 : value numbers to those that are. */
1437 :
1438 : void
1439 0 : early_remat::decide_candidate_validity (void)
1440 : {
1441 0 : auto_vec<unsigned int, 16> stack;
1442 0 : unsigned int cand1_index;
1443 0 : remat_candidate *cand1;
1444 0 : FOR_EACH_VEC_ELT_REVERSE (m_candidates, cand1_index, cand1)
1445 : {
1446 0 : if (!cand1->can_copy_p || cand1->validated_p)
1447 0 : continue;
1448 :
1449 0 : if (empty_p (cand1->uses))
1450 : {
1451 0 : assign_value_number (cand1_index);
1452 0 : continue;
1453 : }
1454 :
1455 0 : stack.safe_push (cand1_index);
1456 0 : while (!stack.is_empty ())
1457 : {
1458 0 : unsigned int cand2_index = stack.last ();
1459 0 : unsigned int watermark = stack.length ();
1460 0 : remat_candidate *cand2 = &m_candidates[cand2_index];
1461 0 : if (!cand2->can_copy_p || cand2->validated_p)
1462 : {
1463 0 : stack.pop ();
1464 0 : continue;
1465 : }
1466 0 : cand2->visited_p = true;
1467 0 : unsigned int cand3_index;
1468 0 : bitmap_iterator bi;
1469 0 : EXECUTE_IF_SET_IN_BITMAP (cand2->uses, 0, cand3_index, bi)
1470 : {
1471 0 : remat_candidate *cand3 = &m_candidates[cand3_index];
1472 0 : if (!cand3->can_copy_p)
1473 : {
1474 0 : if (dump_file)
1475 0 : fprintf (dump_file, ";; Removing candidate %d because"
1476 : " it uses removed candidate %d\n", cand2_index,
1477 : cand3_index);
1478 0 : cand2->can_copy_p = false;
1479 0 : break;
1480 : }
1481 0 : if (!cand3->validated_p)
1482 : {
1483 0 : if (empty_p (cand3->uses))
1484 0 : assign_value_number (cand3_index);
1485 0 : else if (cand3->visited_p)
1486 : {
1487 0 : if (dump_file)
1488 0 : fprintf (dump_file, ";; Removing candidate %d"
1489 : " because its definition is cyclic\n",
1490 : cand2_index);
1491 0 : cand2->can_copy_p = false;
1492 0 : break;
1493 : }
1494 : else
1495 0 : stack.safe_push (cand3_index);
1496 : }
1497 : }
1498 0 : if (!cand2->can_copy_p)
1499 : {
1500 0 : cand2->visited_p = false;
1501 0 : stack.truncate (watermark - 1);
1502 : }
1503 0 : else if (watermark == stack.length ())
1504 : {
1505 0 : cand2->visited_p = false;
1506 0 : if (check_candidate_uses (cand2_index))
1507 0 : assign_value_number (cand2_index);
1508 0 : stack.pop ();
1509 : }
1510 : }
1511 : }
1512 :
1513 : /* Ensure that the candidates always use the same candidate index
1514 : to refer to an equivalence class. */
1515 0 : FOR_EACH_VEC_ELT_REVERSE (m_candidates, cand1_index, cand1)
1516 0 : if (cand1->can_copy_p && !empty_p (cand1->uses))
1517 : {
1518 0 : canon_bitmap (&cand1->uses);
1519 0 : gcc_checking_assert (bitmap_first_set_bit (cand1->uses) > cand1_index);
1520 : }
1521 0 : }
1522 :
1523 : /* Remove any candidates in CANDIDATES that would clobber a register in
1524 : UNAVAIL_REGS. */
1525 :
1526 : void
1527 0 : early_remat::restrict_remat_for_unavail_regs (bitmap candidates,
1528 : const_bitmap unavail_regs)
1529 : {
1530 0 : bitmap_clear (&m_tmp_bitmap);
1531 0 : unsigned int cand_index;
1532 0 : bitmap_iterator bi;
1533 0 : EXECUTE_IF_SET_IN_BITMAP (candidates, 0, cand_index, bi)
1534 : {
1535 0 : remat_candidate *cand = &m_candidates[cand_index];
1536 0 : if (cand->clobbers
1537 0 : && bitmap_intersect_p (cand->clobbers, unavail_regs))
1538 0 : bitmap_set_bit (&m_tmp_bitmap, cand_index);
1539 : }
1540 0 : bitmap_and_compl_into (candidates, &m_tmp_bitmap);
1541 0 : }
1542 :
1543 : /* Remove any candidates in CANDIDATES that would clobber a register
1544 : that is potentially live across CALL. */
1545 :
1546 : void
1547 0 : early_remat::restrict_remat_for_call (bitmap candidates, rtx_insn *call)
1548 : {
1549 : /* We don't know whether partially-clobbered registers are live
1550 : across the call or not, so assume that they are. */
1551 0 : bitmap_view<HARD_REG_SET> call_preserved_regs
1552 0 : (~insn_callee_abi (call).full_reg_clobbers ());
1553 0 : restrict_remat_for_unavail_regs (candidates, call_preserved_regs);
1554 0 : }
1555 :
1556 : /* Assuming that every path reaching a point P contains a copy of a
1557 : use U of REGNO, return true if another copy of U at P would have
1558 : access to the same value of REGNO. */
1559 :
1560 : bool
1561 0 : early_remat::stable_use_p (unsigned int regno)
1562 : {
1563 : /* Conservatively assume not for hard registers. */
1564 0 : if (HARD_REGISTER_NUM_P (regno))
1565 : return false;
1566 :
1567 : /* See if REGNO has a single definition and is never used uninitialized.
1568 : In this case the definition of REGNO dominates the common dominator
1569 : of the uses U, which in turn dominates P. */
1570 0 : if (DF_REG_DEF_COUNT (regno) == 1
1571 0 : && !bitmap_bit_p (DF_LR_OUT (ENTRY_BLOCK_PTR_FOR_FN (m_fn)), regno))
1572 : return true;
1573 :
1574 : return false;
1575 : }
1576 :
1577 : /* Emit a copy from register DEST to register SRC before candidate
1578 : CAND_INDEX's instruction. */
1579 :
1580 : void
1581 0 : early_remat::emit_copy_before (unsigned int cand_index, rtx dest, rtx src)
1582 : {
1583 0 : remat_candidate *cand = &m_candidates[cand_index];
1584 0 : if (dump_file)
1585 : {
1586 0 : fprintf (dump_file, ";; Stabilizing insn ");
1587 0 : dump_insn_id (cand->insn);
1588 0 : fprintf (dump_file, " by copying source reg %d:%s to temporary reg %d\n",
1589 0 : REGNO (src), GET_MODE_NAME (GET_MODE (src)), REGNO (dest));
1590 : }
1591 0 : emit_insn_before (gen_move_insn (dest, src), cand->insn);
1592 0 : }
1593 :
1594 : /* Check whether any inputs to candidate CAND_INDEX's instruction could
1595 : change at rematerialization points and replace them with new pseudo
1596 : registers if so. */
1597 :
1598 : void
1599 0 : early_remat::stabilize_pattern (unsigned int cand_index)
1600 : {
1601 0 : remat_candidate *cand = &m_candidates[cand_index];
1602 0 : if (cand->stabilized_p)
1603 0 : return;
1604 :
1605 0 : remat_equiv_class *ec = cand->equiv_class;
1606 0 : gcc_checking_assert (!ec || cand_index == ec->representative);
1607 :
1608 : /* Record the replacements we've made so far, so that we don't
1609 : create two separate registers for match_dups. Lookup is O(n),
1610 : but the n is very small. */
1611 0 : typedef std::pair<rtx, rtx> reg_pair;
1612 0 : auto_vec<reg_pair, 16> reg_map;
1613 :
1614 0 : rtx_insn *insn = cand->insn;
1615 0 : df_ref ref;
1616 0 : FOR_EACH_INSN_USE (ref, insn)
1617 : {
1618 0 : unsigned int old_regno = DF_REF_REGNO (ref);
1619 0 : rtx *loc = DF_REF_REAL_LOC (ref);
1620 :
1621 0 : if (HARD_REGISTER_NUM_P (old_regno) && fixed_regs[old_regno])
1622 : {
1623 : /* We checked when adding the candidate that the value is stable. */
1624 0 : gcc_checking_assert (!rtx_unstable_p (*loc));
1625 0 : continue;
1626 : }
1627 :
1628 0 : if (bitmap_bit_p (&m_candidate_regnos, old_regno))
1629 : /* We already know which candidate provides the definition
1630 : and will handle it during copying. */
1631 0 : continue;
1632 :
1633 0 : if (stable_use_p (old_regno))
1634 : /* We can continue to use the existing register. */
1635 0 : continue;
1636 :
1637 : /* We need to replace the register. See whether we've already
1638 : created a suitable copy. */
1639 0 : rtx old_reg = *loc;
1640 0 : rtx new_reg = NULL_RTX;
1641 0 : machine_mode mode = GET_MODE (old_reg);
1642 0 : reg_pair *p;
1643 0 : unsigned int pi;
1644 0 : FOR_EACH_VEC_ELT (reg_map, pi, p)
1645 0 : if (REGNO (p->first) == old_regno
1646 0 : && GET_MODE (p->first) == mode)
1647 : {
1648 0 : new_reg = p->second;
1649 0 : break;
1650 : }
1651 :
1652 0 : if (!new_reg)
1653 : {
1654 : /* Create a new register and initialize it just before
1655 : the instruction. */
1656 0 : new_reg = gen_reg_rtx (mode);
1657 0 : reg_map.safe_push (reg_pair (old_reg, new_reg));
1658 0 : if (ec)
1659 : {
1660 0 : unsigned int member_index;
1661 0 : bitmap_iterator bi;
1662 0 : EXECUTE_IF_SET_IN_BITMAP (ec->members, 0, member_index, bi)
1663 0 : emit_copy_before (member_index, new_reg, old_reg);
1664 : }
1665 : else
1666 0 : emit_copy_before (cand_index, new_reg, old_reg);
1667 : }
1668 0 : validate_change (insn, loc, new_reg, true);
1669 : }
1670 0 : if (num_changes_pending ())
1671 : {
1672 0 : if (!apply_change_group ())
1673 : /* We checked when adding the candidates that the pattern allows
1674 : hard registers to be replaced. Nothing else should make the
1675 : changes invalid. */
1676 0 : gcc_unreachable ();
1677 :
1678 0 : if (ec)
1679 : {
1680 : /* Copy the new pattern to other members of the equivalence
1681 : class. */
1682 0 : unsigned int member_index;
1683 0 : bitmap_iterator bi;
1684 0 : EXECUTE_IF_SET_IN_BITMAP (ec->members, 0, member_index, bi)
1685 0 : if (cand_index != member_index)
1686 : {
1687 0 : rtx_insn *other_insn = m_candidates[member_index].insn;
1688 0 : if (!validate_change (other_insn, &PATTERN (other_insn),
1689 0 : copy_insn (PATTERN (insn)), 0))
1690 : /* If the original instruction was valid then the copy
1691 : should be too. */
1692 0 : gcc_unreachable ();
1693 : }
1694 : }
1695 : }
1696 :
1697 0 : cand->stabilized_p = true;
1698 : }
1699 :
1700 : /* Change CAND's instruction so that it sets CAND->copy_regno instead
1701 : of CAND->regno. */
1702 :
1703 : void
1704 0 : early_remat::replace_dest_with_copy (unsigned int cand_index)
1705 : {
1706 0 : remat_candidate *cand = &m_candidates[cand_index];
1707 0 : df_ref def;
1708 0 : FOR_EACH_INSN_DEF (def, cand->insn)
1709 0 : if (DF_REF_REGNO (def) == cand->regno)
1710 0 : validate_change (cand->insn, DF_REF_REAL_LOC (def),
1711 0 : regno_reg_rtx[cand->copy_regno], 1);
1712 0 : }
1713 :
1714 : /* Make sure that the candidates used by candidate CAND_INDEX are available.
1715 : There are two ways of doing this for an input candidate I:
1716 :
1717 : (1) Using the existing register number and ensuring that I is available.
1718 :
1719 : (2) Using a new register number (recorded in copy_regno) and adding I
1720 : to VIA_COPY. This guarantees that making I available does not
1721 : conflict with other uses of the original register.
1722 :
1723 : REQUIRED is the set of candidates that are required but not available
1724 : before the copy of CAND_INDEX. AVAILABLE is the set of candidates
1725 : that are already available before the copy of CAND_INDEX. REACHING
1726 : is the set of candidates that reach the copy of CAND_INDEX. VIA_COPY
1727 : is the set of candidates that will use new register numbers recorded
1728 : in copy_regno instead of the original ones. */
1729 :
1730 : void
1731 0 : early_remat::stabilize_candidate_uses (unsigned int cand_index,
1732 : bitmap required, bitmap available,
1733 : bitmap reaching, bitmap via_copy)
1734 : {
1735 0 : remat_candidate *cand = &m_candidates[cand_index];
1736 0 : df_ref use;
1737 0 : FOR_EACH_INSN_USE (use, cand->insn)
1738 : {
1739 0 : unsigned int regno = DF_REF_REGNO (use);
1740 0 : if (!bitmap_bit_p (&m_candidate_regnos, regno))
1741 0 : continue;
1742 :
1743 : /* Work out which candidate provides the definition. */
1744 0 : bitmap defs = m_regno_to_candidates[regno];
1745 0 : bitmap_and (&m_tmp_bitmap, cand->uses, defs);
1746 0 : gcc_checking_assert (bitmap_single_bit_set_p (&m_tmp_bitmap));
1747 0 : unsigned int def_index = bitmap_first_set_bit (&m_tmp_bitmap);
1748 :
1749 : /* First see if DEF_INDEX is the only reaching definition of REGNO
1750 : at this point too and if it is or will become available. We can
1751 : continue to use REGNO if so. */
1752 0 : bitmap_and (&m_tmp_bitmap, reaching, defs);
1753 0 : if (bitmap_single_bit_set_p (&m_tmp_bitmap)
1754 0 : && bitmap_first_set_bit (&m_tmp_bitmap) == def_index
1755 0 : && ((available && bitmap_bit_p (available, def_index))
1756 0 : || bitmap_bit_p (required, def_index)))
1757 : {
1758 0 : if (dump_file)
1759 0 : fprintf (dump_file, ";; Keeping reg %d for use of candidate %d"
1760 : " in candidate %d\n", regno, def_index, cand_index);
1761 0 : continue;
1762 : }
1763 :
1764 : /* Otherwise fall back to using a copy. There are other cases
1765 : in which we *could* continue to use REGNO, but there's not
1766 : really much point. Using a separate register ought to make
1767 : things easier for the register allocator. */
1768 0 : remat_candidate *def_cand = &m_candidates[def_index];
1769 0 : rtx *loc = DF_REF_REAL_LOC (use);
1770 0 : rtx new_reg;
1771 0 : if (bitmap_set_bit (via_copy, def_index))
1772 : {
1773 0 : new_reg = gen_reg_rtx (GET_MODE (*loc));
1774 0 : def_cand->copy_regno = REGNO (new_reg);
1775 0 : if (dump_file)
1776 0 : fprintf (dump_file, ";; Creating reg %d for use of candidate %d"
1777 : " in candidate %d\n", REGNO (new_reg), def_index,
1778 : cand_index);
1779 : }
1780 : else
1781 0 : new_reg = regno_reg_rtx[def_cand->copy_regno];
1782 0 : validate_change (cand->insn, loc, new_reg, 1);
1783 : }
1784 0 : }
1785 :
1786 : /* Rematerialize the candidates in REQUIRED after instruction INSN,
1787 : given that the candidates in AVAILABLE are already available
1788 : and that REACHING is the set of candidates live after INSN.
1789 : REQUIRED and AVAILABLE are disjoint on entry.
1790 :
1791 : Clear REQUIRED on exit. */
1792 :
1793 : void
1794 0 : early_remat::emit_remat_insns (bitmap required, bitmap available,
1795 : bitmap reaching, rtx_insn *insn)
1796 : {
1797 : /* Quick exit if there's nothing to do. */
1798 0 : if (empty_p (required))
1799 0 : return;
1800 :
1801 : /* Only reaching definitions should be available or required. */
1802 0 : gcc_checking_assert (!bitmap_intersect_compl_p (required, reaching));
1803 0 : if (available)
1804 0 : gcc_checking_assert (!bitmap_intersect_compl_p (available, reaching));
1805 :
1806 0 : bitmap_head via_copy;
1807 0 : bitmap_initialize (&via_copy, &m_obstack);
1808 0 : while (!bitmap_empty_p (required) || !bitmap_empty_p (&via_copy))
1809 : {
1810 : /* Pick the lowest-indexed candidate left. */
1811 0 : unsigned int required_index = (bitmap_empty_p (required)
1812 0 : ? ~0U : bitmap_first_set_bit (required));
1813 0 : unsigned int via_copy_index = (bitmap_empty_p (&via_copy)
1814 0 : ? ~0U : bitmap_first_set_bit (&via_copy));
1815 0 : unsigned int cand_index = MIN (required_index, via_copy_index);
1816 0 : remat_candidate *cand = &m_candidates[cand_index];
1817 :
1818 0 : bool via_copy_p = (cand_index == via_copy_index);
1819 0 : if (via_copy_p)
1820 0 : bitmap_clear_bit (&via_copy, cand_index);
1821 : else
1822 : {
1823 : /* Remove all candidates for the same register from REQUIRED. */
1824 0 : bitmap_and (&m_tmp_bitmap, reaching,
1825 0 : m_regno_to_candidates[cand->regno]);
1826 0 : bitmap_and_compl_into (required, &m_tmp_bitmap);
1827 0 : gcc_checking_assert (!bitmap_bit_p (required, cand_index));
1828 :
1829 : /* Only rematerialize if we have a single reaching definition
1830 : of the register. */
1831 0 : if (!bitmap_single_bit_set_p (&m_tmp_bitmap))
1832 : {
1833 0 : if (dump_file)
1834 : {
1835 0 : fprintf (dump_file, ";; Can't rematerialize reg %d after ",
1836 : cand->regno);
1837 0 : dump_insn_id (insn);
1838 0 : fprintf (dump_file, ": more than one reaching definition\n");
1839 : }
1840 0 : continue;
1841 : }
1842 :
1843 : /* Skip candidates that can't be rematerialized. */
1844 0 : if (!cand->can_copy_p)
1845 0 : continue;
1846 :
1847 : /* Check the function precondition. */
1848 0 : gcc_checking_assert (!available
1849 : || !bitmap_bit_p (available, cand_index));
1850 : }
1851 :
1852 : /* Invalid candidates should have been weeded out by now. */
1853 0 : gcc_assert (cand->can_copy_p);
1854 :
1855 0 : rtx new_pattern;
1856 0 : if (cand->constant_p)
1857 : {
1858 : /* Emit a simple move. */
1859 0 : unsigned int regno = via_copy_p ? cand->copy_regno : cand->regno;
1860 0 : new_pattern = gen_move_insn (regno_reg_rtx[regno], cand->remat_rtx);
1861 : }
1862 : else
1863 : {
1864 : /* If this is the first time we've copied the instruction, make
1865 : sure that any inputs will have the same value after INSN. */
1866 0 : stabilize_pattern (cand_index);
1867 :
1868 : /* Temporarily adjust the original instruction so that it has
1869 : the right form for the copy. */
1870 0 : if (via_copy_p)
1871 0 : replace_dest_with_copy (cand_index);
1872 0 : if (cand->uses)
1873 0 : stabilize_candidate_uses (cand_index, required, available,
1874 : reaching, &via_copy);
1875 :
1876 : /* Get the new instruction pattern. */
1877 0 : new_pattern = copy_insn (cand->remat_rtx);
1878 :
1879 : /* Undo the temporary changes. */
1880 0 : cancel_changes (0);
1881 : }
1882 :
1883 : /* Emit the new instruction. */
1884 0 : rtx_insn *new_insn = emit_insn_after (new_pattern, insn);
1885 :
1886 0 : if (dump_file)
1887 : {
1888 0 : fprintf (dump_file, ";; Rematerializing candidate %d after ",
1889 : cand_index);
1890 0 : dump_insn_id (insn);
1891 0 : if (via_copy_p)
1892 0 : fprintf (dump_file, " with new destination reg %d",
1893 : cand->copy_regno);
1894 0 : fprintf (dump_file, ":\n\n");
1895 0 : print_rtl_single (dump_file, new_insn);
1896 0 : fprintf (dump_file, "\n");
1897 : }
1898 : }
1899 : }
1900 :
1901 : /* Recompute INFO's available_out set, given that it's distinct from
1902 : available_in and available_locally. */
1903 :
1904 : bool
1905 0 : early_remat::set_available_out (remat_block_info *info)
1906 : {
1907 0 : if (empty_p (info->available_locally))
1908 0 : return bitmap_and_compl (get_bitmap (&info->available_out),
1909 0 : info->available_in, info->rd_kill);
1910 :
1911 0 : if (empty_p (info->rd_kill))
1912 0 : return bitmap_ior (get_bitmap (&info->available_out),
1913 0 : info->available_locally, info->available_in);
1914 :
1915 0 : return bitmap_ior_and_compl (get_bitmap (&info->available_out),
1916 0 : info->available_locally, info->available_in,
1917 0 : info->rd_kill);
1918 : }
1919 :
1920 : /* If BB has more than one call, decide which candidates should be
1921 : rematerialized after the non-final calls and emit the associated
1922 : instructions. Record other information about the block in preparation
1923 : for the global phase. */
1924 :
1925 : void
1926 0 : early_remat::process_block (basic_block bb)
1927 : {
1928 0 : remat_block_info *info = &m_block_info[bb->index];
1929 0 : rtx_insn *last_call = NULL;
1930 0 : rtx_insn *insn;
1931 :
1932 : /* Ensure that we always use the same candidate index to refer to an
1933 : equivalence class. */
1934 0 : if (info->rd_out == info->rd_in)
1935 : {
1936 0 : canon_bitmap (&info->rd_in);
1937 0 : info->rd_out = info->rd_in;
1938 : }
1939 : else
1940 : {
1941 0 : canon_bitmap (&info->rd_in);
1942 0 : canon_bitmap (&info->rd_out);
1943 : }
1944 0 : canon_bitmap (&info->rd_kill);
1945 0 : canon_bitmap (&info->rd_gen);
1946 :
1947 : /* The set of candidates that should be rematerialized on entry to the
1948 : block or after the previous call (whichever is more recent). */
1949 0 : init_temp_bitmap (&m_required);
1950 :
1951 : /* The set of candidates that reach the current instruction (i.e. are
1952 : live just before the instruction). */
1953 0 : bitmap_head reaching;
1954 0 : bitmap_initialize (&reaching, &m_obstack);
1955 0 : if (info->rd_in)
1956 0 : bitmap_copy (&reaching, info->rd_in);
1957 :
1958 : /* The set of candidates that are live and available without
1959 : rematerialization just before the current instruction. This only
1960 : accounts for earlier candidates in the block, or those that become
1961 : available by being added to M_REQUIRED. */
1962 0 : init_temp_bitmap (&m_available);
1963 :
1964 : /* Get the range of candidates in the block. */
1965 0 : unsigned int next_candidate = info->first_candidate;
1966 0 : unsigned int num_candidates = info->num_candidates;
1967 0 : remat_candidate *next_def = (num_candidates > 0
1968 0 : ? &m_candidates[next_candidate]
1969 : : NULL);
1970 :
1971 0 : FOR_BB_INSNS (bb, insn)
1972 : {
1973 0 : if (!NONDEBUG_INSN_P (insn))
1974 0 : continue;
1975 :
1976 : /* First process uses, since this is a forward walk. */
1977 0 : df_ref ref;
1978 0 : FOR_EACH_INSN_USE (ref, insn)
1979 : {
1980 0 : unsigned int regno = DF_REF_REGNO (ref);
1981 0 : if (bitmap_bit_p (&m_candidate_regnos, regno))
1982 : {
1983 0 : bitmap defs = m_regno_to_candidates[regno];
1984 0 : bitmap_and (&m_tmp_bitmap, defs, &reaching);
1985 0 : gcc_checking_assert (!bitmap_empty_p (&m_tmp_bitmap));
1986 0 : if (!bitmap_intersect_p (defs, m_available))
1987 : {
1988 : /* There has been no definition of the register since
1989 : the last call or the start of the block (whichever
1990 : is most recent). Mark the reaching definitions
1991 : as required at that point and thus available here. */
1992 0 : bitmap_ior_into (m_required, &m_tmp_bitmap);
1993 0 : bitmap_ior_into (m_available, &m_tmp_bitmap);
1994 : }
1995 : }
1996 : }
1997 :
1998 0 : if (CALL_P (insn))
1999 : {
2000 0 : if (!last_call)
2001 : {
2002 : /* The first call in the block. Record which candidates are
2003 : required at the start of the block. */
2004 0 : copy_temp_bitmap (&info->required_in, &m_required);
2005 0 : init_temp_bitmap (&m_required);
2006 : }
2007 : else
2008 : {
2009 : /* The fully-local case: candidates that need to be
2010 : rematerialized after a previous call in the block. */
2011 0 : restrict_remat_for_call (m_required, last_call);
2012 0 : emit_remat_insns (m_required, NULL, info->rd_after_call,
2013 : last_call);
2014 : }
2015 0 : last_call = insn;
2016 0 : bitmap_clear (m_available);
2017 0 : gcc_checking_assert (empty_p (m_required));
2018 : }
2019 :
2020 : /* Now process definitions. */
2021 0 : while (next_def && insn == next_def->insn)
2022 : {
2023 0 : unsigned int gen = canon_candidate (next_candidate);
2024 :
2025 : /* Other candidates with the same regno are not available
2026 : any more. */
2027 0 : bitmap kill = m_regno_to_candidates[next_def->regno];
2028 0 : bitmap_and_compl_into (m_available, kill);
2029 0 : bitmap_and_compl_into (&reaching, kill);
2030 :
2031 : /* Record that this candidate is available without
2032 : rematerialization. */
2033 0 : bitmap_set_bit (m_available, gen);
2034 0 : bitmap_set_bit (&reaching, gen);
2035 :
2036 : /* Find the next candidate in the block. */
2037 0 : num_candidates -= 1;
2038 0 : next_candidate -= 1;
2039 0 : if (num_candidates > 0)
2040 0 : next_def -= 1;
2041 : else
2042 : next_def = NULL;
2043 : }
2044 :
2045 0 : if (insn == last_call)
2046 0 : bitmap_copy (get_bitmap (&info->rd_after_call), &reaching);
2047 : }
2048 0 : bitmap_clear (&reaching);
2049 0 : gcc_checking_assert (num_candidates == 0);
2050 :
2051 : /* Remove values from the available set if they aren't live (and so
2052 : aren't interesting to successor blocks). */
2053 0 : if (info->rd_out)
2054 0 : bitmap_and_into (m_available, info->rd_out);
2055 :
2056 : /* Record the accumulated information. */
2057 0 : info->last_call = last_call;
2058 0 : info->abnormal_call_p = (last_call
2059 0 : && last_call == BB_END (bb)
2060 0 : && has_abnormal_or_eh_outgoing_edge_p (bb));
2061 0 : copy_temp_bitmap (&info->available_locally, &m_available);
2062 0 : if (last_call)
2063 0 : copy_temp_bitmap (&info->required_after_call, &m_required);
2064 : else
2065 0 : copy_temp_bitmap (&info->required_in, &m_required);
2066 :
2067 : /* Assume at first that all live-in values are available without
2068 : rematerialization (i.e. start with the most optimistic assumption). */
2069 0 : if (info->available_in)
2070 : {
2071 0 : if (info->rd_in)
2072 0 : bitmap_copy (info->available_in, info->rd_in);
2073 : else
2074 0 : BITMAP_FREE (info->available_in);
2075 : }
2076 :
2077 0 : if (last_call || empty_p (info->available_in))
2078 : /* The values available on exit from the block are exactly those that
2079 : are available locally. This set doesn't change. */
2080 0 : info->available_out = info->available_locally;
2081 0 : else if (empty_p (info->available_locally) && empty_p (info->rd_kill))
2082 : /* The values available on exit are the same as those available on entry.
2083 : Updating one updates the other. */
2084 0 : info->available_out = info->available_in;
2085 : else
2086 0 : set_available_out (info);
2087 0 : }
2088 :
2089 : /* Process each block as for process_block, visiting dominators before
2090 : the blocks they dominate. */
2091 :
2092 : void
2093 0 : early_remat::local_phase (void)
2094 : {
2095 0 : if (dump_file)
2096 0 : fprintf (dump_file, "\n;; Local phase:\n");
2097 :
2098 0 : int *rpo = df_get_postorder (DF_FORWARD);
2099 0 : unsigned int rpo_len = df_get_n_blocks (DF_FORWARD);
2100 0 : for (unsigned int i = 0; i < rpo_len; ++i)
2101 0 : if (rpo[i] >= NUM_FIXED_BLOCKS)
2102 0 : process_block (BASIC_BLOCK_FOR_FN (m_fn, rpo[i]));
2103 0 : }
2104 :
2105 : /* Return true if available values survive across edge E. */
2106 :
2107 : static inline bool
2108 0 : available_across_edge_p (edge e)
2109 : {
2110 0 : return (e->flags & EDGE_EH) == 0;
2111 : }
2112 :
2113 : /* Propagate information from the available_out set of E->src to the
2114 : available_in set of E->dest, when computing global availability.
2115 : Return true if something changed. */
2116 :
2117 : bool
2118 0 : early_remat::avail_confluence_n (edge e)
2119 : {
2120 0 : remat_block_info *src = &er->m_block_info[e->src->index];
2121 0 : remat_block_info *dest = &er->m_block_info[e->dest->index];
2122 :
2123 0 : if (!available_across_edge_p (e))
2124 : return false;
2125 :
2126 0 : if (empty_p (dest->available_in))
2127 : return false;
2128 :
2129 0 : if (!src->available_out)
2130 : {
2131 0 : bitmap_clear (dest->available_in);
2132 0 : return true;
2133 : }
2134 :
2135 0 : return bitmap_and_into (dest->available_in, src->available_out);
2136 : }
2137 :
2138 : /* Propagate information from the available_in set of block BB_INDEX
2139 : to available_out. Return true if something changed. */
2140 :
2141 : bool
2142 0 : early_remat::avail_transfer (int bb_index)
2143 : {
2144 0 : remat_block_info *info = &er->m_block_info[bb_index];
2145 :
2146 0 : if (info->available_out == info->available_locally)
2147 : return false;
2148 :
2149 0 : if (info->available_out == info->available_in)
2150 : /* Assume that we are only called if the input changed. */
2151 : return true;
2152 :
2153 0 : return er->set_available_out (info);
2154 : }
2155 :
2156 : /* Compute global availability for the function, starting with the local
2157 : information computed by local_phase. */
2158 :
2159 : void
2160 0 : early_remat::compute_availability (void)
2161 : {
2162 : /* We use df_simple_dataflow instead of the lcm routines for three reasons:
2163 :
2164 : (1) it avoids recomputing the traversal order;
2165 : (2) many of the sets are likely to be sparse, so we don't necessarily
2166 : want to use sbitmaps; and
2167 : (3) it means we can avoid creating an explicit kill set for the call. */
2168 0 : er = this;
2169 0 : bitmap_clear (&m_tmp_bitmap);
2170 0 : bitmap_set_range (&m_tmp_bitmap, 0, last_basic_block_for_fn (m_fn));
2171 0 : df_simple_dataflow (DF_FORWARD, NULL, NULL,
2172 : avail_confluence_n, avail_transfer,
2173 : &m_tmp_bitmap, df_get_postorder (DF_FORWARD),
2174 : df_get_n_blocks (DF_FORWARD));
2175 0 : er = 0;
2176 :
2177 : /* Restrict the required_in sets to values that aren't available. */
2178 0 : basic_block bb;
2179 0 : FOR_EACH_BB_FN (bb, m_fn)
2180 : {
2181 0 : remat_block_info *info = &m_block_info[bb->index];
2182 0 : if (info->required_in && info->available_in)
2183 0 : bitmap_and_compl_into (info->required_in, info->available_in);
2184 : }
2185 0 : }
2186 :
2187 : /* Make sure that INFO's available_out and available_in sets are unique. */
2188 :
2189 : inline void
2190 0 : early_remat::unshare_available_sets (remat_block_info *info)
2191 : {
2192 0 : if (info->available_in && info->available_in == info->available_out)
2193 : {
2194 0 : info->available_in = alloc_bitmap ();
2195 0 : bitmap_copy (info->available_in, info->available_out);
2196 : }
2197 0 : }
2198 :
2199 : /* Return true if it is possible to move rematerializations from the
2200 : destination of E to the source of E. */
2201 :
2202 : inline bool
2203 0 : early_remat::can_move_across_edge_p (edge e)
2204 : {
2205 0 : return (available_across_edge_p (e)
2206 0 : && !m_block_info[e->src->index].abnormal_call_p);
2207 : }
2208 :
2209 : /* Return true if it is cheaper to rematerialize values at the head of
2210 : block QUERY_BB_INDEX instead of rematerializing in its predecessors. */
2211 :
2212 : bool
2213 0 : early_remat::local_remat_cheaper_p (unsigned int query_bb_index)
2214 : {
2215 0 : if (m_block_info[query_bb_index].remat_frequency_valid_p)
2216 0 : return m_block_info[query_bb_index].local_remat_cheaper_p;
2217 :
2218 : /* Iteratively compute the cost of rematerializing values in the
2219 : predecessor blocks, then compare that with the cost of
2220 : rematerializing at the head of the block.
2221 :
2222 : A cycle indicates that there is no call on that execution path,
2223 : so it isn't necessary to rematerialize on that path. */
2224 0 : auto_vec<basic_block, 16> stack;
2225 0 : stack.quick_push (BASIC_BLOCK_FOR_FN (m_fn, query_bb_index));
2226 0 : while (!stack.is_empty ())
2227 : {
2228 0 : basic_block bb = stack.last ();
2229 0 : remat_block_info *info = &m_block_info[bb->index];
2230 0 : if (info->remat_frequency_valid_p)
2231 : {
2232 0 : stack.pop ();
2233 0 : continue;
2234 : }
2235 :
2236 0 : info->visited_p = true;
2237 0 : int frequency = 0;
2238 0 : bool can_move_p = true;
2239 0 : edge e;
2240 0 : edge_iterator ei;
2241 0 : FOR_EACH_EDGE (e, ei, bb->preds)
2242 0 : if (!can_move_across_edge_p (e))
2243 : {
2244 : can_move_p = false;
2245 : break;
2246 : }
2247 0 : else if (m_block_info[e->src->index].last_call)
2248 : /* We'll rematerialize after the call. */
2249 0 : frequency += e->src->count.to_frequency (m_fn);
2250 0 : else if (m_block_info[e->src->index].remat_frequency_valid_p)
2251 : /* Add the cost of rematerializing at the head of E->src
2252 : or in its predecessors (whichever is cheaper). */
2253 0 : frequency += m_block_info[e->src->index].remat_frequency;
2254 0 : else if (!m_block_info[e->src->index].visited_p)
2255 : /* Queue E->src and then revisit this block again. */
2256 0 : stack.safe_push (e->src);
2257 :
2258 : /* Come back to this block later if we need to process some of
2259 : its predecessors. */
2260 0 : if (stack.last () != bb)
2261 0 : continue;
2262 :
2263 : /* If rematerializing in and before the block have equal cost, prefer
2264 : rematerializing in the block. This should shorten the live range. */
2265 0 : int bb_frequency = bb->count.to_frequency (m_fn);
2266 0 : if (!can_move_p || frequency >= bb_frequency)
2267 : {
2268 0 : info->local_remat_cheaper_p = true;
2269 0 : info->remat_frequency = bb_frequency;
2270 : }
2271 : else
2272 0 : info->remat_frequency = frequency;
2273 0 : info->remat_frequency_valid_p = true;
2274 0 : info->visited_p = false;
2275 0 : if (dump_file)
2276 : {
2277 0 : if (!can_move_p)
2278 0 : fprintf (dump_file, ";; Need to rematerialize at the head of"
2279 : " block %d; cannot move to predecessors.\n", bb->index);
2280 : else
2281 : {
2282 0 : fprintf (dump_file, ";; Block %d has frequency %d,"
2283 : " rematerializing in predecessors has frequency %d",
2284 : bb->index, bb_frequency, frequency);
2285 0 : if (info->local_remat_cheaper_p)
2286 0 : fprintf (dump_file, "; prefer to rematerialize"
2287 : " in the block\n");
2288 : else
2289 0 : fprintf (dump_file, "; prefer to rematerialize"
2290 : " in predecessors\n");
2291 : }
2292 : }
2293 0 : stack.pop ();
2294 : }
2295 0 : return m_block_info[query_bb_index].local_remat_cheaper_p;
2296 0 : }
2297 :
2298 : /* Return true if we cannot rematerialize candidate CAND_INDEX at the head of
2299 : block BB_INDEX. */
2300 :
2301 : bool
2302 0 : early_remat::need_to_move_candidate_p (unsigned int bb_index,
2303 : unsigned int cand_index)
2304 : {
2305 0 : remat_block_info *info = &m_block_info[bb_index];
2306 0 : remat_candidate *cand = &m_candidates[cand_index];
2307 0 : basic_block bb = BASIC_BLOCK_FOR_FN (m_fn, bb_index);
2308 :
2309 : /* If there is more than one reaching definition of REGNO,
2310 : we'll need to rematerialize in predecessors instead. */
2311 0 : bitmap_and (&m_tmp_bitmap, info->rd_in, m_regno_to_candidates[cand->regno]);
2312 0 : if (!bitmap_single_bit_set_p (&m_tmp_bitmap))
2313 : {
2314 0 : if (dump_file)
2315 0 : fprintf (dump_file, ";; Cannot rematerialize %d at the"
2316 : " head of block %d because there is more than one"
2317 : " reaching definition of reg %d\n", cand_index,
2318 : bb_index, cand->regno);
2319 0 : return true;
2320 : }
2321 :
2322 : /* Likewise if rematerializing CAND here would clobber a live register. */
2323 0 : if (cand->clobbers
2324 0 : && bitmap_intersect_p (cand->clobbers, DF_LR_IN (bb)))
2325 : {
2326 0 : if (dump_file)
2327 0 : fprintf (dump_file, ";; Cannot rematerialize %d at the"
2328 : " head of block %d because it would clobber live"
2329 : " registers\n", cand_index, bb_index);
2330 0 : return true;
2331 : }
2332 :
2333 : return false;
2334 : }
2335 :
2336 : /* Set REQUIRED to the minimum set of candidates that must be rematerialized
2337 : in predecessors of block BB_INDEX instead of at the start of the block. */
2338 :
2339 : void
2340 0 : early_remat::compute_minimum_move_set (unsigned int bb_index,
2341 : bitmap required)
2342 : {
2343 0 : remat_block_info *info = &m_block_info[bb_index];
2344 0 : bitmap_head remaining;
2345 :
2346 0 : bitmap_clear (required);
2347 0 : bitmap_initialize (&remaining, &m_obstack);
2348 0 : bitmap_copy (&remaining, info->required_in);
2349 0 : while (!bitmap_empty_p (&remaining))
2350 : {
2351 0 : unsigned int cand_index = bitmap_first_set_bit (&remaining);
2352 0 : remat_candidate *cand = &m_candidates[cand_index];
2353 0 : bitmap_clear_bit (&remaining, cand_index);
2354 :
2355 : /* Leave invalid candidates where they are. */
2356 0 : if (!cand->can_copy_p)
2357 0 : continue;
2358 :
2359 : /* Decide whether to move this candidate. */
2360 0 : if (!bitmap_bit_p (required, cand_index))
2361 : {
2362 0 : if (!need_to_move_candidate_p (bb_index, cand_index))
2363 0 : continue;
2364 0 : bitmap_set_bit (required, cand_index);
2365 : }
2366 :
2367 : /* Also move values used by the candidate, so that we don't
2368 : rematerialize them twice. */
2369 0 : if (cand->uses)
2370 : {
2371 0 : bitmap_ior_and_into (required, cand->uses, info->required_in);
2372 0 : bitmap_ior_and_into (&remaining, cand->uses, info->required_in);
2373 : }
2374 : }
2375 0 : }
2376 :
2377 : /* Make the predecessors of BB_INDEX rematerialize the candidates in
2378 : REQUIRED. Add any blocks whose required_in set changes to
2379 : PENDING_BLOCKS. */
2380 :
2381 : void
2382 0 : early_remat::move_to_predecessors (unsigned int bb_index, bitmap required,
2383 : bitmap pending_blocks)
2384 : {
2385 0 : if (empty_p (required))
2386 0 : return;
2387 0 : remat_block_info *dest_info = &m_block_info[bb_index];
2388 0 : basic_block bb = BASIC_BLOCK_FOR_FN (m_fn, bb_index);
2389 0 : edge e;
2390 0 : edge_iterator ei;
2391 0 : FOR_EACH_EDGE (e, ei, bb->preds)
2392 : {
2393 0 : remat_block_info *src_info = &m_block_info[e->src->index];
2394 :
2395 : /* Restrict the set we add to the reaching definitions. */
2396 0 : bitmap_and (&m_tmp_bitmap, required, src_info->rd_out);
2397 0 : if (bitmap_empty_p (&m_tmp_bitmap))
2398 0 : continue;
2399 :
2400 0 : if (!can_move_across_edge_p (e))
2401 : {
2402 : /* We can't move the rematerialization and we can't do it at
2403 : the start of the block either. In this case we just give up
2404 : and rely on spilling to make the values available across E. */
2405 0 : if (dump_file)
2406 : {
2407 0 : fprintf (dump_file, ";; Cannot rematerialize the following"
2408 0 : " candidates in block %d:", e->src->index);
2409 0 : dump_candidate_bitmap (required);
2410 0 : fprintf (dump_file, "\n");
2411 : }
2412 0 : continue;
2413 : }
2414 :
2415 : /* Remove candidates that are already available. */
2416 0 : if (src_info->available_out)
2417 : {
2418 0 : bitmap_and_compl_into (&m_tmp_bitmap, src_info->available_out);
2419 0 : if (bitmap_empty_p (&m_tmp_bitmap))
2420 0 : continue;
2421 : }
2422 :
2423 : /* Add the remaining candidates to the appropriate required set. */
2424 0 : if (dump_file)
2425 : {
2426 0 : fprintf (dump_file, ";; Moving this set from block %d"
2427 0 : " to block %d:", bb_index, e->src->index);
2428 0 : dump_candidate_bitmap (&m_tmp_bitmap);
2429 0 : fprintf (dump_file, "\n");
2430 : }
2431 : /* If the source block contains a call, we want to rematerialize
2432 : after the call, otherwise we want to rematerialize at the start
2433 : of the block. */
2434 0 : bitmap src_required = get_bitmap (src_info->last_call
2435 : ? &src_info->required_after_call
2436 : : &src_info->required_in);
2437 0 : if (bitmap_ior_into (src_required, &m_tmp_bitmap))
2438 : {
2439 0 : if (!src_info->last_call)
2440 0 : bitmap_set_bit (pending_blocks, e->src->index);
2441 0 : unshare_available_sets (src_info);
2442 0 : bitmap_ior_into (get_bitmap (&src_info->available_out),
2443 : &m_tmp_bitmap);
2444 : }
2445 : }
2446 :
2447 : /* The candidates are now available on entry to the block. */
2448 0 : bitmap_and_compl_into (dest_info->required_in, required);
2449 0 : unshare_available_sets (dest_info);
2450 0 : bitmap_ior_into (get_bitmap (&dest_info->available_in), required);
2451 : }
2452 :
2453 : /* Go through the candidates that are currently marked as being
2454 : rematerialized at the beginning of a block. Decide in each case
2455 : whether that's valid and profitable; if it isn't, move the
2456 : rematerialization to predecessor blocks instead. */
2457 :
2458 : void
2459 0 : early_remat::choose_rematerialization_points (void)
2460 : {
2461 0 : bitmap_head required;
2462 0 : bitmap_head pending_blocks;
2463 :
2464 0 : int *postorder = df_get_postorder (DF_BACKWARD);
2465 0 : unsigned int postorder_len = df_get_n_blocks (DF_BACKWARD);
2466 0 : bitmap_initialize (&required, &m_obstack);
2467 0 : bitmap_initialize (&pending_blocks, &m_obstack);
2468 0 : do
2469 : /* Process the blocks in postorder, to reduce the number of iterations
2470 : of the outer loop. */
2471 0 : for (unsigned int i = 0; i < postorder_len; ++i)
2472 : {
2473 0 : unsigned int bb_index = postorder[i];
2474 0 : remat_block_info *info = &m_block_info[bb_index];
2475 0 : bitmap_clear_bit (&pending_blocks, bb_index);
2476 :
2477 0 : if (empty_p (info->required_in))
2478 0 : continue;
2479 :
2480 0 : if (info->available_in)
2481 0 : gcc_checking_assert (!bitmap_intersect_p (info->required_in,
2482 : info->available_in));
2483 :
2484 0 : if (local_remat_cheaper_p (bb_index))
2485 : {
2486 : /* We'd prefer to rematerialize at the head of the block.
2487 : Only move candidates if we need to. */
2488 0 : compute_minimum_move_set (bb_index, &required);
2489 0 : move_to_predecessors (bb_index, &required, &pending_blocks);
2490 : }
2491 : else
2492 0 : move_to_predecessors (bb_index, info->required_in,
2493 : &pending_blocks);
2494 : }
2495 0 : while (!bitmap_empty_p (&pending_blocks));
2496 0 : bitmap_clear (&required);
2497 0 : }
2498 :
2499 : /* Emit all rematerialization instructions queued for BB. */
2500 :
2501 : void
2502 0 : early_remat::emit_remat_insns_for_block (basic_block bb)
2503 : {
2504 0 : remat_block_info *info = &m_block_info[bb->index];
2505 :
2506 0 : if (info->last_call && !empty_p (info->required_after_call))
2507 : {
2508 0 : restrict_remat_for_call (info->required_after_call, info->last_call);
2509 0 : emit_remat_insns (info->required_after_call, NULL,
2510 : info->rd_after_call, info->last_call);
2511 : }
2512 :
2513 0 : if (!empty_p (info->required_in))
2514 : {
2515 0 : rtx_insn *insn = BB_HEAD (bb);
2516 0 : while (insn != BB_END (bb)
2517 0 : && !INSN_P (NEXT_INSN (insn)))
2518 : insn = NEXT_INSN (insn);
2519 0 : restrict_remat_for_unavail_regs (info->required_in, DF_LR_IN (bb));
2520 0 : emit_remat_insns (info->required_in, info->available_in,
2521 : info->rd_in, insn);
2522 : }
2523 0 : }
2524 :
2525 : /* Decide which candidates in each block's REQUIRED_IN set need to be
2526 : rematerialized and decide where the rematerialization instructions
2527 : should go. Emit queued rematerialization instructions at the start
2528 : of blocks and after the last calls in blocks. */
2529 :
2530 : void
2531 0 : early_remat::global_phase (void)
2532 : {
2533 0 : compute_availability ();
2534 0 : if (dump_file)
2535 : {
2536 0 : fprintf (dump_file, "\n;; Blocks after computing global"
2537 : " availability:\n");
2538 0 : dump_all_blocks ();
2539 : }
2540 :
2541 0 : choose_rematerialization_points ();
2542 0 : if (dump_file)
2543 : {
2544 0 : fprintf (dump_file, "\n;; Blocks after choosing rematerialization"
2545 : " points:\n");
2546 0 : dump_all_blocks ();
2547 : }
2548 :
2549 0 : basic_block bb;
2550 0 : FOR_EACH_BB_FN (bb, m_fn)
2551 0 : emit_remat_insns_for_block (bb);
2552 0 : }
2553 :
2554 : /* Main function for the pass. */
2555 :
2556 : void
2557 0 : early_remat::run (void)
2558 : {
2559 0 : df_analyze ();
2560 :
2561 0 : if (!collect_candidates ())
2562 : return;
2563 :
2564 0 : init_block_info ();
2565 0 : sort_candidates ();
2566 0 : finalize_candidate_indices ();
2567 0 : if (dump_file)
2568 0 : dump_all_candidates ();
2569 :
2570 0 : compute_rd ();
2571 0 : decide_candidate_validity ();
2572 0 : local_phase ();
2573 0 : global_phase ();
2574 : }
2575 :
2576 0 : early_remat::early_remat (function *fn, sbitmap selected_modes)
2577 0 : : m_fn (fn),
2578 0 : m_selected_modes (selected_modes),
2579 0 : m_available (0),
2580 0 : m_required (0),
2581 0 : m_value_table (63)
2582 : {
2583 0 : bitmap_obstack_initialize (&m_obstack);
2584 0 : bitmap_initialize (&m_candidate_regnos, &m_obstack);
2585 0 : bitmap_initialize (&m_tmp_bitmap, &m_obstack);
2586 0 : }
2587 :
2588 0 : early_remat::~early_remat ()
2589 : {
2590 0 : bitmap_obstack_release (&m_obstack);
2591 0 : }
2592 :
2593 : namespace {
2594 :
2595 : const pass_data pass_data_early_remat =
2596 : {
2597 : RTL_PASS, /* type */
2598 : "early_remat", /* name */
2599 : OPTGROUP_NONE, /* optinfo_flags */
2600 : TV_EARLY_REMAT, /* tv_id */
2601 : 0, /* properties_required */
2602 : 0, /* properties_provided */
2603 : 0, /* properties_destroyed */
2604 : 0, /* todo_flags_start */
2605 : TODO_df_finish, /* todo_flags_finish */
2606 : };
2607 :
2608 : class pass_early_remat : public rtl_opt_pass
2609 : {
2610 : public:
2611 285722 : pass_early_remat (gcc::context *ctxt)
2612 571444 : : rtl_opt_pass (pass_data_early_remat, ctxt)
2613 : {}
2614 :
2615 : /* opt_pass methods: */
2616 1471370 : bool gate (function *) final override
2617 : {
2618 1471370 : return optimize > 1 && NUM_POLY_INT_COEFFS > 1;
2619 : }
2620 :
2621 0 : unsigned int execute (function *f) final override
2622 : {
2623 0 : auto_sbitmap selected_modes (NUM_MACHINE_MODES);
2624 0 : bitmap_clear (selected_modes);
2625 0 : targetm.select_early_remat_modes (selected_modes);
2626 0 : if (!bitmap_empty_p (selected_modes))
2627 0 : early_remat (f, selected_modes).run ();
2628 0 : return 0;
2629 0 : }
2630 : }; // class pass_early_remat
2631 :
2632 : } // anon namespace
2633 :
2634 : rtl_opt_pass *
2635 285722 : make_pass_early_remat (gcc::context *ctxt)
2636 : {
2637 285722 : return new pass_early_remat (ctxt);
2638 : }
|