Branch data Line data Source code
1 : : /* Early (pre-RA) rematerialization
2 : : Copyright (C) 2017-2024 Free Software Foundation, Inc.
3 : :
4 : : This file is part of GCC.
5 : :
6 : : GCC is free software; you can redistribute it and/or modify it under
7 : : the terms of the GNU General Public License as published by the Free
8 : : Software Foundation; either version 3, or (at your option) any later
9 : : version.
10 : :
11 : : GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 : : WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 : : FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 : : for more details.
15 : :
16 : : You should have received a copy of the GNU General Public License
17 : : along with GCC; see the file COPYING3. If not see
18 : : <http://www.gnu.org/licenses/>. */
19 : :
20 : : #include "config.h"
21 : : #include "system.h"
22 : : #include "coretypes.h"
23 : : #include "backend.h"
24 : : #include "rtl.h"
25 : : #include "df.h"
26 : : #include "tree-pass.h"
27 : : #include "memmodel.h"
28 : : #include "emit-rtl.h"
29 : : #include "insn-config.h"
30 : : #include "recog.h"
31 : : /* FIXME: The next two are only needed for gen_move_insn. */
32 : : #include "tree.h"
33 : : #include "expr.h"
34 : : #include "target.h"
35 : : #include "inchash.h"
36 : : #include "rtlhash.h"
37 : : #include "print-rtl.h"
38 : : #include "rtl-iter.h"
39 : : #include "regs.h"
40 : : #include "function-abi.h"
41 : :
42 : : /* This pass runs before register allocation and implements an aggressive
43 : : form of rematerialization. It looks for pseudo registers R of mode M
44 : : for which:
45 : :
46 : : (a) there are no call-preserved registers of mode M; and
47 : : (b) spilling R to the stack is expensive.
48 : :
49 : : The assumption is that it's better to recompute R after each call instead
50 : : of spilling it, even if this extends the live ranges of other registers.
51 : :
52 : : The motivating example for which these conditions hold are AArch64 SVE
53 : : vectors and predicates. Spilling them to the stack makes the frame
54 : : variable-sized, which we'd like to avoid if possible. It's also very
55 : : rare for SVE values to be "naturally" live across a call: usually this
56 : : happens as a result of CSE or other code motion.
57 : :
58 : : The pass is split into the following phases:
59 : :
60 : : Collection phase
61 : : ================
62 : :
63 : : First we go through all pseudo registers looking for any that meet
64 : : the conditions above. For each such register R, we go through each
65 : : instruction that defines R to see whether any of them are suitable
66 : : rematerialization candidates. If at least one is, we treat all the
67 : : instructions that define R as candidates, but record which ones are
68 : : not in fact suitable. These unsuitable candidates exist only for the
69 : : sake of calculating reaching definitions (see below).
70 : :
71 : : A "candidate" is a single instruction that we want to rematerialize
72 : : and a "candidate register" is a register that is set by at least one
73 : : candidate.
74 : :
75 : : Candidate sorting
76 : : =================
77 : :
78 : : Next we sort the candidates based on the cfg postorder, so that if
79 : : candidate C1 uses candidate C2, C1 has a lower index than C2.
80 : : This is useful when iterating through candidate bitmaps.
81 : :
82 : : Reaching definition calculation
83 : : ===============================
84 : :
85 : : We then compute standard reaching-definition sets for each candidate.
86 : : Each set specifies which candidates might provide the current definition
87 : : of a live candidate register.
88 : :
89 : : From here on, a candidate C is "live" at a point P if the candidate
90 : : register defined by C is live at P and if C's definition reaches P.
91 : : An instruction I "uses" a candidate C if I takes the register defined by
92 : : C as input and if C is one of the reaching definitions of that register.
93 : :
94 : : Candidate validation and value numbering
95 : : ========================================
96 : :
97 : : Next we simultaneously decide which candidates are valid and look
98 : : for candidates that are equivalent to each other, assigning numbers
99 : : to each unique candidate value. A candidate C is invalid if:
100 : :
101 : : (a) C uses an invalid candidate;
102 : :
103 : : (b) there is a cycle of candidate uses involving C; or
104 : :
105 : : (c) C takes a candidate register R as input and the reaching
106 : : definitions of R do not have the same value number.
107 : :
108 : : We assign a "representative" candidate C to each value number and from
109 : : here on replace references to other candidates with that value number
110 : : with references to C. It is then only possible to rematerialize a
111 : : register R at point P if (after this replacement) there is a single
112 : : reaching definition of R at P.
113 : :
114 : : Local phase
115 : : ===========
116 : :
117 : : During this phase we go through each block and look for cases in which:
118 : :
119 : : (a) an instruction I comes between two call instructions CI1 and CI2;
120 : :
121 : : (b) I uses a candidate register R;
122 : :
123 : : (c) a candidate C provides the only reaching definition of R; and
124 : :
125 : : (d) C does not come between CI1 and I.
126 : :
127 : : We then emit a copy of C after CI1, as well as the transitive closure
128 : : TC of the candidates used by C. The copies of TC might use the original
129 : : candidate registers or new temporary registers, depending on circumstances.
130 : :
131 : : For example, if elsewhere we have:
132 : :
133 : : C3: R3 <- f3 (...)
134 : : ...
135 : : C2: R2 <- f2 (...)
136 : : ...
137 : : C1: R1 <- f1 (R2, R3, ...) // uses C2 and C3
138 : :
139 : : then for a block containing:
140 : :
141 : : CI1: call
142 : : ...
143 : : I: use R1 // uses C1
144 : : ...
145 : : CI2: call
146 : :
147 : : we would emit:
148 : :
149 : : CI1: call
150 : : C3': R3' <- f3 (...)
151 : : C2': R2' <- f2 (...)
152 : : C1': R1 <- f1 (R2', R3', ...)
153 : : ...
154 : : I: use R1
155 : : ...
156 : : CI2: call
157 : :
158 : : where R2' and R3' might be fresh registers. If instead we had:
159 : :
160 : : CI1: call
161 : : ...
162 : : I1: use R1 // uses C1
163 : : ...
164 : : I2: use R3 // uses C3
165 : : ...
166 : : CI2: call
167 : :
168 : : we would keep the original R3:
169 : :
170 : : CI1: call
171 : : C3': R3 <- f3 (...)
172 : : C2': R2' <- f2 (...)
173 : : C1': R1 <- f1 (R2', R3, ...)
174 : : ...
175 : : I1: use R1 // uses C1
176 : : ...
177 : : I2: use R3 // uses C3
178 : : ...
179 : : CI2: call
180 : :
181 : : We also record the last call in each block (if any) and compute:
182 : :
183 : : rd_after_call:
184 : : The set of candidates that either (a) are defined outside the block
185 : : and are live after the last call or (b) are defined within the block
186 : : and reach the end of the last call. (We don't track whether the
187 : : latter values are live or not.)
188 : :
189 : : required_after_call:
190 : : The set of candidates that need to be rematerialized after the
191 : : last call in order to satisfy uses in the block itself.
192 : :
193 : : required_in:
194 : : The set of candidates that are live on entry to the block and are
195 : : used without an intervening call.
196 : :
197 : : In addition, we compute the initial values of the sets required by
198 : : the global phase below.
199 : :
200 : : Global phase
201 : : ============
202 : :
203 : : We next compute a maximal solution to the following availability
204 : : problem:
205 : :
206 : : available_in:
207 : : The set of candidates that are live on entry to a block and can
208 : : be used at that point without rematerialization.
209 : :
210 : : available_out:
211 : : The set of candidates that are live on exit from a block and can
212 : : be used at that point without rematerialization.
213 : :
214 : : available_locally:
215 : : The subset of available_out that is due to code in the block itself.
216 : : It contains candidates that are defined or used in the block and
217 : : not invalidated by a later call.
218 : :
219 : : We then go through each block B and look for an appropriate place
220 : : to insert copies of required_in - available_in. Conceptually we
221 : : start by placing the copies at the head of B, but then move the
222 : : copy of a candidate C to predecessors if:
223 : :
224 : : (a) that seems cheaper;
225 : :
226 : : (b) there is more than one reaching definition of C's register at
227 : : the head of B; or
228 : :
229 : : (c) copying C would clobber a hard register that is live on entry to B.
230 : :
231 : : Moving a copy of C to a predecessor block PB involves:
232 : :
233 : : (1) adding C to PB's required_after_call, if PB contains a call; or
234 : :
235 : : (2) adding C PB's required_in otherwise.
236 : :
237 : : C is then available on output from each PB and on input to B.
238 : :
239 : : Once all this is done, we emit instructions for the final required_in
240 : : and required_after_call sets. */
241 : :
242 : : namespace {
243 : :
244 : : /* An invalid candidate index, used to indicate that there is more than
245 : : one reaching definition. */
246 : : const unsigned int MULTIPLE_CANDIDATES = -1U;
247 : :
248 : : /* Pass-specific information about one basic block. */
249 : : struct remat_block_info {
250 : : /* The last call instruction in the block. */
251 : : rtx_insn *last_call;
252 : :
253 : : /* The set of candidates that are live on entry to the block. NULL is
254 : : equivalent to an empty set. */
255 : : bitmap rd_in;
256 : :
257 : : /* The set of candidates that are live on exit from the block. This might
258 : : reuse rd_in. NULL is equivalent to an empty set. */
259 : : bitmap rd_out;
260 : :
261 : : /* The subset of RD_OUT that comes from local definitions. NULL is
262 : : equivalent to an empty set. */
263 : : bitmap rd_gen;
264 : :
265 : : /* The set of candidates that the block invalidates (because it defines
266 : : the register to something else, or because the register's value is
267 : : no longer important). NULL is equivalent to an empty set. */
268 : : bitmap rd_kill;
269 : :
270 : : /* The set of candidates that either (a) are defined outside the block
271 : : and are live after LAST_CALL or (b) are defined within the block
272 : : and reach the instruction after LAST_CALL. (We don't track whether
273 : : the latter values are live or not.)
274 : :
275 : : Only used if LAST_CALL is nonnull. NULL is equivalent to an
276 : : empty set. */
277 : : bitmap rd_after_call;
278 : :
279 : : /* Candidates that are live and available without rematerialization
280 : : on entry to the block. NULL is equivalent to an empty set. */
281 : : bitmap available_in;
282 : :
283 : : /* Candidates that become available without rematerialization within the
284 : : block, and remain so on exit. NULL is equivalent to an empty set. */
285 : : bitmap available_locally;
286 : :
287 : : /* Candidates that are available without rematerialization on exit from
288 : : the block. This might reuse available_in or available_locally. */
289 : : bitmap available_out;
290 : :
291 : : /* Candidates that need to be rematerialized either at the start of the
292 : : block or before entering the block. */
293 : : bitmap required_in;
294 : :
295 : : /* Candidates that need to be rematerialized after LAST_CALL.
296 : : Only used if LAST_CALL is nonnull. */
297 : : bitmap required_after_call;
298 : :
299 : : /* The number of candidates in the block. */
300 : : unsigned int num_candidates;
301 : :
302 : : /* The earliest candidate in the block (i.e. the one with the
303 : : highest index). Only valid if NUM_CANDIDATES is nonzero. */
304 : : unsigned int first_candidate;
305 : :
306 : : /* The best (lowest) execution frequency for rematerializing REQUIRED_IN.
307 : : This is the execution frequency of the block if LOCAL_REMAT_CHEAPER_P,
308 : : otherwise it is the sum of the execution frequencies of whichever
309 : : predecessor blocks would do the rematerialization. */
310 : : int remat_frequency;
311 : :
312 : : /* True if the block ends with an abnormal call. */
313 : : unsigned int abnormal_call_p : 1;
314 : :
315 : : /* Used to record whether a graph traversal has visited this block. */
316 : : unsigned int visited_p : 1;
317 : :
318 : : /* True if we have calculated REMAT_FREQUENCY. */
319 : : unsigned int remat_frequency_valid_p : 1;
320 : :
321 : : /* True if it is cheaper to rematerialize candidates at the start of
322 : : the block, rather than moving them to predecessor blocks. */
323 : : unsigned int local_remat_cheaper_p : 1;
324 : : };
325 : :
326 : : /* Information about a group of candidates with the same value number. */
327 : : struct remat_equiv_class {
328 : : /* The candidates that have the same value number. */
329 : : bitmap members;
330 : :
331 : : /* The candidate that was first added to MEMBERS. */
332 : : unsigned int earliest;
333 : :
334 : : /* The candidate that represents the others. This is always the one
335 : : with the highest index. */
336 : : unsigned int representative;
337 : : };
338 : :
339 : : /* Information about an instruction that we might want to rematerialize. */
340 : : struct remat_candidate {
341 : : /* The pseudo register that the instruction sets. */
342 : : unsigned int regno;
343 : :
344 : : /* A temporary register used when rematerializing uses of this candidate,
345 : : if REGNO doesn't have the right value or isn't worth using. */
346 : : unsigned int copy_regno;
347 : :
348 : : /* True if we intend to rematerialize this instruction by emitting
349 : : a move of a constant into REGNO, false if we intend to emit a
350 : : copy of the original instruction. */
351 : : unsigned int constant_p : 1;
352 : :
353 : : /* True if we still think it's possible to rematerialize INSN. */
354 : : unsigned int can_copy_p : 1;
355 : :
356 : : /* Used to record whether a graph traversal has visited this candidate. */
357 : : unsigned int visited_p : 1;
358 : :
359 : : /* True if we have verified that it's possible to rematerialize INSN.
360 : : Once this is true, both it and CAN_COPY_P remain true. */
361 : : unsigned int validated_p : 1;
362 : :
363 : : /* True if we have "stabilized" INSN, i.e. ensured that all non-candidate
364 : : registers read by INSN will have the same value when rematerializing INSN.
365 : : Only ever true if CAN_COPY_P. */
366 : : unsigned int stabilized_p : 1;
367 : :
368 : : /* Hash value used for value numbering. */
369 : : hashval_t hash;
370 : :
371 : : /* The instruction that sets REGNO. */
372 : : rtx_insn *insn;
373 : :
374 : : /* If CONSTANT_P, the value that should be moved into REGNO when
375 : : rematerializing, otherwise the pattern of the instruction that
376 : : should be used. */
377 : : rtx remat_rtx;
378 : :
379 : : /* The set of candidates that INSN takes as input. NULL is equivalent
380 : : to the empty set. All candidates in this set have a higher index
381 : : than the current candidate. */
382 : : bitmap uses;
383 : :
384 : : /* The set of hard registers that would be clobbered by rematerializing
385 : : the candidate, including (transitively) all those that would be
386 : : clobbered by rematerializing USES. */
387 : : bitmap clobbers;
388 : :
389 : : /* The equivalence class to which the candidate belongs, or null if none. */
390 : : remat_equiv_class *equiv_class;
391 : : };
392 : :
393 : : /* Hash functions used for value numbering. */
394 : : struct remat_candidate_hasher : nofree_ptr_hash <remat_candidate>
395 : : {
396 : : typedef value_type compare_type;
397 : : static hashval_t hash (const remat_candidate *);
398 : : static bool equal (const remat_candidate *, const remat_candidate *);
399 : : };
400 : :
401 : : /* Main class for this pass. */
402 : : class early_remat {
403 : : public:
404 : : early_remat (function *, sbitmap);
405 : : ~early_remat ();
406 : :
407 : : void run (void);
408 : :
409 : : private:
410 : : bitmap alloc_bitmap (void);
411 : : bitmap get_bitmap (bitmap *);
412 : : void init_temp_bitmap (bitmap *);
413 : : void copy_temp_bitmap (bitmap *, bitmap *);
414 : :
415 : : void dump_insn_id (rtx_insn *);
416 : : void dump_candidate_bitmap (bitmap);
417 : : void dump_all_candidates (void);
418 : : void dump_edge_list (basic_block, bool);
419 : : void dump_block_info (basic_block);
420 : : void dump_all_blocks (void);
421 : :
422 : : bool interesting_regno_p (unsigned int);
423 : : remat_candidate *add_candidate (rtx_insn *, unsigned int, bool);
424 : : bool maybe_add_candidate (rtx_insn *, unsigned int);
425 : : bool collect_candidates (void);
426 : : void init_block_info (void);
427 : : void sort_candidates (void);
428 : : void finalize_candidate_indices (void);
429 : : void record_equiv_candidates (unsigned int, unsigned int);
430 : : static bool rd_confluence_n (edge);
431 : : static bool rd_transfer (int);
432 : : void compute_rd (void);
433 : : unsigned int canon_candidate (unsigned int);
434 : : void canon_bitmap (bitmap *);
435 : : unsigned int resolve_reaching_def (bitmap);
436 : : bool check_candidate_uses (unsigned int);
437 : : void compute_clobbers (unsigned int);
438 : : void assign_value_number (unsigned int);
439 : : void decide_candidate_validity (void);
440 : : void restrict_remat_for_unavail_regs (bitmap, const_bitmap);
441 : : void restrict_remat_for_call (bitmap, rtx_insn *);
442 : : bool stable_use_p (unsigned int);
443 : : void emit_copy_before (unsigned int, rtx, rtx);
444 : : void stabilize_pattern (unsigned int);
445 : : void replace_dest_with_copy (unsigned int);
446 : : void stabilize_candidate_uses (unsigned int, bitmap, bitmap, bitmap,
447 : : bitmap);
448 : : void emit_remat_insns (bitmap, bitmap, bitmap, rtx_insn *);
449 : : bool set_available_out (remat_block_info *);
450 : : void process_block (basic_block);
451 : : void local_phase (void);
452 : : static bool avail_confluence_n (edge);
453 : : static bool avail_transfer (int);
454 : : void compute_availability (void);
455 : : void unshare_available_sets (remat_block_info *);
456 : : bool can_move_across_edge_p (edge);
457 : : bool local_remat_cheaper_p (unsigned int);
458 : : bool need_to_move_candidate_p (unsigned int, unsigned int);
459 : : void compute_minimum_move_set (unsigned int, bitmap);
460 : : void move_to_predecessors (unsigned int, bitmap, bitmap);
461 : : void choose_rematerialization_points (void);
462 : : void emit_remat_insns_for_block (basic_block);
463 : : void global_phase (void);
464 : :
465 : : /* The function that we're optimizing. */
466 : : function *m_fn;
467 : :
468 : : /* The modes that we want to rematerialize. */
469 : : sbitmap m_selected_modes;
470 : :
471 : : /* All rematerialization candidates, identified by their index into the
472 : : vector. */
473 : : auto_vec<remat_candidate> m_candidates;
474 : :
475 : : /* The set of candidate registers. */
476 : : bitmap_head m_candidate_regnos;
477 : :
478 : : /* Temporary sets. */
479 : : bitmap_head m_tmp_bitmap;
480 : : bitmap m_available;
481 : : bitmap m_required;
482 : :
483 : : /* Information about each basic block. */
484 : : auto_vec<remat_block_info> m_block_info;
485 : :
486 : : /* A mapping from register numbers to the set of associated candidates.
487 : : Only valid for registers in M_CANDIDATE_REGNOS. */
488 : : auto_vec<bitmap> m_regno_to_candidates;
489 : :
490 : : /* An obstack used for allocating bitmaps, so that we can free them all
491 : : in one go. */
492 : : bitmap_obstack m_obstack;
493 : :
494 : : /* A hash table of candidates used for value numbering. If a candidate
495 : : in the table is in an equivalence class, the candidate is marked as
496 : : the earliest member of the class. */
497 : : hash_table<remat_candidate_hasher> m_value_table;
498 : :
499 : : /* Used temporarily by callback functions. */
500 : : static early_remat *er;
501 : : };
502 : :
503 : : }
504 : :
505 : : early_remat *early_remat::er;
506 : :
507 : : /* rtx_equal_p callback that treats any two SCRATCHes as equal.
508 : : This allows us to compare two copies of a pattern, even though their
509 : : SCRATCHes are always distinct. */
510 : :
511 : : static bool
512 : 0 : scratch_equal (const_rtx *x, const_rtx *y, rtx *nx, rtx *ny)
513 : : {
514 : 0 : if (GET_CODE (*x) == SCRATCH && GET_CODE (*y) == SCRATCH)
515 : : {
516 : 0 : *nx = const0_rtx;
517 : 0 : *ny = const0_rtx;
518 : 0 : return true;
519 : : }
520 : : return false;
521 : : }
522 : :
523 : : /* Hash callback functions for remat_candidate. */
524 : :
525 : : hashval_t
526 : 0 : remat_candidate_hasher::hash (const remat_candidate *cand)
527 : : {
528 : 0 : return cand->hash;
529 : : }
530 : :
531 : : bool
532 : 0 : remat_candidate_hasher::equal (const remat_candidate *cand1,
533 : : const remat_candidate *cand2)
534 : : {
535 : 0 : return (cand1->regno == cand2->regno
536 : 0 : && cand1->constant_p == cand2->constant_p
537 : 0 : && rtx_equal_p (cand1->remat_rtx, cand2->remat_rtx,
538 : 0 : cand1->constant_p ? NULL : scratch_equal)
539 : 0 : && (!cand1->uses || bitmap_equal_p (cand1->uses, cand2->uses)));
540 : : }
541 : :
542 : : /* Return true if B is null or empty. */
543 : :
544 : : inline bool
545 : 0 : empty_p (bitmap b)
546 : : {
547 : 0 : return !b || bitmap_empty_p (b);
548 : : }
549 : :
550 : : /* Allocate a new bitmap. It will be automatically freed at the end of
551 : : the pass. */
552 : :
553 : : inline bitmap
554 : 0 : early_remat::alloc_bitmap (void)
555 : : {
556 : 0 : return bitmap_alloc (&m_obstack);
557 : : }
558 : :
559 : : /* Initialize *PTR to an empty bitmap if it is currently null. */
560 : :
561 : : inline bitmap
562 : 0 : early_remat::get_bitmap (bitmap *ptr)
563 : : {
564 : 0 : if (!*ptr)
565 : 0 : *ptr = alloc_bitmap ();
566 : 0 : return *ptr;
567 : : }
568 : :
569 : : /* *PTR is either null or empty. If it is null, initialize it to an
570 : : empty bitmap. */
571 : :
572 : : inline void
573 : 0 : early_remat::init_temp_bitmap (bitmap *ptr)
574 : : {
575 : 0 : if (!*ptr)
576 : 0 : *ptr = alloc_bitmap ();
577 : : else
578 : 0 : gcc_checking_assert (bitmap_empty_p (*ptr));
579 : 0 : }
580 : :
581 : : /* Move *SRC to *DEST and leave *SRC empty. */
582 : :
583 : : inline void
584 : 0 : early_remat::copy_temp_bitmap (bitmap *dest, bitmap *src)
585 : : {
586 : 0 : if (!empty_p (*src))
587 : : {
588 : 0 : *dest = *src;
589 : 0 : *src = NULL;
590 : : }
591 : : else
592 : 0 : *dest = NULL;
593 : : }
594 : :
595 : : /* Print INSN's identifier to the dump file. */
596 : :
597 : : void
598 : 0 : early_remat::dump_insn_id (rtx_insn *insn)
599 : : {
600 : 0 : fprintf (dump_file, "%d[bb:%d]", INSN_UID (insn),
601 : 0 : BLOCK_FOR_INSN (insn)->index);
602 : 0 : }
603 : :
604 : : /* Print candidate set CANDIDATES to the dump file, with a leading space. */
605 : :
606 : : void
607 : 0 : early_remat::dump_candidate_bitmap (bitmap candidates)
608 : : {
609 : 0 : if (empty_p (candidates))
610 : : {
611 : 0 : fprintf (dump_file, " none");
612 : 0 : return;
613 : : }
614 : :
615 : 0 : unsigned int cand_index;
616 : 0 : bitmap_iterator bi;
617 : 0 : EXECUTE_IF_SET_IN_BITMAP (candidates, 0, cand_index, bi)
618 : 0 : fprintf (dump_file, " %d", cand_index);
619 : : }
620 : :
621 : : /* Print information about all candidates to the dump file. */
622 : :
623 : : void
624 : 0 : early_remat::dump_all_candidates (void)
625 : : {
626 : 0 : fprintf (dump_file, "\n;; Candidates:\n;;\n");
627 : 0 : fprintf (dump_file, ";; %5s %5s %8s %s\n", "#", "reg", "mode", "insn");
628 : 0 : fprintf (dump_file, ";; %5s %5s %8s %s\n", "=", "===", "====", "====");
629 : 0 : unsigned int cand_index;
630 : 0 : remat_candidate *cand;
631 : 0 : FOR_EACH_VEC_ELT (m_candidates, cand_index, cand)
632 : : {
633 : 0 : fprintf (dump_file, ";; %5d %5d %8s ", cand_index, cand->regno,
634 : 0 : GET_MODE_NAME (GET_MODE (regno_reg_rtx[cand->regno])));
635 : 0 : dump_insn_id (cand->insn);
636 : 0 : if (!cand->can_copy_p)
637 : 0 : fprintf (dump_file, " -- can't copy");
638 : 0 : fprintf (dump_file, "\n");
639 : : }
640 : :
641 : 0 : fprintf (dump_file, "\n;; Register-to-candidate mapping:\n;;\n");
642 : 0 : unsigned int regno;
643 : 0 : bitmap_iterator bi;
644 : 0 : EXECUTE_IF_SET_IN_BITMAP (&m_candidate_regnos, 0, regno, bi)
645 : : {
646 : 0 : fprintf (dump_file, ";; %5d:", regno);
647 : 0 : dump_candidate_bitmap (m_regno_to_candidates[regno]);
648 : 0 : fprintf (dump_file, "\n");
649 : : }
650 : 0 : }
651 : :
652 : : /* Print the predecessors or successors of BB to the dump file, with a
653 : : leading space. DO_SUCC is true to print successors and false to print
654 : : predecessors. */
655 : :
656 : : void
657 : 0 : early_remat::dump_edge_list (basic_block bb, bool do_succ)
658 : : {
659 : 0 : edge e;
660 : 0 : edge_iterator ei;
661 : 0 : FOR_EACH_EDGE (e, ei, do_succ ? bb->succs : bb->preds)
662 : 0 : dump_edge_info (dump_file, e, TDF_NONE, do_succ);
663 : 0 : }
664 : :
665 : : /* Print information about basic block BB to the dump file. */
666 : :
667 : : void
668 : 0 : early_remat::dump_block_info (basic_block bb)
669 : : {
670 : 0 : remat_block_info *info = &m_block_info[bb->index];
671 : 0 : fprintf (dump_file, ";;\n;; Block %d:", bb->index);
672 : 0 : int width = 25;
673 : :
674 : 0 : fprintf (dump_file, "\n;;%*s:", width, "predecessors");
675 : 0 : dump_edge_list (bb, false);
676 : :
677 : 0 : fprintf (dump_file, "\n;;%*s:", width, "successors");
678 : 0 : dump_edge_list (bb, true);
679 : :
680 : 0 : fprintf (dump_file, "\n;;%*s: %d", width, "frequency",
681 : : bb->count.to_frequency (m_fn));
682 : :
683 : 0 : if (info->last_call)
684 : 0 : fprintf (dump_file, "\n;;%*s: %d", width, "last call",
685 : 0 : INSN_UID (info->last_call));
686 : :
687 : 0 : if (!empty_p (info->rd_in))
688 : : {
689 : 0 : fprintf (dump_file, "\n;;%*s:", width, "RD in");
690 : 0 : dump_candidate_bitmap (info->rd_in);
691 : : }
692 : 0 : if (!empty_p (info->rd_kill))
693 : : {
694 : 0 : fprintf (dump_file, "\n;;%*s:", width, "RD kill");
695 : 0 : dump_candidate_bitmap (info->rd_kill);
696 : : }
697 : 0 : if (!empty_p (info->rd_gen))
698 : : {
699 : 0 : fprintf (dump_file, "\n;;%*s:", width, "RD gen");
700 : 0 : dump_candidate_bitmap (info->rd_gen);
701 : : }
702 : 0 : if (!empty_p (info->rd_after_call))
703 : : {
704 : 0 : fprintf (dump_file, "\n;;%*s:", width, "RD after call");
705 : 0 : dump_candidate_bitmap (info->rd_after_call);
706 : : }
707 : 0 : if (!empty_p (info->rd_out))
708 : : {
709 : 0 : fprintf (dump_file, "\n;;%*s:", width, "RD out");
710 : 0 : if (info->rd_in == info->rd_out)
711 : 0 : fprintf (dump_file, " RD in");
712 : : else
713 : 0 : dump_candidate_bitmap (info->rd_out);
714 : : }
715 : 0 : if (!empty_p (info->available_in))
716 : : {
717 : 0 : fprintf (dump_file, "\n;;%*s:", width, "available in");
718 : 0 : dump_candidate_bitmap (info->available_in);
719 : : }
720 : 0 : if (!empty_p (info->available_locally))
721 : : {
722 : 0 : fprintf (dump_file, "\n;;%*s:", width, "available locally");
723 : 0 : dump_candidate_bitmap (info->available_locally);
724 : : }
725 : 0 : if (!empty_p (info->available_out))
726 : : {
727 : 0 : fprintf (dump_file, "\n;;%*s:", width, "available out");
728 : 0 : if (info->available_in == info->available_out)
729 : 0 : fprintf (dump_file, " available in");
730 : 0 : else if (info->available_locally == info->available_out)
731 : 0 : fprintf (dump_file, " available locally");
732 : : else
733 : 0 : dump_candidate_bitmap (info->available_out);
734 : : }
735 : 0 : if (!empty_p (info->required_in))
736 : : {
737 : 0 : fprintf (dump_file, "\n;;%*s:", width, "required in");
738 : 0 : dump_candidate_bitmap (info->required_in);
739 : : }
740 : 0 : if (!empty_p (info->required_after_call))
741 : : {
742 : 0 : fprintf (dump_file, "\n;;%*s:", width, "required after call");
743 : 0 : dump_candidate_bitmap (info->required_after_call);
744 : : }
745 : 0 : fprintf (dump_file, "\n");
746 : 0 : }
747 : :
748 : : /* Print information about all basic blocks to the dump file. */
749 : :
750 : : void
751 : 0 : early_remat::dump_all_blocks (void)
752 : : {
753 : 0 : basic_block bb;
754 : 0 : FOR_EACH_BB_FN (bb, m_fn)
755 : 0 : dump_block_info (bb);
756 : 0 : }
757 : :
758 : : /* Return true if REGNO is worth rematerializing. */
759 : :
760 : : bool
761 : 0 : early_remat::interesting_regno_p (unsigned int regno)
762 : : {
763 : : /* Ignore unused registers. */
764 : 0 : rtx reg = regno_reg_rtx[regno];
765 : 0 : if (!reg || DF_REG_DEF_COUNT (regno) == 0)
766 : : return false;
767 : :
768 : : /* Make sure the register has a mode that we want to rematerialize. */
769 : 0 : if (!bitmap_bit_p (m_selected_modes, GET_MODE (reg)))
770 : : return false;
771 : :
772 : : /* Ignore values that might sometimes be used uninitialized. We could
773 : : instead add dummy candidates for the entry block definition, and so
774 : : handle uses that are definitely not uninitialized, but the combination
775 : : of the two should be rare in practice. */
776 : 0 : if (bitmap_bit_p (DF_LR_OUT (ENTRY_BLOCK_PTR_FOR_FN (m_fn)), regno))
777 : : return false;
778 : :
779 : : return true;
780 : : }
781 : :
782 : : /* Record the set of register REGNO in instruction INSN as a
783 : : rematerialization candidate. CAN_COPY_P is true unless we already
784 : : know that rematerialization is impossible (in which case the candidate
785 : : only exists for the reaching definition calculation).
786 : :
787 : : The candidate's index is not fixed at this stage. */
788 : :
789 : : remat_candidate *
790 : 0 : early_remat::add_candidate (rtx_insn *insn, unsigned int regno,
791 : : bool can_copy_p)
792 : : {
793 : 0 : remat_candidate cand;
794 : 0 : memset (&cand, 0, sizeof (cand));
795 : 0 : cand.regno = regno;
796 : 0 : cand.insn = insn;
797 : 0 : cand.remat_rtx = PATTERN (insn);
798 : 0 : cand.can_copy_p = can_copy_p;
799 : 0 : m_candidates.safe_push (cand);
800 : :
801 : 0 : bitmap_set_bit (&m_candidate_regnos, regno);
802 : :
803 : 0 : return &m_candidates.last ();
804 : : }
805 : :
806 : : /* Return true if we can rematerialize the set of register REGNO in
807 : : instruction INSN, and add it as a candidate if so. When returning
808 : : false, print the reason to the dump file. */
809 : :
810 : : bool
811 : 0 : early_remat::maybe_add_candidate (rtx_insn *insn, unsigned int regno)
812 : : {
813 : : #define FAILURE_FORMAT ";; Can't rematerialize set of reg %d in %d[bb:%d]: "
814 : : #define FAILURE_ARGS regno, INSN_UID (insn), BLOCK_FOR_INSN (insn)->index
815 : :
816 : : /* The definition must come from an ordinary instruction. */
817 : 0 : basic_block bb = BLOCK_FOR_INSN (insn);
818 : 0 : if (!NONJUMP_INSN_P (insn)
819 : 0 : || (insn == BB_END (bb)
820 : 0 : && has_abnormal_or_eh_outgoing_edge_p (bb)))
821 : : {
822 : 0 : if (dump_file)
823 : 0 : fprintf (dump_file, FAILURE_FORMAT "insn alters control flow\n",
824 : 0 : FAILURE_ARGS);
825 : 0 : return false;
826 : : }
827 : :
828 : : /* Prefer to rematerialize constants directly -- it's much easier. */
829 : 0 : machine_mode mode = GET_MODE (regno_reg_rtx[regno]);
830 : 0 : if (rtx note = find_reg_equal_equiv_note (insn))
831 : : {
832 : 0 : rtx val = XEXP (note, 0);
833 : 0 : if (CONSTANT_P (val)
834 : 0 : && targetm.legitimate_constant_p (mode, val))
835 : : {
836 : 0 : remat_candidate *cand = add_candidate (insn, regno, true);
837 : 0 : cand->constant_p = true;
838 : 0 : cand->remat_rtx = val;
839 : 0 : return true;
840 : : }
841 : : }
842 : :
843 : : /* See whether the target has reasons to prevent a copy. */
844 : 0 : if (targetm.cannot_copy_insn_p && targetm.cannot_copy_insn_p (insn))
845 : : {
846 : 0 : if (dump_file)
847 : 0 : fprintf (dump_file, FAILURE_FORMAT "target forbids copying\n",
848 : 0 : FAILURE_ARGS);
849 : 0 : return false;
850 : : }
851 : :
852 : : /* We can't copy trapping instructions. */
853 : 0 : rtx pat = PATTERN (insn);
854 : 0 : if (may_trap_p (pat))
855 : : {
856 : 0 : if (dump_file)
857 : 0 : fprintf (dump_file, FAILURE_FORMAT "insn might trap\n", FAILURE_ARGS);
858 : 0 : return false;
859 : : }
860 : :
861 : : /* We can't copy instructions that read memory, unless we know that
862 : : the contents never change. */
863 : 0 : subrtx_iterator::array_type array;
864 : 0 : FOR_EACH_SUBRTX (iter, array, pat, ALL)
865 : 0 : if (MEM_P (*iter) && !MEM_READONLY_P (*iter))
866 : : {
867 : 0 : if (dump_file)
868 : 0 : fprintf (dump_file, FAILURE_FORMAT "insn references non-constant"
869 : 0 : " memory\n", FAILURE_ARGS);
870 : 0 : return false;
871 : : }
872 : :
873 : : /* Check each defined register. */
874 : 0 : df_ref ref;
875 : 0 : FOR_EACH_INSN_DEF (ref, insn)
876 : : {
877 : 0 : unsigned int def_regno = DF_REF_REGNO (ref);
878 : 0 : if (def_regno == regno)
879 : : {
880 : : /* Make sure the definition is write-only. (Partial definitions,
881 : : such as setting the low part and clobbering the high part,
882 : : are otherwise OK.) */
883 : 0 : if (DF_REF_FLAGS_IS_SET (ref, DF_REF_READ_WRITE))
884 : : {
885 : 0 : if (dump_file)
886 : 0 : fprintf (dump_file, FAILURE_FORMAT "destination is"
887 : 0 : " read-modify-write\n", FAILURE_ARGS);
888 : 0 : return false;
889 : : }
890 : : }
891 : : else
892 : : {
893 : : /* The instruction can set additional registers, provided that
894 : : they're hard registers. This is useful for instructions
895 : : that alter the condition codes. */
896 : 0 : if (!HARD_REGISTER_NUM_P (def_regno))
897 : : {
898 : 0 : if (dump_file)
899 : 0 : fprintf (dump_file, FAILURE_FORMAT "insn also sets"
900 : 0 : " pseudo reg %d\n", FAILURE_ARGS, def_regno);
901 : 0 : return false;
902 : : }
903 : : }
904 : : }
905 : :
906 : : /* If the instruction uses fixed hard registers, check that those
907 : : registers have the same value throughout the function. If the
908 : : instruction uses non-fixed hard registers, check that we can
909 : : replace them with pseudos. */
910 : 0 : FOR_EACH_INSN_USE (ref, insn)
911 : : {
912 : 0 : unsigned int use_regno = DF_REF_REGNO (ref);
913 : 0 : if (HARD_REGISTER_NUM_P (use_regno) && fixed_regs[use_regno])
914 : : {
915 : 0 : if (rtx_unstable_p (DF_REF_REAL_REG (ref)))
916 : : {
917 : 0 : if (dump_file)
918 : 0 : fprintf (dump_file, FAILURE_FORMAT "insn uses fixed hard reg"
919 : 0 : " %d\n", FAILURE_ARGS, use_regno);
920 : 0 : return false;
921 : : }
922 : : }
923 : 0 : else if (HARD_REGISTER_NUM_P (use_regno))
924 : : {
925 : : /* Allocate a dummy pseudo register and temporarily install it.
926 : : Make the register number depend on the mode, which should
927 : : provide enough sharing for match_dup while also weeding
928 : : out cases in which operands with different modes are
929 : : explicitly tied. */
930 : 0 : rtx *loc = DF_REF_REAL_LOC (ref);
931 : 0 : unsigned int size = RTX_CODE_SIZE (REG);
932 : 0 : rtx new_reg = (rtx) alloca (size);
933 : 0 : memset (new_reg, 0, size);
934 : 0 : PUT_CODE (new_reg, REG);
935 : 0 : set_mode_and_regno (new_reg, GET_MODE (*loc),
936 : 0 : LAST_VIRTUAL_REGISTER + 1 + GET_MODE (*loc));
937 : 0 : validate_change (insn, loc, new_reg, 1);
938 : : }
939 : : }
940 : 0 : bool ok_p = verify_changes (0);
941 : 0 : cancel_changes (0);
942 : 0 : if (!ok_p)
943 : : {
944 : 0 : if (dump_file)
945 : 0 : fprintf (dump_file, FAILURE_FORMAT "insn does not allow hard"
946 : 0 : " register inputs to be replaced\n", FAILURE_ARGS);
947 : 0 : return false;
948 : : }
949 : :
950 : : #undef FAILURE_ARGS
951 : : #undef FAILURE_FORMAT
952 : :
953 : 0 : add_candidate (insn, regno, true);
954 : 0 : return true;
955 : 0 : }
956 : :
957 : : /* Calculate the set of rematerialization candidates. Return true if
958 : : we find at least one. */
959 : :
960 : : bool
961 : 0 : early_remat::collect_candidates (void)
962 : : {
963 : 0 : unsigned int nregs = DF_REG_SIZE (df);
964 : 0 : for (unsigned int regno = FIRST_PSEUDO_REGISTER; regno < nregs; ++regno)
965 : 0 : if (interesting_regno_p (regno))
966 : : {
967 : : /* Create candidates for all suitable definitions. */
968 : 0 : bitmap_clear (&m_tmp_bitmap);
969 : 0 : unsigned int bad = 0;
970 : 0 : unsigned int id = 0;
971 : 0 : for (df_ref ref = DF_REG_DEF_CHAIN (regno); ref;
972 : 0 : ref = DF_REF_NEXT_REG (ref))
973 : : {
974 : 0 : rtx_insn *insn = DF_REF_INSN (ref);
975 : 0 : if (maybe_add_candidate (insn, regno))
976 : 0 : bitmap_set_bit (&m_tmp_bitmap, id);
977 : : else
978 : 0 : bad += 1;
979 : 0 : id += 1;
980 : : }
981 : :
982 : : /* If we found at least one suitable definition, add dummy
983 : : candidates for the rest, so that we can see which definitions
984 : : are live where. */
985 : 0 : if (!bitmap_empty_p (&m_tmp_bitmap) && bad)
986 : : {
987 : 0 : id = 0;
988 : 0 : for (df_ref ref = DF_REG_DEF_CHAIN (regno); ref;
989 : 0 : ref = DF_REF_NEXT_REG (ref))
990 : : {
991 : 0 : if (!bitmap_bit_p (&m_tmp_bitmap, id))
992 : 0 : add_candidate (DF_REF_INSN (ref), regno, false);
993 : 0 : id += 1;
994 : : }
995 : : }
996 : : }
997 : :
998 : :
999 : 0 : return !m_candidates.is_empty ();
1000 : : }
1001 : :
1002 : : /* Initialize the m_block_info array. */
1003 : :
1004 : : void
1005 : 0 : early_remat::init_block_info (void)
1006 : : {
1007 : 0 : unsigned int n_blocks = last_basic_block_for_fn (m_fn);
1008 : 0 : m_block_info.safe_grow_cleared (n_blocks, true);
1009 : 0 : }
1010 : :
1011 : : /* Maps basic block indices to their position in the forward RPO. */
1012 : : static unsigned int *rpo_index;
1013 : :
1014 : : /* Order remat_candidates X_IN and Y_IN according to the cfg postorder. */
1015 : :
1016 : : static int
1017 : 0 : compare_candidates (const void *x_in, const void *y_in)
1018 : : {
1019 : 0 : const remat_candidate *x = (const remat_candidate *) x_in;
1020 : 0 : const remat_candidate *y = (const remat_candidate *) y_in;
1021 : 0 : basic_block x_bb = BLOCK_FOR_INSN (x->insn);
1022 : 0 : basic_block y_bb = BLOCK_FOR_INSN (y->insn);
1023 : 0 : if (x_bb != y_bb)
1024 : : /* Make X and Y follow block postorder. */
1025 : 0 : return rpo_index[y_bb->index] - rpo_index[x_bb->index];
1026 : :
1027 : : /* Make X and Y follow a backward traversal of the containing block. */
1028 : 0 : return DF_INSN_LUID (y->insn) - DF_INSN_LUID (x->insn);
1029 : : }
1030 : :
1031 : : /* Sort the collected rematerialization candidates so that they follow
1032 : : cfg postorder. */
1033 : :
1034 : : void
1035 : 0 : early_remat::sort_candidates (void)
1036 : : {
1037 : : /* Make sure the DF LUIDs are up-to-date for all the blocks we
1038 : : care about. */
1039 : 0 : bitmap_clear (&m_tmp_bitmap);
1040 : 0 : unsigned int cand_index;
1041 : 0 : remat_candidate *cand;
1042 : 0 : FOR_EACH_VEC_ELT (m_candidates, cand_index, cand)
1043 : : {
1044 : 0 : basic_block bb = BLOCK_FOR_INSN (cand->insn);
1045 : 0 : if (bitmap_set_bit (&m_tmp_bitmap, bb->index))
1046 : 0 : df_recompute_luids (bb);
1047 : : }
1048 : :
1049 : : /* Create a mapping from block numbers to their position in the
1050 : : postorder. */
1051 : 0 : unsigned int n_blocks = last_basic_block_for_fn (m_fn);
1052 : 0 : int *rpo = df_get_postorder (DF_FORWARD);
1053 : 0 : unsigned int rpo_len = df_get_n_blocks (DF_FORWARD);
1054 : 0 : rpo_index = new unsigned int[n_blocks];
1055 : 0 : for (unsigned int i = 0; i < rpo_len; ++i)
1056 : 0 : rpo_index[rpo[i]] = i;
1057 : :
1058 : 0 : m_candidates.qsort (compare_candidates);
1059 : :
1060 : 0 : delete[] rpo_index;
1061 : 0 : }
1062 : :
1063 : : /* Commit to the current candidate indices and initialize cross-references. */
1064 : :
1065 : : void
1066 : 0 : early_remat::finalize_candidate_indices (void)
1067 : : {
1068 : : /* Create a bitmap for each candidate register. */
1069 : 0 : m_regno_to_candidates.safe_grow (max_reg_num (), true);
1070 : 0 : unsigned int regno;
1071 : 0 : bitmap_iterator bi;
1072 : 0 : EXECUTE_IF_SET_IN_BITMAP (&m_candidate_regnos, 0, regno, bi)
1073 : 0 : m_regno_to_candidates[regno] = alloc_bitmap ();
1074 : :
1075 : : /* Go through each candidate and record its index. */
1076 : : unsigned int cand_index;
1077 : : remat_candidate *cand;
1078 : 0 : FOR_EACH_VEC_ELT (m_candidates, cand_index, cand)
1079 : : {
1080 : 0 : basic_block bb = BLOCK_FOR_INSN (cand->insn);
1081 : 0 : remat_block_info *info = &m_block_info[bb->index];
1082 : 0 : info->num_candidates += 1;
1083 : 0 : info->first_candidate = cand_index;
1084 : 0 : bitmap_set_bit (m_regno_to_candidates[cand->regno], cand_index);
1085 : : }
1086 : 0 : }
1087 : :
1088 : : /* Record that candidates CAND1_INDEX and CAND2_INDEX are equivalent.
1089 : : CAND1_INDEX might already have an equivalence class, but CAND2_INDEX
1090 : : doesn't. */
1091 : :
1092 : : void
1093 : 0 : early_remat::record_equiv_candidates (unsigned int cand1_index,
1094 : : unsigned int cand2_index)
1095 : : {
1096 : 0 : if (dump_file)
1097 : 0 : fprintf (dump_file, ";; Candidate %d is equivalent to candidate %d\n",
1098 : : cand2_index, cand1_index);
1099 : :
1100 : 0 : remat_candidate *cand1 = &m_candidates[cand1_index];
1101 : 0 : remat_candidate *cand2 = &m_candidates[cand2_index];
1102 : 0 : gcc_checking_assert (!cand2->equiv_class);
1103 : :
1104 : 0 : remat_equiv_class *ec = cand1->equiv_class;
1105 : 0 : if (!ec)
1106 : : {
1107 : 0 : ec = XOBNEW (&m_obstack.obstack, remat_equiv_class);
1108 : 0 : ec->members = alloc_bitmap ();
1109 : 0 : bitmap_set_bit (ec->members, cand1_index);
1110 : 0 : ec->earliest = cand1_index;
1111 : 0 : ec->representative = cand1_index;
1112 : 0 : cand1->equiv_class = ec;
1113 : : }
1114 : 0 : cand2->equiv_class = ec;
1115 : 0 : bitmap_set_bit (ec->members, cand2_index);
1116 : 0 : if (cand2_index > ec->representative)
1117 : 0 : ec->representative = cand2_index;
1118 : 0 : }
1119 : :
1120 : : /* Propagate information from the rd_out set of E->src to the rd_in set
1121 : : of E->dest, when computing global reaching definitions. Return true
1122 : : if something changed. */
1123 : :
1124 : : bool
1125 : 0 : early_remat::rd_confluence_n (edge e)
1126 : : {
1127 : 0 : remat_block_info *src = &er->m_block_info[e->src->index];
1128 : 0 : remat_block_info *dest = &er->m_block_info[e->dest->index];
1129 : :
1130 : : /* available_in temporarily contains the set of candidates whose
1131 : : registers are live on entry. */
1132 : 0 : if (empty_p (src->rd_out) || empty_p (dest->available_in))
1133 : : return false;
1134 : :
1135 : 0 : return bitmap_ior_and_into (er->get_bitmap (&dest->rd_in),
1136 : 0 : src->rd_out, dest->available_in);
1137 : : }
1138 : :
1139 : : /* Propagate information from the rd_in set of block BB_INDEX to rd_out.
1140 : : Return true if something changed. */
1141 : :
1142 : : bool
1143 : 0 : early_remat::rd_transfer (int bb_index)
1144 : : {
1145 : 0 : remat_block_info *info = &er->m_block_info[bb_index];
1146 : :
1147 : 0 : if (empty_p (info->rd_in))
1148 : : return false;
1149 : :
1150 : 0 : if (empty_p (info->rd_kill))
1151 : : {
1152 : 0 : gcc_checking_assert (empty_p (info->rd_gen));
1153 : 0 : if (!info->rd_out)
1154 : 0 : info->rd_out = info->rd_in;
1155 : : else
1156 : 0 : gcc_checking_assert (info->rd_out == info->rd_in);
1157 : : /* Assume that we only get called if something changed. */
1158 : 0 : return true;
1159 : : }
1160 : :
1161 : 0 : if (empty_p (info->rd_gen))
1162 : 0 : return bitmap_and_compl (er->get_bitmap (&info->rd_out),
1163 : 0 : info->rd_in, info->rd_kill);
1164 : :
1165 : 0 : return bitmap_ior_and_compl (er->get_bitmap (&info->rd_out), info->rd_gen,
1166 : 0 : info->rd_in, info->rd_kill);
1167 : : }
1168 : :
1169 : : /* Calculate the rd_* sets for each block. */
1170 : :
1171 : : void
1172 : 0 : early_remat::compute_rd (void)
1173 : : {
1174 : : /* First calculate the rd_kill and rd_gen sets, using the fact
1175 : : that m_candidates is sorted in order of decreasing LUID. */
1176 : 0 : unsigned int cand_index;
1177 : 0 : remat_candidate *cand;
1178 : 0 : FOR_EACH_VEC_ELT_REVERSE (m_candidates, cand_index, cand)
1179 : : {
1180 : 0 : rtx_insn *insn = cand->insn;
1181 : 0 : basic_block bb = BLOCK_FOR_INSN (insn);
1182 : 0 : remat_block_info *info = &m_block_info[bb->index];
1183 : 0 : bitmap kill = m_regno_to_candidates[cand->regno];
1184 : 0 : bitmap_ior_into (get_bitmap (&info->rd_kill), kill);
1185 : 0 : if (bitmap_bit_p (DF_LR_OUT (bb), cand->regno))
1186 : : {
1187 : 0 : bitmap_and_compl_into (get_bitmap (&info->rd_gen), kill);
1188 : 0 : bitmap_set_bit (info->rd_gen, cand_index);
1189 : : }
1190 : : }
1191 : :
1192 : : /* Set up the initial values of the other sets. */
1193 : 0 : basic_block bb;
1194 : 0 : FOR_EACH_BB_FN (bb, m_fn)
1195 : : {
1196 : 0 : remat_block_info *info = &m_block_info[bb->index];
1197 : 0 : unsigned int regno;
1198 : 0 : bitmap_iterator bi;
1199 : 0 : EXECUTE_IF_AND_IN_BITMAP (DF_LR_IN (bb), &m_candidate_regnos,
1200 : : 0, regno, bi)
1201 : : {
1202 : : /* Use available_in to record the set of candidates whose
1203 : : registers are live on entry (i.e. a maximum bound on rd_in). */
1204 : 0 : bitmap_ior_into (get_bitmap (&info->available_in),
1205 : 0 : m_regno_to_candidates[regno]);
1206 : :
1207 : : /* Add registers that die in a block to the block's kill set,
1208 : : so that we don't needlessly propagate them through the rest
1209 : : of the function. */
1210 : 0 : if (!bitmap_bit_p (DF_LR_OUT (bb), regno))
1211 : 0 : bitmap_ior_into (get_bitmap (&info->rd_kill),
1212 : 0 : m_regno_to_candidates[regno]);
1213 : : }
1214 : :
1215 : : /* Initialize each block's rd_out to the minimal set (the set of
1216 : : local definitions). */
1217 : 0 : if (!empty_p (info->rd_gen))
1218 : 0 : bitmap_copy (get_bitmap (&info->rd_out), info->rd_gen);
1219 : : }
1220 : :
1221 : : /* Iterate until we reach a fixed point. */
1222 : 0 : er = this;
1223 : 0 : bitmap_clear (&m_tmp_bitmap);
1224 : 0 : bitmap_set_range (&m_tmp_bitmap, 0, last_basic_block_for_fn (m_fn));
1225 : 0 : df_simple_dataflow (DF_FORWARD, NULL, NULL, rd_confluence_n, rd_transfer,
1226 : : &m_tmp_bitmap, df_get_postorder (DF_FORWARD),
1227 : : df_get_n_blocks (DF_FORWARD));
1228 : 0 : er = 0;
1229 : :
1230 : : /* Work out which definitions reach which candidates, again taking
1231 : : advantage of the candidate order. */
1232 : 0 : bitmap_head reaching;
1233 : 0 : bitmap_initialize (&reaching, &m_obstack);
1234 : 0 : basic_block old_bb = NULL;
1235 : 0 : FOR_EACH_VEC_ELT_REVERSE (m_candidates, cand_index, cand)
1236 : : {
1237 : 0 : bb = BLOCK_FOR_INSN (cand->insn);
1238 : 0 : if (bb != old_bb)
1239 : : {
1240 : : /* Get the definitions that reach the start of the new block. */
1241 : 0 : remat_block_info *info = &m_block_info[bb->index];
1242 : 0 : if (info->rd_in)
1243 : 0 : bitmap_copy (&reaching, info->rd_in);
1244 : : else
1245 : 0 : bitmap_clear (&reaching);
1246 : : old_bb = bb;
1247 : : }
1248 : : else
1249 : : {
1250 : : /* Process the definitions of the previous instruction. */
1251 : 0 : bitmap kill = m_regno_to_candidates[cand[1].regno];
1252 : 0 : bitmap_and_compl_into (&reaching, kill);
1253 : 0 : bitmap_set_bit (&reaching, cand_index + 1);
1254 : : }
1255 : :
1256 : 0 : if (cand->can_copy_p && !cand->constant_p)
1257 : : {
1258 : 0 : df_ref ref;
1259 : 0 : FOR_EACH_INSN_USE (ref, cand->insn)
1260 : : {
1261 : 0 : unsigned int regno = DF_REF_REGNO (ref);
1262 : 0 : if (bitmap_bit_p (&m_candidate_regnos, regno))
1263 : : {
1264 : 0 : bitmap defs = m_regno_to_candidates[regno];
1265 : 0 : bitmap_and (&m_tmp_bitmap, defs, &reaching);
1266 : 0 : bitmap_ior_into (get_bitmap (&cand->uses), &m_tmp_bitmap);
1267 : : }
1268 : : }
1269 : : }
1270 : : }
1271 : 0 : bitmap_clear (&reaching);
1272 : 0 : }
1273 : :
1274 : : /* If CAND_INDEX is in an equivalence class, return the representative
1275 : : of the class, otherwise return CAND_INDEX. */
1276 : :
1277 : : inline unsigned int
1278 : 0 : early_remat::canon_candidate (unsigned int cand_index)
1279 : : {
1280 : 0 : if (remat_equiv_class *ec = m_candidates[cand_index].equiv_class)
1281 : 0 : return ec->representative;
1282 : : return cand_index;
1283 : : }
1284 : :
1285 : : /* Make candidate set *PTR refer to candidates using the representative
1286 : : of each equivalence class. */
1287 : :
1288 : : void
1289 : 0 : early_remat::canon_bitmap (bitmap *ptr)
1290 : : {
1291 : 0 : bitmap old_set = *ptr;
1292 : 0 : if (empty_p (old_set))
1293 : 0 : return;
1294 : :
1295 : 0 : bitmap new_set = NULL;
1296 : 0 : unsigned int old_index;
1297 : 0 : bitmap_iterator bi;
1298 : 0 : EXECUTE_IF_SET_IN_BITMAP (old_set, 0, old_index, bi)
1299 : : {
1300 : 0 : unsigned int new_index = canon_candidate (old_index);
1301 : 0 : if (old_index != new_index)
1302 : : {
1303 : 0 : if (!new_set)
1304 : : {
1305 : 0 : new_set = alloc_bitmap ();
1306 : 0 : bitmap_copy (new_set, old_set);
1307 : : }
1308 : 0 : bitmap_clear_bit (new_set, old_index);
1309 : 0 : bitmap_set_bit (new_set, new_index);
1310 : : }
1311 : : }
1312 : 0 : if (new_set)
1313 : : {
1314 : 0 : BITMAP_FREE (*ptr);
1315 : 0 : *ptr = new_set;
1316 : : }
1317 : : }
1318 : :
1319 : : /* If the candidates in REACHING all have the same value, return the
1320 : : earliest instance of that value (i.e. the first one to be added
1321 : : to m_value_table), otherwise return MULTIPLE_CANDIDATES. */
1322 : :
1323 : : unsigned int
1324 : 0 : early_remat::resolve_reaching_def (bitmap reaching)
1325 : : {
1326 : 0 : unsigned int cand_index = bitmap_first_set_bit (reaching);
1327 : 0 : if (remat_equiv_class *ec = m_candidates[cand_index].equiv_class)
1328 : : {
1329 : 0 : if (!bitmap_intersect_compl_p (reaching, ec->members))
1330 : 0 : return ec->earliest;
1331 : : }
1332 : 0 : else if (bitmap_single_bit_set_p (reaching))
1333 : : return cand_index;
1334 : :
1335 : : return MULTIPLE_CANDIDATES;
1336 : : }
1337 : :
1338 : : /* Check whether all candidate registers used by candidate CAND_INDEX have
1339 : : unique definitions. Return true if so, replacing the candidate's uses
1340 : : set with the appropriate form for value numbering. */
1341 : :
1342 : : bool
1343 : 0 : early_remat::check_candidate_uses (unsigned int cand_index)
1344 : : {
1345 : 0 : remat_candidate *cand = &m_candidates[cand_index];
1346 : :
1347 : : /* Process the uses for each register in turn. */
1348 : 0 : bitmap_head uses;
1349 : 0 : bitmap_initialize (&uses, &m_obstack);
1350 : 0 : bitmap_copy (&uses, cand->uses);
1351 : 0 : bitmap uses_ec = alloc_bitmap ();
1352 : 0 : while (!bitmap_empty_p (&uses))
1353 : : {
1354 : : /* Get the register for the lowest-indexed candidate remaining,
1355 : : and the reaching definitions of that register. */
1356 : 0 : unsigned int first = bitmap_first_set_bit (&uses);
1357 : 0 : unsigned int regno = m_candidates[first].regno;
1358 : 0 : bitmap_and (&m_tmp_bitmap, &uses, m_regno_to_candidates[regno]);
1359 : :
1360 : : /* See whether all reaching definitions have the same value and if
1361 : : so get the index of the first candidate we saw with that value. */
1362 : 0 : unsigned int def = resolve_reaching_def (&m_tmp_bitmap);
1363 : 0 : if (def == MULTIPLE_CANDIDATES)
1364 : : {
1365 : 0 : if (dump_file)
1366 : 0 : fprintf (dump_file, ";; Removing candidate %d because there is"
1367 : : " more than one reaching definition of reg %d\n",
1368 : : cand_index, regno);
1369 : 0 : cand->can_copy_p = false;
1370 : 0 : break;
1371 : : }
1372 : 0 : bitmap_set_bit (uses_ec, def);
1373 : 0 : bitmap_and_compl_into (&uses, &m_tmp_bitmap);
1374 : : }
1375 : 0 : BITMAP_FREE (cand->uses);
1376 : 0 : cand->uses = uses_ec;
1377 : 0 : return cand->can_copy_p;
1378 : : }
1379 : :
1380 : : /* Calculate the set of hard registers that would be clobbered by
1381 : : rematerializing candidate CAND_INDEX. At this point the candidate's
1382 : : set of uses is final. */
1383 : :
1384 : : void
1385 : 0 : early_remat::compute_clobbers (unsigned int cand_index)
1386 : : {
1387 : 0 : remat_candidate *cand = &m_candidates[cand_index];
1388 : 0 : if (cand->uses)
1389 : : {
1390 : 0 : unsigned int use_index;
1391 : 0 : bitmap_iterator bi;
1392 : 0 : EXECUTE_IF_SET_IN_BITMAP (cand->uses, 0, use_index, bi)
1393 : 0 : if (bitmap clobbers = m_candidates[use_index].clobbers)
1394 : 0 : bitmap_ior_into (get_bitmap (&cand->clobbers), clobbers);
1395 : : }
1396 : :
1397 : 0 : df_ref ref;
1398 : 0 : FOR_EACH_INSN_DEF (ref, cand->insn)
1399 : : {
1400 : 0 : unsigned int def_regno = DF_REF_REGNO (ref);
1401 : 0 : if (def_regno != cand->regno)
1402 : 0 : bitmap_set_bit (get_bitmap (&cand->clobbers), def_regno);
1403 : : }
1404 : 0 : }
1405 : :
1406 : : /* Mark candidate CAND_INDEX as validated and add it to the value table. */
1407 : :
1408 : : void
1409 : 0 : early_remat::assign_value_number (unsigned int cand_index)
1410 : : {
1411 : 0 : remat_candidate *cand = &m_candidates[cand_index];
1412 : 0 : gcc_checking_assert (cand->can_copy_p && !cand->validated_p);
1413 : :
1414 : 0 : compute_clobbers (cand_index);
1415 : 0 : cand->validated_p = true;
1416 : :
1417 : 0 : inchash::hash h;
1418 : 0 : h.add_int (cand->regno);
1419 : 0 : inchash::add_rtx (cand->remat_rtx, h);
1420 : 0 : cand->hash = h.end ();
1421 : :
1422 : 0 : remat_candidate **slot
1423 : 0 : = m_value_table.find_slot_with_hash (cand, cand->hash, INSERT);
1424 : 0 : if (!*slot)
1425 : : {
1426 : 0 : *slot = cand;
1427 : 0 : if (dump_file)
1428 : 0 : fprintf (dump_file, ";; Candidate %d is not equivalent to"
1429 : : " others seen so far\n", cand_index);
1430 : : }
1431 : : else
1432 : 0 : record_equiv_candidates (*slot - m_candidates.address (), cand_index);
1433 : 0 : }
1434 : :
1435 : : /* Make a final decision about which candidates are valid and assign
1436 : : value numbers to those that are. */
1437 : :
1438 : : void
1439 : 0 : early_remat::decide_candidate_validity (void)
1440 : : {
1441 : 0 : auto_vec<unsigned int, 16> stack;
1442 : 0 : unsigned int cand1_index;
1443 : 0 : remat_candidate *cand1;
1444 : 0 : FOR_EACH_VEC_ELT_REVERSE (m_candidates, cand1_index, cand1)
1445 : : {
1446 : 0 : if (!cand1->can_copy_p || cand1->validated_p)
1447 : 0 : continue;
1448 : :
1449 : 0 : if (empty_p (cand1->uses))
1450 : : {
1451 : 0 : assign_value_number (cand1_index);
1452 : 0 : continue;
1453 : : }
1454 : :
1455 : 0 : stack.safe_push (cand1_index);
1456 : 0 : while (!stack.is_empty ())
1457 : : {
1458 : 0 : unsigned int cand2_index = stack.last ();
1459 : 0 : unsigned int watermark = stack.length ();
1460 : 0 : remat_candidate *cand2 = &m_candidates[cand2_index];
1461 : 0 : if (!cand2->can_copy_p || cand2->validated_p)
1462 : : {
1463 : 0 : stack.pop ();
1464 : 0 : continue;
1465 : : }
1466 : 0 : cand2->visited_p = true;
1467 : 0 : unsigned int cand3_index;
1468 : 0 : bitmap_iterator bi;
1469 : 0 : EXECUTE_IF_SET_IN_BITMAP (cand2->uses, 0, cand3_index, bi)
1470 : : {
1471 : 0 : remat_candidate *cand3 = &m_candidates[cand3_index];
1472 : 0 : if (!cand3->can_copy_p)
1473 : : {
1474 : 0 : if (dump_file)
1475 : 0 : fprintf (dump_file, ";; Removing candidate %d because"
1476 : : " it uses removed candidate %d\n", cand2_index,
1477 : : cand3_index);
1478 : 0 : cand2->can_copy_p = false;
1479 : 0 : break;
1480 : : }
1481 : 0 : if (!cand3->validated_p)
1482 : : {
1483 : 0 : if (empty_p (cand3->uses))
1484 : 0 : assign_value_number (cand3_index);
1485 : 0 : else if (cand3->visited_p)
1486 : : {
1487 : 0 : if (dump_file)
1488 : 0 : fprintf (dump_file, ";; Removing candidate %d"
1489 : : " because its definition is cyclic\n",
1490 : : cand2_index);
1491 : 0 : cand2->can_copy_p = false;
1492 : 0 : break;
1493 : : }
1494 : : else
1495 : 0 : stack.safe_push (cand3_index);
1496 : : }
1497 : : }
1498 : 0 : if (!cand2->can_copy_p)
1499 : : {
1500 : 0 : cand2->visited_p = false;
1501 : 0 : stack.truncate (watermark - 1);
1502 : : }
1503 : 0 : else if (watermark == stack.length ())
1504 : : {
1505 : 0 : cand2->visited_p = false;
1506 : 0 : if (check_candidate_uses (cand2_index))
1507 : 0 : assign_value_number (cand2_index);
1508 : 0 : stack.pop ();
1509 : : }
1510 : : }
1511 : : }
1512 : :
1513 : : /* Ensure that the candidates always use the same candidate index
1514 : : to refer to an equivalence class. */
1515 : 0 : FOR_EACH_VEC_ELT_REVERSE (m_candidates, cand1_index, cand1)
1516 : 0 : if (cand1->can_copy_p && !empty_p (cand1->uses))
1517 : : {
1518 : 0 : canon_bitmap (&cand1->uses);
1519 : 0 : gcc_checking_assert (bitmap_first_set_bit (cand1->uses) > cand1_index);
1520 : : }
1521 : 0 : }
1522 : :
1523 : : /* Remove any candidates in CANDIDATES that would clobber a register in
1524 : : UNAVAIL_REGS. */
1525 : :
1526 : : void
1527 : 0 : early_remat::restrict_remat_for_unavail_regs (bitmap candidates,
1528 : : const_bitmap unavail_regs)
1529 : : {
1530 : 0 : bitmap_clear (&m_tmp_bitmap);
1531 : 0 : unsigned int cand_index;
1532 : 0 : bitmap_iterator bi;
1533 : 0 : EXECUTE_IF_SET_IN_BITMAP (candidates, 0, cand_index, bi)
1534 : : {
1535 : 0 : remat_candidate *cand = &m_candidates[cand_index];
1536 : 0 : if (cand->clobbers
1537 : 0 : && bitmap_intersect_p (cand->clobbers, unavail_regs))
1538 : 0 : bitmap_set_bit (&m_tmp_bitmap, cand_index);
1539 : : }
1540 : 0 : bitmap_and_compl_into (candidates, &m_tmp_bitmap);
1541 : 0 : }
1542 : :
1543 : : /* Remove any candidates in CANDIDATES that would clobber a register
1544 : : that is potentially live across CALL. */
1545 : :
1546 : : void
1547 : 0 : early_remat::restrict_remat_for_call (bitmap candidates, rtx_insn *call)
1548 : : {
1549 : : /* We don't know whether partially-clobbered registers are live
1550 : : across the call or not, so assume that they are. */
1551 : 0 : bitmap_view<HARD_REG_SET> call_preserved_regs
1552 : 0 : (~insn_callee_abi (call).full_reg_clobbers ());
1553 : 0 : restrict_remat_for_unavail_regs (candidates, call_preserved_regs);
1554 : 0 : }
1555 : :
1556 : : /* Assuming that every path reaching a point P contains a copy of a
1557 : : use U of REGNO, return true if another copy of U at P would have
1558 : : access to the same value of REGNO. */
1559 : :
1560 : : bool
1561 : 0 : early_remat::stable_use_p (unsigned int regno)
1562 : : {
1563 : : /* Conservatively assume not for hard registers. */
1564 : 0 : if (HARD_REGISTER_NUM_P (regno))
1565 : : return false;
1566 : :
1567 : : /* See if REGNO has a single definition and is never used uninitialized.
1568 : : In this case the definition of REGNO dominates the common dominator
1569 : : of the uses U, which in turn dominates P. */
1570 : 0 : if (DF_REG_DEF_COUNT (regno) == 1
1571 : 0 : && !bitmap_bit_p (DF_LR_OUT (ENTRY_BLOCK_PTR_FOR_FN (m_fn)), regno))
1572 : : return true;
1573 : :
1574 : : return false;
1575 : : }
1576 : :
1577 : : /* Emit a copy from register DEST to register SRC before candidate
1578 : : CAND_INDEX's instruction. */
1579 : :
1580 : : void
1581 : 0 : early_remat::emit_copy_before (unsigned int cand_index, rtx dest, rtx src)
1582 : : {
1583 : 0 : remat_candidate *cand = &m_candidates[cand_index];
1584 : 0 : if (dump_file)
1585 : : {
1586 : 0 : fprintf (dump_file, ";; Stabilizing insn ");
1587 : 0 : dump_insn_id (cand->insn);
1588 : 0 : fprintf (dump_file, " by copying source reg %d:%s to temporary reg %d\n",
1589 : 0 : REGNO (src), GET_MODE_NAME (GET_MODE (src)), REGNO (dest));
1590 : : }
1591 : 0 : emit_insn_before (gen_move_insn (dest, src), cand->insn);
1592 : 0 : }
1593 : :
1594 : : /* Check whether any inputs to candidate CAND_INDEX's instruction could
1595 : : change at rematerialization points and replace them with new pseudo
1596 : : registers if so. */
1597 : :
1598 : : void
1599 : 0 : early_remat::stabilize_pattern (unsigned int cand_index)
1600 : : {
1601 : 0 : remat_candidate *cand = &m_candidates[cand_index];
1602 : 0 : if (cand->stabilized_p)
1603 : 0 : return;
1604 : :
1605 : 0 : remat_equiv_class *ec = cand->equiv_class;
1606 : 0 : gcc_checking_assert (!ec || cand_index == ec->representative);
1607 : :
1608 : : /* Record the replacements we've made so far, so that we don't
1609 : : create two separate registers for match_dups. Lookup is O(n),
1610 : : but the n is very small. */
1611 : 0 : typedef std::pair<rtx, rtx> reg_pair;
1612 : 0 : auto_vec<reg_pair, 16> reg_map;
1613 : :
1614 : 0 : rtx_insn *insn = cand->insn;
1615 : 0 : df_ref ref;
1616 : 0 : FOR_EACH_INSN_USE (ref, insn)
1617 : : {
1618 : 0 : unsigned int old_regno = DF_REF_REGNO (ref);
1619 : 0 : rtx *loc = DF_REF_REAL_LOC (ref);
1620 : :
1621 : 0 : if (HARD_REGISTER_NUM_P (old_regno) && fixed_regs[old_regno])
1622 : : {
1623 : : /* We checked when adding the candidate that the value is stable. */
1624 : 0 : gcc_checking_assert (!rtx_unstable_p (*loc));
1625 : 0 : continue;
1626 : : }
1627 : :
1628 : 0 : if (bitmap_bit_p (&m_candidate_regnos, old_regno))
1629 : : /* We already know which candidate provides the definition
1630 : : and will handle it during copying. */
1631 : 0 : continue;
1632 : :
1633 : 0 : if (stable_use_p (old_regno))
1634 : : /* We can continue to use the existing register. */
1635 : 0 : continue;
1636 : :
1637 : : /* We need to replace the register. See whether we've already
1638 : : created a suitable copy. */
1639 : 0 : rtx old_reg = *loc;
1640 : 0 : rtx new_reg = NULL_RTX;
1641 : 0 : machine_mode mode = GET_MODE (old_reg);
1642 : 0 : reg_pair *p;
1643 : 0 : unsigned int pi;
1644 : 0 : FOR_EACH_VEC_ELT (reg_map, pi, p)
1645 : 0 : if (REGNO (p->first) == old_regno
1646 : 0 : && GET_MODE (p->first) == mode)
1647 : : {
1648 : 0 : new_reg = p->second;
1649 : 0 : break;
1650 : : }
1651 : :
1652 : 0 : if (!new_reg)
1653 : : {
1654 : : /* Create a new register and initialize it just before
1655 : : the instruction. */
1656 : 0 : new_reg = gen_reg_rtx (mode);
1657 : 0 : reg_map.safe_push (reg_pair (old_reg, new_reg));
1658 : 0 : if (ec)
1659 : : {
1660 : 0 : unsigned int member_index;
1661 : 0 : bitmap_iterator bi;
1662 : 0 : EXECUTE_IF_SET_IN_BITMAP (ec->members, 0, member_index, bi)
1663 : 0 : emit_copy_before (member_index, new_reg, old_reg);
1664 : : }
1665 : : else
1666 : 0 : emit_copy_before (cand_index, new_reg, old_reg);
1667 : : }
1668 : 0 : validate_change (insn, loc, new_reg, true);
1669 : : }
1670 : 0 : if (num_changes_pending ())
1671 : : {
1672 : 0 : if (!apply_change_group ())
1673 : : /* We checked when adding the candidates that the pattern allows
1674 : : hard registers to be replaced. Nothing else should make the
1675 : : changes invalid. */
1676 : 0 : gcc_unreachable ();
1677 : :
1678 : 0 : if (ec)
1679 : : {
1680 : : /* Copy the new pattern to other members of the equivalence
1681 : : class. */
1682 : 0 : unsigned int member_index;
1683 : 0 : bitmap_iterator bi;
1684 : 0 : EXECUTE_IF_SET_IN_BITMAP (ec->members, 0, member_index, bi)
1685 : 0 : if (cand_index != member_index)
1686 : : {
1687 : 0 : rtx_insn *other_insn = m_candidates[member_index].insn;
1688 : 0 : if (!validate_change (other_insn, &PATTERN (other_insn),
1689 : 0 : copy_insn (PATTERN (insn)), 0))
1690 : : /* If the original instruction was valid then the copy
1691 : : should be too. */
1692 : 0 : gcc_unreachable ();
1693 : : }
1694 : : }
1695 : : }
1696 : :
1697 : 0 : cand->stabilized_p = true;
1698 : 0 : }
1699 : :
1700 : : /* Change CAND's instruction so that it sets CAND->copy_regno instead
1701 : : of CAND->regno. */
1702 : :
1703 : : void
1704 : 0 : early_remat::replace_dest_with_copy (unsigned int cand_index)
1705 : : {
1706 : 0 : remat_candidate *cand = &m_candidates[cand_index];
1707 : 0 : df_ref def;
1708 : 0 : FOR_EACH_INSN_DEF (def, cand->insn)
1709 : 0 : if (DF_REF_REGNO (def) == cand->regno)
1710 : 0 : validate_change (cand->insn, DF_REF_REAL_LOC (def),
1711 : 0 : regno_reg_rtx[cand->copy_regno], 1);
1712 : 0 : }
1713 : :
1714 : : /* Make sure that the candidates used by candidate CAND_INDEX are available.
1715 : : There are two ways of doing this for an input candidate I:
1716 : :
1717 : : (1) Using the existing register number and ensuring that I is available.
1718 : :
1719 : : (2) Using a new register number (recorded in copy_regno) and adding I
1720 : : to VIA_COPY. This guarantees that making I available does not
1721 : : conflict with other uses of the original register.
1722 : :
1723 : : REQUIRED is the set of candidates that are required but not available
1724 : : before the copy of CAND_INDEX. AVAILABLE is the set of candidates
1725 : : that are already available before the copy of CAND_INDEX. REACHING
1726 : : is the set of candidates that reach the copy of CAND_INDEX. VIA_COPY
1727 : : is the set of candidates that will use new register numbers recorded
1728 : : in copy_regno instead of the original ones. */
1729 : :
1730 : : void
1731 : 0 : early_remat::stabilize_candidate_uses (unsigned int cand_index,
1732 : : bitmap required, bitmap available,
1733 : : bitmap reaching, bitmap via_copy)
1734 : : {
1735 : 0 : remat_candidate *cand = &m_candidates[cand_index];
1736 : 0 : df_ref use;
1737 : 0 : FOR_EACH_INSN_USE (use, cand->insn)
1738 : : {
1739 : 0 : unsigned int regno = DF_REF_REGNO (use);
1740 : 0 : if (!bitmap_bit_p (&m_candidate_regnos, regno))
1741 : 0 : continue;
1742 : :
1743 : : /* Work out which candidate provides the definition. */
1744 : 0 : bitmap defs = m_regno_to_candidates[regno];
1745 : 0 : bitmap_and (&m_tmp_bitmap, cand->uses, defs);
1746 : 0 : gcc_checking_assert (bitmap_single_bit_set_p (&m_tmp_bitmap));
1747 : 0 : unsigned int def_index = bitmap_first_set_bit (&m_tmp_bitmap);
1748 : :
1749 : : /* First see if DEF_INDEX is the only reaching definition of REGNO
1750 : : at this point too and if it is or will become available. We can
1751 : : continue to use REGNO if so. */
1752 : 0 : bitmap_and (&m_tmp_bitmap, reaching, defs);
1753 : 0 : if (bitmap_single_bit_set_p (&m_tmp_bitmap)
1754 : 0 : && bitmap_first_set_bit (&m_tmp_bitmap) == def_index
1755 : 0 : && ((available && bitmap_bit_p (available, def_index))
1756 : 0 : || bitmap_bit_p (required, def_index)))
1757 : : {
1758 : 0 : if (dump_file)
1759 : 0 : fprintf (dump_file, ";; Keeping reg %d for use of candidate %d"
1760 : : " in candidate %d\n", regno, def_index, cand_index);
1761 : 0 : continue;
1762 : : }
1763 : :
1764 : : /* Otherwise fall back to using a copy. There are other cases
1765 : : in which we *could* continue to use REGNO, but there's not
1766 : : really much point. Using a separate register ought to make
1767 : : things easier for the register allocator. */
1768 : 0 : remat_candidate *def_cand = &m_candidates[def_index];
1769 : 0 : rtx *loc = DF_REF_REAL_LOC (use);
1770 : 0 : rtx new_reg;
1771 : 0 : if (bitmap_set_bit (via_copy, def_index))
1772 : : {
1773 : 0 : new_reg = gen_reg_rtx (GET_MODE (*loc));
1774 : 0 : def_cand->copy_regno = REGNO (new_reg);
1775 : 0 : if (dump_file)
1776 : 0 : fprintf (dump_file, ";; Creating reg %d for use of candidate %d"
1777 : : " in candidate %d\n", REGNO (new_reg), def_index,
1778 : : cand_index);
1779 : : }
1780 : : else
1781 : 0 : new_reg = regno_reg_rtx[def_cand->copy_regno];
1782 : 0 : validate_change (cand->insn, loc, new_reg, 1);
1783 : : }
1784 : 0 : }
1785 : :
1786 : : /* Rematerialize the candidates in REQUIRED after instruction INSN,
1787 : : given that the candidates in AVAILABLE are already available
1788 : : and that REACHING is the set of candidates live after INSN.
1789 : : REQUIRED and AVAILABLE are disjoint on entry.
1790 : :
1791 : : Clear REQUIRED on exit. */
1792 : :
1793 : : void
1794 : 0 : early_remat::emit_remat_insns (bitmap required, bitmap available,
1795 : : bitmap reaching, rtx_insn *insn)
1796 : : {
1797 : : /* Quick exit if there's nothing to do. */
1798 : 0 : if (empty_p (required))
1799 : 0 : return;
1800 : :
1801 : : /* Only reaching definitions should be available or required. */
1802 : 0 : gcc_checking_assert (!bitmap_intersect_compl_p (required, reaching));
1803 : 0 : if (available)
1804 : 0 : gcc_checking_assert (!bitmap_intersect_compl_p (available, reaching));
1805 : :
1806 : 0 : bitmap_head via_copy;
1807 : 0 : bitmap_initialize (&via_copy, &m_obstack);
1808 : 0 : while (!bitmap_empty_p (required) || !bitmap_empty_p (&via_copy))
1809 : : {
1810 : : /* Pick the lowest-indexed candidate left. */
1811 : 0 : unsigned int required_index = (bitmap_empty_p (required)
1812 : 0 : ? ~0U : bitmap_first_set_bit (required));
1813 : 0 : unsigned int via_copy_index = (bitmap_empty_p (&via_copy)
1814 : 0 : ? ~0U : bitmap_first_set_bit (&via_copy));
1815 : 0 : unsigned int cand_index = MIN (required_index, via_copy_index);
1816 : 0 : remat_candidate *cand = &m_candidates[cand_index];
1817 : :
1818 : 0 : bool via_copy_p = (cand_index == via_copy_index);
1819 : 0 : if (via_copy_p)
1820 : 0 : bitmap_clear_bit (&via_copy, cand_index);
1821 : : else
1822 : : {
1823 : : /* Remove all candidates for the same register from REQUIRED. */
1824 : 0 : bitmap_and (&m_tmp_bitmap, reaching,
1825 : 0 : m_regno_to_candidates[cand->regno]);
1826 : 0 : bitmap_and_compl_into (required, &m_tmp_bitmap);
1827 : 0 : gcc_checking_assert (!bitmap_bit_p (required, cand_index));
1828 : :
1829 : : /* Only rematerialize if we have a single reaching definition
1830 : : of the register. */
1831 : 0 : if (!bitmap_single_bit_set_p (&m_tmp_bitmap))
1832 : : {
1833 : 0 : if (dump_file)
1834 : : {
1835 : 0 : fprintf (dump_file, ";; Can't rematerialize reg %d after ",
1836 : : cand->regno);
1837 : 0 : dump_insn_id (insn);
1838 : 0 : fprintf (dump_file, ": more than one reaching definition\n");
1839 : : }
1840 : 0 : continue;
1841 : : }
1842 : :
1843 : : /* Skip candidates that can't be rematerialized. */
1844 : 0 : if (!cand->can_copy_p)
1845 : 0 : continue;
1846 : :
1847 : : /* Check the function precondition. */
1848 : 0 : gcc_checking_assert (!available
1849 : : || !bitmap_bit_p (available, cand_index));
1850 : : }
1851 : :
1852 : : /* Invalid candidates should have been weeded out by now. */
1853 : 0 : gcc_assert (cand->can_copy_p);
1854 : :
1855 : 0 : rtx new_pattern;
1856 : 0 : if (cand->constant_p)
1857 : : {
1858 : : /* Emit a simple move. */
1859 : 0 : unsigned int regno = via_copy_p ? cand->copy_regno : cand->regno;
1860 : 0 : new_pattern = gen_move_insn (regno_reg_rtx[regno], cand->remat_rtx);
1861 : : }
1862 : : else
1863 : : {
1864 : : /* If this is the first time we've copied the instruction, make
1865 : : sure that any inputs will have the same value after INSN. */
1866 : 0 : stabilize_pattern (cand_index);
1867 : :
1868 : : /* Temporarily adjust the original instruction so that it has
1869 : : the right form for the copy. */
1870 : 0 : if (via_copy_p)
1871 : 0 : replace_dest_with_copy (cand_index);
1872 : 0 : if (cand->uses)
1873 : 0 : stabilize_candidate_uses (cand_index, required, available,
1874 : : reaching, &via_copy);
1875 : :
1876 : : /* Get the new instruction pattern. */
1877 : 0 : new_pattern = copy_insn (cand->remat_rtx);
1878 : :
1879 : : /* Undo the temporary changes. */
1880 : 0 : cancel_changes (0);
1881 : : }
1882 : :
1883 : : /* Emit the new instruction. */
1884 : 0 : rtx_insn *new_insn = emit_insn_after (new_pattern, insn);
1885 : :
1886 : 0 : if (dump_file)
1887 : : {
1888 : 0 : fprintf (dump_file, ";; Rematerializing candidate %d after ",
1889 : : cand_index);
1890 : 0 : dump_insn_id (insn);
1891 : 0 : if (via_copy_p)
1892 : 0 : fprintf (dump_file, " with new destination reg %d",
1893 : : cand->copy_regno);
1894 : 0 : fprintf (dump_file, ":\n\n");
1895 : 0 : print_rtl_single (dump_file, new_insn);
1896 : 0 : fprintf (dump_file, "\n");
1897 : : }
1898 : : }
1899 : : }
1900 : :
1901 : : /* Recompute INFO's available_out set, given that it's distinct from
1902 : : available_in and available_locally. */
1903 : :
1904 : : bool
1905 : 0 : early_remat::set_available_out (remat_block_info *info)
1906 : : {
1907 : 0 : if (empty_p (info->available_locally))
1908 : 0 : return bitmap_and_compl (get_bitmap (&info->available_out),
1909 : 0 : info->available_in, info->rd_kill);
1910 : :
1911 : 0 : if (empty_p (info->rd_kill))
1912 : 0 : return bitmap_ior (get_bitmap (&info->available_out),
1913 : 0 : info->available_locally, info->available_in);
1914 : :
1915 : 0 : return bitmap_ior_and_compl (get_bitmap (&info->available_out),
1916 : 0 : info->available_locally, info->available_in,
1917 : 0 : info->rd_kill);
1918 : : }
1919 : :
1920 : : /* If BB has more than one call, decide which candidates should be
1921 : : rematerialized after the non-final calls and emit the associated
1922 : : instructions. Record other information about the block in preparation
1923 : : for the global phase. */
1924 : :
1925 : : void
1926 : 0 : early_remat::process_block (basic_block bb)
1927 : : {
1928 : 0 : remat_block_info *info = &m_block_info[bb->index];
1929 : 0 : rtx_insn *last_call = NULL;
1930 : 0 : rtx_insn *insn;
1931 : :
1932 : : /* Ensure that we always use the same candidate index to refer to an
1933 : : equivalence class. */
1934 : 0 : if (info->rd_out == info->rd_in)
1935 : : {
1936 : 0 : canon_bitmap (&info->rd_in);
1937 : 0 : info->rd_out = info->rd_in;
1938 : : }
1939 : : else
1940 : : {
1941 : 0 : canon_bitmap (&info->rd_in);
1942 : 0 : canon_bitmap (&info->rd_out);
1943 : : }
1944 : 0 : canon_bitmap (&info->rd_kill);
1945 : 0 : canon_bitmap (&info->rd_gen);
1946 : :
1947 : : /* The set of candidates that should be rematerialized on entry to the
1948 : : block or after the previous call (whichever is more recent). */
1949 : 0 : init_temp_bitmap (&m_required);
1950 : :
1951 : : /* The set of candidates that reach the current instruction (i.e. are
1952 : : live just before the instruction). */
1953 : 0 : bitmap_head reaching;
1954 : 0 : bitmap_initialize (&reaching, &m_obstack);
1955 : 0 : if (info->rd_in)
1956 : 0 : bitmap_copy (&reaching, info->rd_in);
1957 : :
1958 : : /* The set of candidates that are live and available without
1959 : : rematerialization just before the current instruction. This only
1960 : : accounts for earlier candidates in the block, or those that become
1961 : : available by being added to M_REQUIRED. */
1962 : 0 : init_temp_bitmap (&m_available);
1963 : :
1964 : : /* Get the range of candidates in the block. */
1965 : 0 : unsigned int next_candidate = info->first_candidate;
1966 : 0 : unsigned int num_candidates = info->num_candidates;
1967 : 0 : remat_candidate *next_def = (num_candidates > 0
1968 : 0 : ? &m_candidates[next_candidate]
1969 : 0 : : NULL);
1970 : :
1971 : 0 : FOR_BB_INSNS (bb, insn)
1972 : : {
1973 : 0 : if (!NONDEBUG_INSN_P (insn))
1974 : 0 : continue;
1975 : :
1976 : : /* First process uses, since this is a forward walk. */
1977 : 0 : df_ref ref;
1978 : 0 : FOR_EACH_INSN_USE (ref, insn)
1979 : : {
1980 : 0 : unsigned int regno = DF_REF_REGNO (ref);
1981 : 0 : if (bitmap_bit_p (&m_candidate_regnos, regno))
1982 : : {
1983 : 0 : bitmap defs = m_regno_to_candidates[regno];
1984 : 0 : bitmap_and (&m_tmp_bitmap, defs, &reaching);
1985 : 0 : gcc_checking_assert (!bitmap_empty_p (&m_tmp_bitmap));
1986 : 0 : if (!bitmap_intersect_p (defs, m_available))
1987 : : {
1988 : : /* There has been no definition of the register since
1989 : : the last call or the start of the block (whichever
1990 : : is most recent). Mark the reaching definitions
1991 : : as required at that point and thus available here. */
1992 : 0 : bitmap_ior_into (m_required, &m_tmp_bitmap);
1993 : 0 : bitmap_ior_into (m_available, &m_tmp_bitmap);
1994 : : }
1995 : : }
1996 : : }
1997 : :
1998 : 0 : if (CALL_P (insn))
1999 : : {
2000 : 0 : if (!last_call)
2001 : : {
2002 : : /* The first call in the block. Record which candidates are
2003 : : required at the start of the block. */
2004 : 0 : copy_temp_bitmap (&info->required_in, &m_required);
2005 : 0 : init_temp_bitmap (&m_required);
2006 : : }
2007 : : else
2008 : : {
2009 : : /* The fully-local case: candidates that need to be
2010 : : rematerialized after a previous call in the block. */
2011 : 0 : restrict_remat_for_call (m_required, last_call);
2012 : 0 : emit_remat_insns (m_required, NULL, info->rd_after_call,
2013 : : last_call);
2014 : : }
2015 : 0 : last_call = insn;
2016 : 0 : bitmap_clear (m_available);
2017 : 0 : gcc_checking_assert (empty_p (m_required));
2018 : : }
2019 : :
2020 : : /* Now process definitions. */
2021 : 0 : while (next_def && insn == next_def->insn)
2022 : : {
2023 : 0 : unsigned int gen = canon_candidate (next_candidate);
2024 : :
2025 : : /* Other candidates with the same regno are not available
2026 : : any more. */
2027 : 0 : bitmap kill = m_regno_to_candidates[next_def->regno];
2028 : 0 : bitmap_and_compl_into (m_available, kill);
2029 : 0 : bitmap_and_compl_into (&reaching, kill);
2030 : :
2031 : : /* Record that this candidate is available without
2032 : : rematerialization. */
2033 : 0 : bitmap_set_bit (m_available, gen);
2034 : 0 : bitmap_set_bit (&reaching, gen);
2035 : :
2036 : : /* Find the next candidate in the block. */
2037 : 0 : num_candidates -= 1;
2038 : 0 : next_candidate -= 1;
2039 : 0 : if (num_candidates > 0)
2040 : 0 : next_def -= 1;
2041 : : else
2042 : : next_def = NULL;
2043 : : }
2044 : :
2045 : 0 : if (insn == last_call)
2046 : 0 : bitmap_copy (get_bitmap (&info->rd_after_call), &reaching);
2047 : : }
2048 : 0 : bitmap_clear (&reaching);
2049 : 0 : gcc_checking_assert (num_candidates == 0);
2050 : :
2051 : : /* Remove values from the available set if they aren't live (and so
2052 : : aren't interesting to successor blocks). */
2053 : 0 : if (info->rd_out)
2054 : 0 : bitmap_and_into (m_available, info->rd_out);
2055 : :
2056 : : /* Record the accumulated information. */
2057 : 0 : info->last_call = last_call;
2058 : 0 : info->abnormal_call_p = (last_call
2059 : 0 : && last_call == BB_END (bb)
2060 : 0 : && has_abnormal_or_eh_outgoing_edge_p (bb));
2061 : 0 : copy_temp_bitmap (&info->available_locally, &m_available);
2062 : 0 : if (last_call)
2063 : 0 : copy_temp_bitmap (&info->required_after_call, &m_required);
2064 : : else
2065 : 0 : copy_temp_bitmap (&info->required_in, &m_required);
2066 : :
2067 : : /* Assume at first that all live-in values are available without
2068 : : rematerialization (i.e. start with the most optimistic assumption). */
2069 : 0 : if (info->available_in)
2070 : : {
2071 : 0 : if (info->rd_in)
2072 : 0 : bitmap_copy (info->available_in, info->rd_in);
2073 : : else
2074 : 0 : BITMAP_FREE (info->available_in);
2075 : : }
2076 : :
2077 : 0 : if (last_call || empty_p (info->available_in))
2078 : : /* The values available on exit from the block are exactly those that
2079 : : are available locally. This set doesn't change. */
2080 : 0 : info->available_out = info->available_locally;
2081 : 0 : else if (empty_p (info->available_locally) && empty_p (info->rd_kill))
2082 : : /* The values available on exit are the same as those available on entry.
2083 : : Updating one updates the other. */
2084 : 0 : info->available_out = info->available_in;
2085 : : else
2086 : 0 : set_available_out (info);
2087 : 0 : }
2088 : :
2089 : : /* Process each block as for process_block, visiting dominators before
2090 : : the blocks they dominate. */
2091 : :
2092 : : void
2093 : 0 : early_remat::local_phase (void)
2094 : : {
2095 : 0 : if (dump_file)
2096 : 0 : fprintf (dump_file, "\n;; Local phase:\n");
2097 : :
2098 : 0 : int *rpo = df_get_postorder (DF_FORWARD);
2099 : 0 : unsigned int rpo_len = df_get_n_blocks (DF_FORWARD);
2100 : 0 : for (unsigned int i = 0; i < rpo_len; ++i)
2101 : 0 : if (rpo[i] >= NUM_FIXED_BLOCKS)
2102 : 0 : process_block (BASIC_BLOCK_FOR_FN (m_fn, rpo[i]));
2103 : 0 : }
2104 : :
2105 : : /* Return true if available values survive across edge E. */
2106 : :
2107 : : static inline bool
2108 : 0 : available_across_edge_p (edge e)
2109 : : {
2110 : 0 : return (e->flags & EDGE_EH) == 0;
2111 : : }
2112 : :
2113 : : /* Propagate information from the available_out set of E->src to the
2114 : : available_in set of E->dest, when computing global availability.
2115 : : Return true if something changed. */
2116 : :
2117 : : bool
2118 : 0 : early_remat::avail_confluence_n (edge e)
2119 : : {
2120 : 0 : remat_block_info *src = &er->m_block_info[e->src->index];
2121 : 0 : remat_block_info *dest = &er->m_block_info[e->dest->index];
2122 : :
2123 : 0 : if (!available_across_edge_p (e))
2124 : : return false;
2125 : :
2126 : 0 : if (empty_p (dest->available_in))
2127 : : return false;
2128 : :
2129 : 0 : if (!src->available_out)
2130 : : {
2131 : 0 : bitmap_clear (dest->available_in);
2132 : 0 : return true;
2133 : : }
2134 : :
2135 : 0 : return bitmap_and_into (dest->available_in, src->available_out);
2136 : : }
2137 : :
2138 : : /* Propagate information from the available_in set of block BB_INDEX
2139 : : to available_out. Return true if something changed. */
2140 : :
2141 : : bool
2142 : 0 : early_remat::avail_transfer (int bb_index)
2143 : : {
2144 : 0 : remat_block_info *info = &er->m_block_info[bb_index];
2145 : :
2146 : 0 : if (info->available_out == info->available_locally)
2147 : : return false;
2148 : :
2149 : 0 : if (info->available_out == info->available_in)
2150 : : /* Assume that we are only called if the input changed. */
2151 : : return true;
2152 : :
2153 : 0 : return er->set_available_out (info);
2154 : : }
2155 : :
2156 : : /* Compute global availability for the function, starting with the local
2157 : : information computed by local_phase. */
2158 : :
2159 : : void
2160 : 0 : early_remat::compute_availability (void)
2161 : : {
2162 : : /* We use df_simple_dataflow instead of the lcm routines for three reasons:
2163 : :
2164 : : (1) it avoids recomputing the traversal order;
2165 : : (2) many of the sets are likely to be sparse, so we don't necessarily
2166 : : want to use sbitmaps; and
2167 : : (3) it means we can avoid creating an explicit kill set for the call. */
2168 : 0 : er = this;
2169 : 0 : bitmap_clear (&m_tmp_bitmap);
2170 : 0 : bitmap_set_range (&m_tmp_bitmap, 0, last_basic_block_for_fn (m_fn));
2171 : 0 : df_simple_dataflow (DF_FORWARD, NULL, NULL,
2172 : : avail_confluence_n, avail_transfer,
2173 : : &m_tmp_bitmap, df_get_postorder (DF_FORWARD),
2174 : : df_get_n_blocks (DF_FORWARD));
2175 : 0 : er = 0;
2176 : :
2177 : : /* Restrict the required_in sets to values that aren't available. */
2178 : 0 : basic_block bb;
2179 : 0 : FOR_EACH_BB_FN (bb, m_fn)
2180 : : {
2181 : 0 : remat_block_info *info = &m_block_info[bb->index];
2182 : 0 : if (info->required_in && info->available_in)
2183 : 0 : bitmap_and_compl_into (info->required_in, info->available_in);
2184 : : }
2185 : 0 : }
2186 : :
2187 : : /* Make sure that INFO's available_out and available_in sets are unique. */
2188 : :
2189 : : inline void
2190 : 0 : early_remat::unshare_available_sets (remat_block_info *info)
2191 : : {
2192 : 0 : if (info->available_in && info->available_in == info->available_out)
2193 : : {
2194 : 0 : info->available_in = alloc_bitmap ();
2195 : 0 : bitmap_copy (info->available_in, info->available_out);
2196 : : }
2197 : 0 : }
2198 : :
2199 : : /* Return true if it is possible to move rematerializations from the
2200 : : destination of E to the source of E. */
2201 : :
2202 : : inline bool
2203 : 0 : early_remat::can_move_across_edge_p (edge e)
2204 : : {
2205 : 0 : return (available_across_edge_p (e)
2206 : 0 : && !m_block_info[e->src->index].abnormal_call_p);
2207 : : }
2208 : :
2209 : : /* Return true if it is cheaper to rematerialize values at the head of
2210 : : block QUERY_BB_INDEX instead of rematerializing in its predecessors. */
2211 : :
2212 : : bool
2213 : 0 : early_remat::local_remat_cheaper_p (unsigned int query_bb_index)
2214 : : {
2215 : 0 : if (m_block_info[query_bb_index].remat_frequency_valid_p)
2216 : 0 : return m_block_info[query_bb_index].local_remat_cheaper_p;
2217 : :
2218 : : /* Iteratively compute the cost of rematerializing values in the
2219 : : predecessor blocks, then compare that with the cost of
2220 : : rematerializing at the head of the block.
2221 : :
2222 : : A cycle indicates that there is no call on that execution path,
2223 : : so it isn't necessary to rematerialize on that path. */
2224 : 0 : auto_vec<basic_block, 16> stack;
2225 : 0 : stack.quick_push (BASIC_BLOCK_FOR_FN (m_fn, query_bb_index));
2226 : 0 : while (!stack.is_empty ())
2227 : : {
2228 : 0 : basic_block bb = stack.last ();
2229 : 0 : remat_block_info *info = &m_block_info[bb->index];
2230 : 0 : if (info->remat_frequency_valid_p)
2231 : : {
2232 : 0 : stack.pop ();
2233 : 0 : continue;
2234 : : }
2235 : :
2236 : 0 : info->visited_p = true;
2237 : 0 : int frequency = 0;
2238 : 0 : bool can_move_p = true;
2239 : 0 : edge e;
2240 : 0 : edge_iterator ei;
2241 : 0 : FOR_EACH_EDGE (e, ei, bb->preds)
2242 : 0 : if (!can_move_across_edge_p (e))
2243 : : {
2244 : : can_move_p = false;
2245 : : break;
2246 : : }
2247 : 0 : else if (m_block_info[e->src->index].last_call)
2248 : : /* We'll rematerialize after the call. */
2249 : 0 : frequency += e->src->count.to_frequency (m_fn);
2250 : 0 : else if (m_block_info[e->src->index].remat_frequency_valid_p)
2251 : : /* Add the cost of rematerializing at the head of E->src
2252 : : or in its predecessors (whichever is cheaper). */
2253 : 0 : frequency += m_block_info[e->src->index].remat_frequency;
2254 : 0 : else if (!m_block_info[e->src->index].visited_p)
2255 : : /* Queue E->src and then revisit this block again. */
2256 : 0 : stack.safe_push (e->src);
2257 : :
2258 : : /* Come back to this block later if we need to process some of
2259 : : its predecessors. */
2260 : 0 : if (stack.last () != bb)
2261 : 0 : continue;
2262 : :
2263 : : /* If rematerializing in and before the block have equal cost, prefer
2264 : : rematerializing in the block. This should shorten the live range. */
2265 : 0 : int bb_frequency = bb->count.to_frequency (m_fn);
2266 : 0 : if (!can_move_p || frequency >= bb_frequency)
2267 : : {
2268 : 0 : info->local_remat_cheaper_p = true;
2269 : 0 : info->remat_frequency = bb_frequency;
2270 : : }
2271 : : else
2272 : 0 : info->remat_frequency = frequency;
2273 : 0 : info->remat_frequency_valid_p = true;
2274 : 0 : info->visited_p = false;
2275 : 0 : if (dump_file)
2276 : : {
2277 : 0 : if (!can_move_p)
2278 : 0 : fprintf (dump_file, ";; Need to rematerialize at the head of"
2279 : : " block %d; cannot move to predecessors.\n", bb->index);
2280 : : else
2281 : : {
2282 : 0 : fprintf (dump_file, ";; Block %d has frequency %d,"
2283 : : " rematerializing in predecessors has frequency %d",
2284 : : bb->index, bb_frequency, frequency);
2285 : 0 : if (info->local_remat_cheaper_p)
2286 : 0 : fprintf (dump_file, "; prefer to rematerialize"
2287 : : " in the block\n");
2288 : : else
2289 : 0 : fprintf (dump_file, "; prefer to rematerialize"
2290 : : " in predecessors\n");
2291 : : }
2292 : : }
2293 : 0 : stack.pop ();
2294 : : }
2295 : 0 : return m_block_info[query_bb_index].local_remat_cheaper_p;
2296 : 0 : }
2297 : :
2298 : : /* Return true if we cannot rematerialize candidate CAND_INDEX at the head of
2299 : : block BB_INDEX. */
2300 : :
2301 : : bool
2302 : 0 : early_remat::need_to_move_candidate_p (unsigned int bb_index,
2303 : : unsigned int cand_index)
2304 : : {
2305 : 0 : remat_block_info *info = &m_block_info[bb_index];
2306 : 0 : remat_candidate *cand = &m_candidates[cand_index];
2307 : 0 : basic_block bb = BASIC_BLOCK_FOR_FN (m_fn, bb_index);
2308 : :
2309 : : /* If there is more than one reaching definition of REGNO,
2310 : : we'll need to rematerialize in predecessors instead. */
2311 : 0 : bitmap_and (&m_tmp_bitmap, info->rd_in, m_regno_to_candidates[cand->regno]);
2312 : 0 : if (!bitmap_single_bit_set_p (&m_tmp_bitmap))
2313 : : {
2314 : 0 : if (dump_file)
2315 : 0 : fprintf (dump_file, ";; Cannot rematerialize %d at the"
2316 : : " head of block %d because there is more than one"
2317 : : " reaching definition of reg %d\n", cand_index,
2318 : : bb_index, cand->regno);
2319 : 0 : return true;
2320 : : }
2321 : :
2322 : : /* Likewise if rematerializing CAND here would clobber a live register. */
2323 : 0 : if (cand->clobbers
2324 : 0 : && bitmap_intersect_p (cand->clobbers, DF_LR_IN (bb)))
2325 : : {
2326 : 0 : if (dump_file)
2327 : 0 : fprintf (dump_file, ";; Cannot rematerialize %d at the"
2328 : : " head of block %d because it would clobber live"
2329 : : " registers\n", cand_index, bb_index);
2330 : 0 : return true;
2331 : : }
2332 : :
2333 : : return false;
2334 : : }
2335 : :
2336 : : /* Set REQUIRED to the minimum set of candidates that must be rematerialized
2337 : : in predecessors of block BB_INDEX instead of at the start of the block. */
2338 : :
2339 : : void
2340 : 0 : early_remat::compute_minimum_move_set (unsigned int bb_index,
2341 : : bitmap required)
2342 : : {
2343 : 0 : remat_block_info *info = &m_block_info[bb_index];
2344 : 0 : bitmap_head remaining;
2345 : :
2346 : 0 : bitmap_clear (required);
2347 : 0 : bitmap_initialize (&remaining, &m_obstack);
2348 : 0 : bitmap_copy (&remaining, info->required_in);
2349 : 0 : while (!bitmap_empty_p (&remaining))
2350 : : {
2351 : 0 : unsigned int cand_index = bitmap_first_set_bit (&remaining);
2352 : 0 : remat_candidate *cand = &m_candidates[cand_index];
2353 : 0 : bitmap_clear_bit (&remaining, cand_index);
2354 : :
2355 : : /* Leave invalid candidates where they are. */
2356 : 0 : if (!cand->can_copy_p)
2357 : 0 : continue;
2358 : :
2359 : : /* Decide whether to move this candidate. */
2360 : 0 : if (!bitmap_bit_p (required, cand_index))
2361 : : {
2362 : 0 : if (!need_to_move_candidate_p (bb_index, cand_index))
2363 : 0 : continue;
2364 : 0 : bitmap_set_bit (required, cand_index);
2365 : : }
2366 : :
2367 : : /* Also move values used by the candidate, so that we don't
2368 : : rematerialize them twice. */
2369 : 0 : if (cand->uses)
2370 : : {
2371 : 0 : bitmap_ior_and_into (required, cand->uses, info->required_in);
2372 : 0 : bitmap_ior_and_into (&remaining, cand->uses, info->required_in);
2373 : : }
2374 : : }
2375 : 0 : }
2376 : :
2377 : : /* Make the predecessors of BB_INDEX rematerialize the candidates in
2378 : : REQUIRED. Add any blocks whose required_in set changes to
2379 : : PENDING_BLOCKS. */
2380 : :
2381 : : void
2382 : 0 : early_remat::move_to_predecessors (unsigned int bb_index, bitmap required,
2383 : : bitmap pending_blocks)
2384 : : {
2385 : 0 : if (empty_p (required))
2386 : 0 : return;
2387 : 0 : remat_block_info *dest_info = &m_block_info[bb_index];
2388 : 0 : basic_block bb = BASIC_BLOCK_FOR_FN (m_fn, bb_index);
2389 : 0 : edge e;
2390 : 0 : edge_iterator ei;
2391 : 0 : FOR_EACH_EDGE (e, ei, bb->preds)
2392 : : {
2393 : 0 : remat_block_info *src_info = &m_block_info[e->src->index];
2394 : :
2395 : : /* Restrict the set we add to the reaching definitions. */
2396 : 0 : bitmap_and (&m_tmp_bitmap, required, src_info->rd_out);
2397 : 0 : if (bitmap_empty_p (&m_tmp_bitmap))
2398 : 0 : continue;
2399 : :
2400 : 0 : if (!can_move_across_edge_p (e))
2401 : : {
2402 : : /* We can't move the rematerialization and we can't do it at
2403 : : the start of the block either. In this case we just give up
2404 : : and rely on spilling to make the values available across E. */
2405 : 0 : if (dump_file)
2406 : : {
2407 : 0 : fprintf (dump_file, ";; Cannot rematerialize the following"
2408 : 0 : " candidates in block %d:", e->src->index);
2409 : 0 : dump_candidate_bitmap (required);
2410 : 0 : fprintf (dump_file, "\n");
2411 : : }
2412 : 0 : continue;
2413 : : }
2414 : :
2415 : : /* Remove candidates that are already available. */
2416 : 0 : if (src_info->available_out)
2417 : : {
2418 : 0 : bitmap_and_compl_into (&m_tmp_bitmap, src_info->available_out);
2419 : 0 : if (bitmap_empty_p (&m_tmp_bitmap))
2420 : 0 : continue;
2421 : : }
2422 : :
2423 : : /* Add the remaining candidates to the appropriate required set. */
2424 : 0 : if (dump_file)
2425 : : {
2426 : 0 : fprintf (dump_file, ";; Moving this set from block %d"
2427 : 0 : " to block %d:", bb_index, e->src->index);
2428 : 0 : dump_candidate_bitmap (&m_tmp_bitmap);
2429 : 0 : fprintf (dump_file, "\n");
2430 : : }
2431 : : /* If the source block contains a call, we want to rematerialize
2432 : : after the call, otherwise we want to rematerialize at the start
2433 : : of the block. */
2434 : 0 : bitmap src_required = get_bitmap (src_info->last_call
2435 : : ? &src_info->required_after_call
2436 : : : &src_info->required_in);
2437 : 0 : if (bitmap_ior_into (src_required, &m_tmp_bitmap))
2438 : : {
2439 : 0 : if (!src_info->last_call)
2440 : 0 : bitmap_set_bit (pending_blocks, e->src->index);
2441 : 0 : unshare_available_sets (src_info);
2442 : 0 : bitmap_ior_into (get_bitmap (&src_info->available_out),
2443 : : &m_tmp_bitmap);
2444 : : }
2445 : : }
2446 : :
2447 : : /* The candidates are now available on entry to the block. */
2448 : 0 : bitmap_and_compl_into (dest_info->required_in, required);
2449 : 0 : unshare_available_sets (dest_info);
2450 : 0 : bitmap_ior_into (get_bitmap (&dest_info->available_in), required);
2451 : : }
2452 : :
2453 : : /* Go through the candidates that are currently marked as being
2454 : : rematerialized at the beginning of a block. Decide in each case
2455 : : whether that's valid and profitable; if it isn't, move the
2456 : : rematerialization to predecessor blocks instead. */
2457 : :
2458 : : void
2459 : 0 : early_remat::choose_rematerialization_points (void)
2460 : : {
2461 : 0 : bitmap_head required;
2462 : 0 : bitmap_head pending_blocks;
2463 : :
2464 : 0 : int *postorder = df_get_postorder (DF_BACKWARD);
2465 : 0 : unsigned int postorder_len = df_get_n_blocks (DF_BACKWARD);
2466 : 0 : bitmap_initialize (&required, &m_obstack);
2467 : 0 : bitmap_initialize (&pending_blocks, &m_obstack);
2468 : 0 : do
2469 : : /* Process the blocks in postorder, to reduce the number of iterations
2470 : : of the outer loop. */
2471 : 0 : for (unsigned int i = 0; i < postorder_len; ++i)
2472 : : {
2473 : 0 : unsigned int bb_index = postorder[i];
2474 : 0 : remat_block_info *info = &m_block_info[bb_index];
2475 : 0 : bitmap_clear_bit (&pending_blocks, bb_index);
2476 : :
2477 : 0 : if (empty_p (info->required_in))
2478 : 0 : continue;
2479 : :
2480 : 0 : if (info->available_in)
2481 : 0 : gcc_checking_assert (!bitmap_intersect_p (info->required_in,
2482 : : info->available_in));
2483 : :
2484 : 0 : if (local_remat_cheaper_p (bb_index))
2485 : : {
2486 : : /* We'd prefer to rematerialize at the head of the block.
2487 : : Only move candidates if we need to. */
2488 : 0 : compute_minimum_move_set (bb_index, &required);
2489 : 0 : move_to_predecessors (bb_index, &required, &pending_blocks);
2490 : : }
2491 : : else
2492 : 0 : move_to_predecessors (bb_index, info->required_in,
2493 : : &pending_blocks);
2494 : : }
2495 : 0 : while (!bitmap_empty_p (&pending_blocks));
2496 : 0 : bitmap_clear (&required);
2497 : 0 : }
2498 : :
2499 : : /* Emit all rematerialization instructions queued for BB. */
2500 : :
2501 : : void
2502 : 0 : early_remat::emit_remat_insns_for_block (basic_block bb)
2503 : : {
2504 : 0 : remat_block_info *info = &m_block_info[bb->index];
2505 : :
2506 : 0 : if (info->last_call && !empty_p (info->required_after_call))
2507 : : {
2508 : 0 : restrict_remat_for_call (info->required_after_call, info->last_call);
2509 : 0 : emit_remat_insns (info->required_after_call, NULL,
2510 : : info->rd_after_call, info->last_call);
2511 : : }
2512 : :
2513 : 0 : if (!empty_p (info->required_in))
2514 : : {
2515 : 0 : rtx_insn *insn = BB_HEAD (bb);
2516 : 0 : while (insn != BB_END (bb)
2517 : 0 : && !INSN_P (NEXT_INSN (insn)))
2518 : : insn = NEXT_INSN (insn);
2519 : 0 : restrict_remat_for_unavail_regs (info->required_in, DF_LR_IN (bb));
2520 : 0 : emit_remat_insns (info->required_in, info->available_in,
2521 : : info->rd_in, insn);
2522 : : }
2523 : 0 : }
2524 : :
2525 : : /* Decide which candidates in each block's REQUIRED_IN set need to be
2526 : : rematerialized and decide where the rematerialization instructions
2527 : : should go. Emit queued rematerialization instructions at the start
2528 : : of blocks and after the last calls in blocks. */
2529 : :
2530 : : void
2531 : 0 : early_remat::global_phase (void)
2532 : : {
2533 : 0 : compute_availability ();
2534 : 0 : if (dump_file)
2535 : : {
2536 : 0 : fprintf (dump_file, "\n;; Blocks after computing global"
2537 : : " availability:\n");
2538 : 0 : dump_all_blocks ();
2539 : : }
2540 : :
2541 : 0 : choose_rematerialization_points ();
2542 : 0 : if (dump_file)
2543 : : {
2544 : 0 : fprintf (dump_file, "\n;; Blocks after choosing rematerialization"
2545 : : " points:\n");
2546 : 0 : dump_all_blocks ();
2547 : : }
2548 : :
2549 : 0 : basic_block bb;
2550 : 0 : FOR_EACH_BB_FN (bb, m_fn)
2551 : 0 : emit_remat_insns_for_block (bb);
2552 : 0 : }
2553 : :
2554 : : /* Main function for the pass. */
2555 : :
2556 : : void
2557 : 0 : early_remat::run (void)
2558 : : {
2559 : 0 : df_analyze ();
2560 : :
2561 : 0 : if (!collect_candidates ())
2562 : : return;
2563 : :
2564 : 0 : init_block_info ();
2565 : 0 : sort_candidates ();
2566 : 0 : finalize_candidate_indices ();
2567 : 0 : if (dump_file)
2568 : 0 : dump_all_candidates ();
2569 : :
2570 : 0 : compute_rd ();
2571 : 0 : decide_candidate_validity ();
2572 : 0 : local_phase ();
2573 : 0 : global_phase ();
2574 : : }
2575 : :
2576 : 0 : early_remat::early_remat (function *fn, sbitmap selected_modes)
2577 : 0 : : m_fn (fn),
2578 : 0 : m_selected_modes (selected_modes),
2579 : 0 : m_available (0),
2580 : 0 : m_required (0),
2581 : 0 : m_value_table (63)
2582 : : {
2583 : 0 : bitmap_obstack_initialize (&m_obstack);
2584 : 0 : bitmap_initialize (&m_candidate_regnos, &m_obstack);
2585 : 0 : bitmap_initialize (&m_tmp_bitmap, &m_obstack);
2586 : 0 : }
2587 : :
2588 : 0 : early_remat::~early_remat ()
2589 : : {
2590 : 0 : bitmap_obstack_release (&m_obstack);
2591 : 0 : }
2592 : :
2593 : : namespace {
2594 : :
2595 : : const pass_data pass_data_early_remat =
2596 : : {
2597 : : RTL_PASS, /* type */
2598 : : "early_remat", /* name */
2599 : : OPTGROUP_NONE, /* optinfo_flags */
2600 : : TV_EARLY_REMAT, /* tv_id */
2601 : : 0, /* properties_required */
2602 : : 0, /* properties_provided */
2603 : : 0, /* properties_destroyed */
2604 : : 0, /* todo_flags_start */
2605 : : TODO_df_finish, /* todo_flags_finish */
2606 : : };
2607 : :
2608 : : class pass_early_remat : public rtl_opt_pass
2609 : : {
2610 : : public:
2611 : 280114 : pass_early_remat (gcc::context *ctxt)
2612 : 560228 : : rtl_opt_pass (pass_data_early_remat, ctxt)
2613 : : {}
2614 : :
2615 : : /* opt_pass methods: */
2616 : 1415668 : bool gate (function *) final override
2617 : : {
2618 : 1415668 : return optimize > 1 && NUM_POLY_INT_COEFFS > 1;
2619 : : }
2620 : :
2621 : 0 : unsigned int execute (function *f) final override
2622 : : {
2623 : 0 : auto_sbitmap selected_modes (NUM_MACHINE_MODES);
2624 : 0 : bitmap_clear (selected_modes);
2625 : 0 : targetm.select_early_remat_modes (selected_modes);
2626 : 0 : if (!bitmap_empty_p (selected_modes))
2627 : 0 : early_remat (f, selected_modes).run ();
2628 : 0 : return 0;
2629 : 0 : }
2630 : : }; // class pass_early_remat
2631 : :
2632 : : } // anon namespace
2633 : :
2634 : : rtl_opt_pass *
2635 : 280114 : make_pass_early_remat (gcc::context *ctxt)
2636 : : {
2637 : 280114 : return new pass_early_remat (ctxt);
2638 : : }
|